Spaces:
Runtime error
Runtime error
Update main.py
Browse files
main.py
CHANGED
@@ -250,28 +250,6 @@ save_path = "/content/drive/My Drive/fine_tuned_tacotron2.pth"
|
|
250 |
|
251 |
# Save the model's state dictionary using torch.save
|
252 |
torch.save(model.state_dict(), save_path)
|
253 |
-
|
254 |
-
|
255 |
-
import librosa
|
256 |
-
import soundfile as sf
|
257 |
-
|
258 |
-
def adjust_pitch(audio_path, pitch_factor):
    """Pitch-shift the audio file at *audio_path* in place.

    Args:
        audio_path: Path to a WAV file; it is overwritten with the result.
        pitch_factor: Shift in semitones (positive = higher pitch).
    """
    # Load audio (librosa default: mono, 22050 Hz resample).
    y, sr = librosa.load(audio_path)
    # Adjust pitch. In librosa >= 0.10 `sr` and `n_steps` are keyword-only;
    # passing `sr` positionally raises TypeError at runtime.
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_factor)
    # Save adjusted audio back over the original file.
    sf.write(audio_path, y_shifted, sr)
|
265 |
-
|
266 |
-
def adjust_speed(audio_path, speed_factor):
    """Time-stretch the audio file at *audio_path* in place.

    Args:
        audio_path: Path to a WAV file; it is overwritten with the result.
        speed_factor: Stretch rate; > 1.0 speeds up (shorter audio),
            < 1.0 slows down (longer audio). Pitch is preserved.
    """
    # Load the audio file (librosa default: mono, 22050 Hz resample).
    y, sr = librosa.load(audio_path)

    # Adjust the speed (this alters the duration of the audio).
    # In librosa >= 0.10 `rate` is keyword-only; the positional form
    # `time_stretch(y, speed_factor)` raises TypeError at runtime.
    y_speeded = librosa.effects.time_stretch(y, rate=speed_factor)

    # Save the adjusted audio back over the original file.
    sf.write(audio_path, y_speeded, sr)
|
275 |
|
276 |
|
277 |
"""Set up the Gradio interface"""
|
@@ -298,8 +276,18 @@ emotion_settings = {
|
|
298 |
"shame": {"pitch": 0.8, "speed": 0.85},
|
299 |
}
|
300 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
|
302 |
-
# Function to process text or file input and generate audio
|
303 |
def emotion_aware_tts_pipeline(input_text=None, file_input=None):
|
304 |
try:
|
305 |
# Get text from input or file
|
@@ -320,9 +308,13 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
|
|
320 |
|
321 |
# Generate audio
|
322 |
audio_path = "output.wav"
|
323 |
-
tts_model.tts_to_file(text=input_text, file_path=audio_path
|
324 |
-
|
325 |
|
|
|
|
|
|
|
|
|
|
|
326 |
|
327 |
return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
|
328 |
else:
|
@@ -330,7 +322,7 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
|
|
330 |
except Exception as e:
|
331 |
return f"Error: {str(e)}", None
|
332 |
|
333 |
-
|
334 |
|
335 |
# Define Gradio interface
|
336 |
iface = gr.Interface(
|
|
|
250 |
|
251 |
# Save the model's state dictionary using torch.save
|
252 |
torch.save(model.state_dict(), save_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
253 |
|
254 |
|
255 |
"""Set up the Gradio interface"""
|
|
|
276 |
"shame": {"pitch": 0.8, "speed": 0.85},
|
277 |
}
|
278 |
|
279 |
+
import soundfile as sf
|
280 |
+
|
281 |
+
def adjust_audio_speed(audio_path, speed_factor):
    """Time-stretch the audio file at *audio_path* in place.

    Args:
        audio_path: Path to a WAV file; it is overwritten with the result.
        speed_factor: Stretch rate; > 1.0 speeds up (shorter audio),
            < 1.0 slows down (longer audio). Pitch is preserved.
    """
    y, sr = librosa.load(audio_path)
    # librosa >= 0.10 makes `rate` keyword-only; the positional call
    # `time_stretch(y, speed_factor)` raises
    # "TypeError: time_stretch() takes 1 positional argument".
    y_speeded = librosa.effects.time_stretch(y, rate=speed_factor)
    sf.write(audio_path, y_speeded, sr)
|
285 |
+
|
286 |
+
def adjust_audio_pitch(audio_path, pitch_factor):
    """Pitch-shift the audio file at *audio_path* in place.

    Args:
        audio_path: Path to a WAV file; it is overwritten with the result.
        pitch_factor: Shift in semitones (positive = higher pitch).
    """
    y, sr = librosa.load(audio_path)
    # librosa >= 0.10 makes `sr` and `n_steps` keyword-only; passing `sr`
    # positionally raises TypeError at runtime.
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_factor)
    sf.write(audio_path, y_shifted, sr)
|
290 |
|
|
|
291 |
def emotion_aware_tts_pipeline(input_text=None, file_input=None):
|
292 |
try:
|
293 |
# Get text from input or file
|
|
|
308 |
|
309 |
# Generate audio
|
310 |
audio_path = "output.wav"
|
311 |
+
tts_model.tts_to_file(text=input_text, file_path=audio_path)
|
|
|
312 |
|
313 |
+
# Adjust pitch and speed using librosa
|
314 |
+
if pitch != 1.0:
|
315 |
+
adjust_audio_pitch(audio_path, pitch)
|
316 |
+
if speed != 1.0:
|
317 |
+
adjust_audio_speed(audio_path, speed)
|
318 |
|
319 |
return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
|
320 |
else:
|
|
|
322 |
except Exception as e:
|
323 |
return f"Error: {str(e)}", None
|
324 |
|
325 |
+
|
326 |
|
327 |
# Define Gradio interface
|
328 |
iface = gr.Interface(
|