Spaces:
Runtime error
Runtime error
Update main.py
Browse files
main.py
CHANGED
@@ -250,28 +250,6 @@ save_path = "/content/drive/My Drive/fine_tuned_tacotron2.pth"
|
|
250 |
|
251 |
# Save the model's state dictionary using torch.save
|
252 |
torch.save(model.state_dict(), save_path)
|
253 |
-
|
254 |
-
|
255 |
-
import librosa
|
256 |
-
import soundfile as sf
|
257 |
-
|
258 |
-
def adjust_pitch(audio_path, pitch_factor):
    """Pitch-shift the audio file at *audio_path* in place.

    Args:
        audio_path: Path to a WAV file; it is overwritten with the result.
        pitch_factor: Shift in semitones (positive = higher pitch).
    """
    # Load audio (librosa default: mono, 22050 Hz resample).
    y, sr = librosa.load(audio_path)
    # Adjust pitch. In librosa >= 0.10 `sr` and `n_steps` are keyword-only;
    # passing `sr` positionally raises TypeError at runtime.
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_factor)
    # Save adjusted audio back over the original file.
    sf.write(audio_path, y_shifted, sr)
|
265 |
-
|
266 |
-
def adjust_speed(audio_path, speed_factor):
    """Time-stretch the audio file at *audio_path* in place.

    Args:
        audio_path: Path to a WAV file; it is overwritten with the result.
        speed_factor: Stretch rate; > 1.0 speeds up (shorter audio),
            < 1.0 slows down (longer audio). Pitch is preserved.
    """
    # Load the audio file (librosa default: mono, 22050 Hz resample).
    y, sr = librosa.load(audio_path)

    # Adjust the speed (this alters the duration of the audio).
    # In librosa >= 0.10 `rate` is keyword-only; the positional form
    # `time_stretch(y, speed_factor)` raises TypeError at runtime.
    y_speeded = librosa.effects.time_stretch(y, rate=speed_factor)

    # Save the adjusted audio back over the original file.
    sf.write(audio_path, y_speeded, sr)
|
275 |
|
276 |
|
277 |
"""Set up the Gradio interface"""
|
@@ -298,8 +276,18 @@ emotion_settings = {
|
|
298 |
"shame": {"pitch": 0.8, "speed": 0.85},
|
299 |
}
|
300 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
|
302 |
-
# Function to process text or file input and generate audio
|
303 |
def emotion_aware_tts_pipeline(input_text=None, file_input=None):
|
304 |
try:
|
305 |
# Get text from input or file
|
@@ -320,9 +308,13 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
|
|
320 |
|
321 |
# Generate audio
|
322 |
audio_path = "output.wav"
|
323 |
-
tts_model.tts_to_file(text=input_text, file_path=audio_path
|
324 |
-
|
325 |
|
|
|
|
|
|
|
|
|
|
|
326 |
|
327 |
return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
|
328 |
else:
|
@@ -330,7 +322,7 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
|
|
330 |
except Exception as e:
|
331 |
return f"Error: {str(e)}", None
|
332 |
|
333 |
-
|
334 |
|
335 |
# Define Gradio interface
|
336 |
iface = gr.Interface(
|
|
|
250 |
|
251 |
# Save the model's state dictionary using torch.save
|
252 |
torch.save(model.state_dict(), save_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
253 |
|
254 |
|
255 |
"""Set up the Gradio interface"""
|
|
|
276 |
"shame": {"pitch": 0.8, "speed": 0.85},
|
277 |
}
|
278 |
|
279 |
+
import soundfile as sf
|
280 |
+
|
281 |
+
def adjust_audio_speed(audio_path, speed_factor):
    """Time-stretch the audio file at *audio_path* in place.

    Args:
        audio_path: Path to a WAV file; it is overwritten with the result.
        speed_factor: Stretch rate; > 1.0 speeds up (shorter audio),
            < 1.0 slows down (longer audio). Pitch is preserved.
    """
    y, sr = librosa.load(audio_path)
    # librosa >= 0.10 makes `rate` keyword-only; the positional call
    # `time_stretch(y, speed_factor)` raises
    # "TypeError: time_stretch() takes 1 positional argument".
    y_speeded = librosa.effects.time_stretch(y, rate=speed_factor)
    sf.write(audio_path, y_speeded, sr)
|
285 |
+
|
286 |
+
def adjust_audio_pitch(audio_path, pitch_factor):
    """Pitch-shift the audio file at *audio_path* in place.

    Args:
        audio_path: Path to a WAV file; it is overwritten with the result.
        pitch_factor: Shift in semitones (positive = higher pitch).
    """
    y, sr = librosa.load(audio_path)
    # librosa >= 0.10 makes `sr` and `n_steps` keyword-only; passing `sr`
    # positionally raises TypeError at runtime.
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_factor)
    sf.write(audio_path, y_shifted, sr)
|
290 |
|
|
|
291 |
def emotion_aware_tts_pipeline(input_text=None, file_input=None):
|
292 |
try:
|
293 |
# Get text from input or file
|
|
|
308 |
|
309 |
# Generate audio
|
310 |
audio_path = "output.wav"
|
311 |
+
tts_model.tts_to_file(text=input_text, file_path=audio_path)
|
|
|
312 |
|
313 |
+
# Adjust pitch and speed using librosa
|
314 |
+
if pitch != 1.0:
|
315 |
+
adjust_audio_pitch(audio_path, pitch)
|
316 |
+
if speed != 1.0:
|
317 |
+
adjust_audio_speed(audio_path, speed)
|
318 |
|
319 |
return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
|
320 |
else:
|
|
|
322 |
except Exception as e:
|
323 |
return f"Error: {str(e)}", None
|
324 |
|
325 |
+
|
326 |
|
327 |
# Define Gradio interface
|
328 |
iface = gr.Interface(
|