Anita-19 committed
Commit cac8321 · verified · 1 Parent(s): 331d92d

Update main.py

Files changed (1): main.py +20 -4
main.py CHANGED
@@ -296,7 +296,18 @@ emotion_settings = {
     "neutral": {"pitch": 1.0, "speed": 1.0},
 }
 
-# Function to process text or file input and generate audio
+import soundfile as sf
+
+def adjust_audio_speed(audio_path, speed_factor):
+    y, sr = librosa.load(audio_path)
+    y_speeded = librosa.effects.time_stretch(y, speed_factor)
+    sf.write(audio_path, y_speeded, sr)
+
+def adjust_audio_pitch(audio_path, pitch_factor):
+    y, sr = librosa.load(audio_path)
+    y_shifted = librosa.effects.pitch_shift(y, sr, n_steps=pitch_factor)
+    sf.write(audio_path, y_shifted, sr)
+
 def emotion_aware_tts_pipeline(input_text=None, file_input=None):
     try:
         # Get text from input or file
@@ -304,7 +315,7 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
             with open(file_input.name, 'r') as file:
                 input_text = file.read()
 
-        if input_text:
+        if input_text:
             # Detect emotion
             emotion_data = emotion_classifier(input_text)[0]
             emotion = emotion_data['label']
@@ -317,16 +328,21 @@ def emotion_aware_tts_pipeline(input_text=None, file_input=None):
 
             # Generate audio
             audio_path = "output.wav"
-            tts_model.tts_to_file(text=input_text, file_path=audio_path, speed=speed, pitch=pitch)
+            tts_model.tts_to_file(text=input_text, file_path=audio_path)
 
+            # Adjust pitch and speed using librosa
+            if pitch != 1.0:
+                adjust_audio_pitch(audio_path, pitch)
+            if speed != 1.0:
+                adjust_audio_speed(audio_path, speed)
 
             return f"Detected Emotion: {emotion} (Confidence: {confidence:.2f})", audio_path
         else:
             return "Please provide input text or file", None
     except Exception as e:
-        # Return error message if something goes wrong
        return f"Error: {str(e)}", None
 
+
 # Define Gradio interface
 iface = gr.Interface(
     fn=emotion_aware_tts_pipeline,
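
Note on the new librosa post-processing: as committed, the helpers pass the rate to time_stretch and sr to pitch_shift positionally, which librosa 0.10+ rejects (those parameters became keyword-only), and they feed the multiplicative pitch factor from emotion_settings straight into n_steps, which expects semitones. A minimal sketch of the same two helpers under those assumptions; the librosa >= 0.10 keyword calls, the factor-to-semitone conversion, and sr=None are additions for illustration, not part of the commit:

import math

import librosa
import soundfile as sf

def adjust_audio_speed(audio_path, speed_factor):
    # rate > 1.0 speeds the clip up, rate < 1.0 slows it down
    y, sr = librosa.load(audio_path, sr=None)  # sr=None keeps the TTS output's native rate
    y_speeded = librosa.effects.time_stretch(y, rate=speed_factor)
    sf.write(audio_path, y_speeded, sr)

def adjust_audio_pitch(audio_path, pitch_factor):
    # Assumption: pitch_factor is a ratio (e.g. 1.2); convert it to semitones for n_steps
    n_steps = 12 * math.log2(pitch_factor)
    y, sr = librosa.load(audio_path, sr=None)
    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
    sf.write(audio_path, y_shifted, sr)

Overwriting output.wav in place keeps the rest of the pipeline unchanged; both helpers assume pitch and speed come from the matched emotion_settings entry, as in the diff.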