Spaces:

capradeepgujaran
/

VoiceOversV3

Running

App Files Files Community

capradeepgujaran committed on Oct 7, 2024

Commit

2ae3aa9

verified ·

1 Parent(s): 20f3cab

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -17

app.py CHANGED Viewed

@@ -15,46 +15,56 @@ def text_to_speech_with_emotion(text, emotion, language='en'):
         fp.seek(0)
         audio = AudioSegment.from_mp3(fp)
-    # Adjust audio based on emotion
     if emotion == "Happy":
-        audio = audio.speedup(playback_speed=1.2)
     elif emotion == "Sad":
-        audio = audio.speedup(playback_speed=0.8)
     elif emotion == "Angry":
-        audio = audio + 5  # Increase volume slightly
     return audio
-def generate_emotion_sound(emotion, duration):
-    sr = 22050
     t = np.linspace(0, duration, int(sr * duration), False)
     if emotion == "Happy":
         freq = 440  # A4 note
-        audio = np.sin(2 * np.pi * freq * t) * np.exp(-0.5 * t)
     elif emotion == "Sad":
         freq = 294  # D4 note
-        audio = np.sin(2 * np.pi * freq * t) * np.exp(-0.1 * t)
     elif emotion == "Angry":
         freq = 392  # G4 note
-        audio = np.sign(np.sin(2 * np.pi * freq * t)) * np.exp(-0.3 * t)
     else:  # Neutral
         freq = 329  # E4 note
-        audio = np.sin(2 * np.pi * freq * t) * np.exp(-0.2 * t)
-    audio = np.concatenate([audio, np.zeros(int(sr * 0.5))])  # Add 0.5s silence
     audio = audio / np.max(np.abs(audio))  # Normalize
-    return AudioSegment(audio.tobytes(), frame_rate=sr, sample_width=2, channels=1)
 def generate_dialogue_and_sound(text, emotion, language):
     speech = text_to_speech_with_emotion(text, emotion, language)
-    sound = generate_emotion_sound(emotion, 2)  # 2 seconds of emotion sound
     # Combine speech and sound
-    combined = speech.overlay(sound, position=0)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
-        combined.export(fp.name, format="mp3")
         return fp.name
 # Gradio interface
@@ -66,8 +76,8 @@ iface = gr.Interface(
         gr.Dropdown(["en", "es", "fr", "de", "it"], label="Language", value="en")
     ],
     outputs=gr.Audio(label="Generated Dialogue with Emotion"),
-    title="Emotional Dialogue and Sound Generator",
-    description="Generate dialogues with different emotions and matching sound effects."
 )
 iface.launch()

         fp.seek(0)
         audio = AudioSegment.from_mp3(fp)
+    # Adjust audio based on emotion (more subtle adjustments)
     if emotion == "Happy":
+        audio = audio.speedup(playback_speed=1.1)
     elif emotion == "Sad":
+        audio = audio.speedup(playback_speed=0.9)
     elif emotion == "Angry":
+        audio = audio + 2  # Increase volume slightly
+    # Apply a high-pass filter to reduce low-frequency noise
+    audio = audio.high_pass_filter(80)
     return audio
def generate_subtle_emotion_sound(emotion, duration):
    """Synthesize a short, exponentially decaying tone for the given emotion.

    Args:
        emotion: "Happy", "Sad" or "Angry"; any other value is treated as
            neutral.
        duration: Length of the tone in seconds.

    Returns:
        A mono, 16-bit, 44.1 kHz ``AudioSegment`` containing the tone.
    """
    sr = 44100  # Higher sample rate for better quality
    t = np.linspace(0, duration, int(sr * duration), False)

    # emotion -> (frequency in Hz, exponential decay rate, use square wave?)
    tones = {
        "Happy": (440, 3, False),  # A4 note
        "Sad": (294, 2, False),  # D4 note
        "Angry": (392, 4, True),  # G4 note, square wave
    }
    freq, decay, square = tones.get(emotion, (329, 3, False))  # Neutral: E4 note

    wave = np.sin(2 * np.pi * freq * t)
    if square:
        wave = np.sign(wave)
    audio = wave * np.exp(-decay * t)

    # Normalize to [-1, 1]. Skip when the signal is empty or all zeros so we
    # neither call np.max on an empty array nor divide by zero.
    peak = np.max(np.abs(audio)) if audio.size else 0.0
    if peak > 0:
        audio = audio / peak

    # pydub interprets raw bytes as signed-integer PCM of `sample_width`
    # bytes per sample. Handing it float32 bytes with sample_width=4 makes it
    # read the float bit patterns as int32, which plays back as noise, so
    # convert to 16-bit integer PCM first.
    pcm = (audio * 32767).astype(np.int16)
    return AudioSegment(pcm.tobytes(), frame_rate=sr, sample_width=2, channels=1)
 def generate_dialogue_and_sound(text, emotion, language):
     speech = text_to_speech_with_emotion(text, emotion, language)
+    sound = generate_subtle_emotion_sound(emotion, 1)  # 1 second of subtle emotion sound
+    # Reduce the volume of the sound effect
+    sound = sound - 12  # Reduce volume by 12 dB
     # Combine speech and sound
+    combined = speech.overlay(sound, position=0, gain_during_overlay=-12)
+    # Apply some final processing
+    combined = combined.compress_dynamic_range()  # Smooth out volume differences
+    combined = combined.normalize()  # Normalize the final audio
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
+        combined.export(fp.name, format="mp3", bitrate="192k")  # Higher bitrate for better quality
         return fp.name
 # Gradio interface
         gr.Dropdown(["en", "es", "fr", "de", "it"], label="Language", value="en")
     ],
     outputs=gr.Audio(label="Generated Dialogue with Emotion"),
+    title="Improved Emotional Dialogue Generator",
+    description="Generate clear dialogues with subtle emotional cues."
 )
 iface.launch()