Spaces:

capradeepgujaran
/

VoiceOversV3

Sleeping

App Files Files Community

capradeepgujaran committed on Oct 8, 2024

Commit

db58593

verified ·

1 Parent(s): 4882c44

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -31

app.py CHANGED Viewed

@@ -5,6 +5,17 @@ import os
 from gtts import gTTS
 from pydub import AudioSegment
 from pydub.generators import WhiteNoise, Sine
 def text_to_speech_with_emotion(text, lang, emotion):
     try:
@@ -13,51 +24,63 @@ def text_to_speech_with_emotion(text, lang, emotion):
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
             tts.save(fp.name)
-            # Load the audio file and apply simple emotion effects
             audio = AudioSegment.from_mp3(fp.name)
-            if emotion == "Happy":
-                audio = audio.speedup(playback_speed=1.1)
-            elif emotion == "Sad":
-                audio = audio.speedup(playback_speed=0.9)
-            elif emotion == "Angry":
-                audio = audio + 5  # Increase volume
-                audio = audio.compress_dynamic_range(threshold=-15.0, ratio=3.0, attack=5.0, release=50.0)
-            audio.export(fp.name, format="mp3")
-            return fp.name, f"Speech generated successfully with simulated {emotion} emotion"
     except Exception as e:
         return None, f"Error in speech generation: {str(e)}"
 def generate_sound_effect(description, duration):
     try:
-        sample_rate = 44100
-        channels = 2
         duration_ms = int(duration * 1000)
         if "rain" in description.lower():
-            sound = WhiteNoise().to_audio_segment(duration=duration_ms)
-            sound = sound.apply_gain(-10)  # Make it softer
         elif "car horn" in description.lower():
-            sound = Sine(440).to_audio_segment(duration=100)  # Short beep
-            sound = sound.append(AudioSegment.silent(duration=50), crossfade=25)
-            sound = sound * 3  # Repeat the beep
         elif "wind" in description.lower():
-            sound = WhiteNoise().to_audio_segment(duration=duration_ms)
-            sound = sound.apply_gain(-15)  # Make it softer
-            sound = sound.low_pass_filter(1000)  # Remove high frequencies
         elif "bird" in description.lower():
-            sound = Sine(1000).to_audio_segment(duration=100)
-            sound = sound.append(Sine(1200).to_audio_segment(duration=100), crossfade=25)
-            sound = sound.append(AudioSegment.silent(duration=200))
-            sound = sound * int(duration * 2)  # Repeat chirps
         else:
-            # Default to a simple tone
-            sound = Sine(440).to_audio_segment(duration=duration_ms)
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
-            sound.export(fp.name, format="wav")
-            return fp.name, f"Sound effect generated for '{description}'"
     except Exception as e:
         return None, f"Error in sound effect generation: {str(e)}"
@@ -107,7 +130,7 @@ def evaluate_emotion(ref_audio, gen_audio, uttwise_score=False):
 # Gradio interface
 with gr.Blocks() as iface:
-    gr.Markdown("# Improved TTS and Sound Generation Tool")
     with gr.Tab("Text-to-Speech"):
         text_input = gr.Textbox(label="Enter text for speech generation")
@@ -118,7 +141,7 @@ with gr.Blocks() as iface:
         speech_message = gr.Textbox(label="Message")
     with gr.Tab("Sound Effect Generation"):
-        sfx_input = gr.Textbox(label="Enter description for sound effect (e.g., 'rain', 'car horn', 'wind', 'bird')")
         sfx_duration = gr.Slider(minimum=1, maximum=10, value=3, label="Duration (seconds)")
         sfx_button = gr.Button("Generate Sound Effect")
         sfx_output = gr.Audio(label="Generated Sound Effect")

 from gtts import gTTS
 from pydub import AudioSegment
 from pydub.generators import WhiteNoise, Sine
+import random
def apply_emotion(audio, emotion):
    """Return ``audio`` altered to suggest ``emotion``.

    Args:
        audio: A pydub ``AudioSegment`` holding the synthesized speech.
        emotion: ``"Happy"``, ``"Sad"`` or ``"Angry"``; any other value
            (for example ``"Neutral"``) leaves the audio untouched.

    Returns:
        The processed segment, or ``audio`` itself for unrecognized emotions.
    """

    def _resample(segment, factor):
        # pydub's AudioSegment has no pitch_shift() method, and speedup()
        # can only drop audio (it cannot slow playback down).  Reinterpreting
        # the raw samples at a scaled frame rate changes tempo and pitch
        # together; converting back restores a standard frame rate for export.
        retimed = segment._spawn(
            segment.raw_data,
            overrides={"frame_rate": int(segment.frame_rate * factor)},
        )
        return retimed.set_frame_rate(segment.frame_rate)

    if emotion == "Happy":
        # Faster and higher pitched.
        return _resample(audio, 1.1)
    if emotion == "Sad":
        # Slower and lower pitched.
        return _resample(audio, 0.9)
    if emotion == "Angry":
        # Slightly faster, compressed, and louder.
        hurried = audio.speedup(playback_speed=1.05)
        squashed = hurried.compress_dynamic_range(threshold=-15.0, ratio=4.0)
        return squashed.apply_gain(5)
    # Neutral / unknown: no processing.
    return audio
 def text_to_speech_with_emotion(text, lang, emotion):
     try:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
             tts.save(fp.name)
             audio = AudioSegment.from_mp3(fp.name)
+            audio = apply_emotion(audio, emotion)
+            # Improve audio quality
+            audio = audio.compress_dynamic_range(threshold=-20.0, ratio=2.0)
+            audio = audio.normalize()
+            output_path = tempfile.mktemp(suffix=".mp3")
+            audio.export(output_path, format="mp3", bitrate="192k")
+            return output_path, f"Speech generated successfully with {emotion} emotion"
     except Exception as e:
         return None, f"Error in speech generation: {str(e)}"
def generate_complex_sound(base_freq, duration_ms, harmonics=3, noise_level=0.1, sample_rate=44100):
    """Synthesize a mono 16-bit tone with overtones and Gaussian noise.

    The fundamental at ``base_freq`` is summed with harmonics
    ``2..harmonics`` (the i-th weighted by ``1 / i``), normally distributed
    noise with standard deviation ``noise_level`` is added, and the result is
    peak-normalized to the int16 range.

    Args:
        base_freq: Fundamental frequency in Hz.
        duration_ms: Length of the sound in milliseconds; non-positive values
            produce an empty segment.
        harmonics: Highest harmonic number to include (values below 2 add none).
        noise_level: Standard deviation of the added noise.
        sample_rate: Samples per second of the generated audio.

    Returns:
        An ``AudioSegment`` with one channel and 2-byte samples.
    """
    # Clamp so a zero or negative duration gives an empty signal instead of
    # an error from np.linspace.
    num_samples = max(0, int(sample_rate * duration_ms / 1000))
    t = np.linspace(0, duration_ms / 1000, num_samples, False)

    signal = np.sin(2 * np.pi * base_freq * t)
    for i in range(2, harmonics + 1):
        signal += (1 / i) * np.sin(2 * np.pi * (base_freq * i) * t)
    signal += np.random.normal(0, noise_level, signal.shape)

    # np.max raises on an empty array and a zero peak would divide by zero,
    # so only normalize when there is something to scale.
    peak = np.max(np.abs(signal)) if signal.size else 0.0
    if peak > 0:
        signal = signal / peak
    pcm = np.int16(signal * 32767)
    return AudioSegment(pcm.tobytes(), frame_rate=sample_rate, sample_width=2, channels=1)
 def generate_sound_effect(description, duration):
     """Generate a synthetic sound effect and write it to a temporary WAV file.

     The effect is chosen by keyword search in ``description`` (checked in
     this order: rain, car horn, wind, bird, ocean, thunder); anything else
     produces a randomly pitched harmonic tone.

     Args:
         description: Free-text description of the desired sound.
         duration: Requested length in seconds.

     Returns:
         ``(path, message)`` on success, or ``(None, error_message)`` if
         anything raised while generating or exporting the sound.
     """
     try:
         duration_ms = int(duration * 1000)
         # Lower-case once instead of on every branch test.
         wanted = description.lower()
         if "rain" in wanted:
             sound = WhiteNoise().to_audio_segment(duration=duration_ms).apply_gain(-10)
         elif "car horn" in wanted:
             # Three fixed-length beeps; the requested duration is not used here.
             beep = generate_complex_sound(440, 200, harmonics=5, noise_level=0.05)
             sound = beep.append(AudioSegment.silent(duration=100)) * 3
         elif "wind" in wanted:
             sound = WhiteNoise().to_audio_segment(duration=duration_ms).apply_gain(-15).low_pass_filter(1000)
         elif "bird" in wanted:
             chirp = generate_complex_sound(random.uniform(2000, 4000), 100, harmonics=2, noise_level=0.02)
             # Always emit at least one chirp, even for sub-half-second requests.
             repeats = max(1, int(duration * 2))
             sound = (chirp + AudioSegment.silent(duration=200)) * repeats
         elif "ocean" in wanted:
             sound = WhiteNoise().to_audio_segment(duration=duration_ms).apply_gain(-20).low_pass_filter(500)
         elif "thunder" in wanted:
             clap = WhiteNoise().to_audio_segment(duration=500).apply_gain(10)
             clap = clap.fade_in(100).fade_out(300)
             # Pad with silence up to the requested length; never negative.
             sound = clap + AudioSegment.silent(duration=max(0, duration_ms - 500))
         else:
             # Generate a more complex default sound
             base_freq = random.uniform(200, 800)
             sound = generate_complex_sound(base_freq, duration_ms, harmonics=4, noise_level=0.1)
         # tempfile.mktemp() only returns a name and is deprecated because another
         # process can claim it first; create the file atomically instead, then
         # export once our handle is closed.
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
             output_path = fp.name
         sound.export(output_path, format="wav")
         return output_path, f"Sound effect generated for '{description}'"
     except Exception as e:
         return None, f"Error in sound effect generation: {str(e)}"
 # Gradio interface
 with gr.Blocks() as iface:
+    gr.Markdown("# Enhanced TTS and Sound Generation Tool")
     with gr.Tab("Text-to-Speech"):
         text_input = gr.Textbox(label="Enter text for speech generation")
         speech_message = gr.Textbox(label="Message")
     with gr.Tab("Sound Effect Generation"):
+        sfx_input = gr.Textbox(label="Enter description for sound effect (e.g., 'rain', 'car horn', 'wind', 'bird', 'ocean', 'thunder')")
         sfx_duration = gr.Slider(minimum=1, maximum=10, value=3, label="Duration (seconds)")
         sfx_button = gr.Button("Generate Sound Effect")
         sfx_output = gr.Audio(label="Generated Sound Effect")