capradeepgujaran committed on
Commit
db58593
·
verified ·
1 Parent(s): 4882c44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -31
app.py CHANGED
@@ -5,6 +5,17 @@ import os
5
  from gtts import gTTS
6
  from pydub import AudioSegment
7
  from pydub.generators import WhiteNoise, Sine
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def text_to_speech_with_emotion(text, lang, emotion):
10
  try:
@@ -13,51 +24,63 @@ def text_to_speech_with_emotion(text, lang, emotion):
13
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
14
  tts.save(fp.name)
15
 
16
- # Load the audio file and apply simple emotion effects
17
  audio = AudioSegment.from_mp3(fp.name)
 
18
 
19
- if emotion == "Happy":
20
- audio = audio.speedup(playback_speed=1.1)
21
- elif emotion == "Sad":
22
- audio = audio.speedup(playback_speed=0.9)
23
- elif emotion == "Angry":
24
- audio = audio + 5 # Increase volume
25
- audio = audio.compress_dynamic_range(threshold=-15.0, ratio=3.0, attack=5.0, release=50.0)
26
 
27
- audio.export(fp.name, format="mp3")
28
- return fp.name, f"Speech generated successfully with simulated {emotion} emotion"
 
29
  except Exception as e:
30
  return None, f"Error in speech generation: {str(e)}"
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def generate_sound_effect(description, duration):
33
  try:
34
- sample_rate = 44100
35
- channels = 2
36
  duration_ms = int(duration * 1000)
37
 
38
  if "rain" in description.lower():
39
- sound = WhiteNoise().to_audio_segment(duration=duration_ms)
40
- sound = sound.apply_gain(-10) # Make it softer
41
  elif "car horn" in description.lower():
42
- sound = Sine(440).to_audio_segment(duration=100) # Short beep
43
- sound = sound.append(AudioSegment.silent(duration=50), crossfade=25)
44
- sound = sound * 3 # Repeat the beep
45
  elif "wind" in description.lower():
46
- sound = WhiteNoise().to_audio_segment(duration=duration_ms)
47
- sound = sound.apply_gain(-15) # Make it softer
48
- sound = sound.low_pass_filter(1000) # Remove high frequencies
49
  elif "bird" in description.lower():
50
- sound = Sine(1000).to_audio_segment(duration=100)
51
- sound = sound.append(Sine(1200).to_audio_segment(duration=100), crossfade=25)
52
- sound = sound.append(AudioSegment.silent(duration=200))
53
- sound = sound * int(duration * 2) # Repeat chirps
 
 
 
 
 
54
  else:
55
- # Default to a simple tone
56
- sound = Sine(440).to_audio_segment(duration=duration_ms)
 
57
 
58
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
59
- sound.export(fp.name, format="wav")
60
- return fp.name, f"Sound effect generated for '{description}'"
61
  except Exception as e:
62
  return None, f"Error in sound effect generation: {str(e)}"
63
 
@@ -107,7 +130,7 @@ def evaluate_emotion(ref_audio, gen_audio, uttwise_score=False):
107
 
108
  # Gradio interface
109
  with gr.Blocks() as iface:
110
- gr.Markdown("# Improved TTS and Sound Generation Tool")
111
 
112
  with gr.Tab("Text-to-Speech"):
113
  text_input = gr.Textbox(label="Enter text for speech generation")
@@ -118,7 +141,7 @@ with gr.Blocks() as iface:
118
  speech_message = gr.Textbox(label="Message")
119
 
120
  with gr.Tab("Sound Effect Generation"):
121
- sfx_input = gr.Textbox(label="Enter description for sound effect (e.g., 'rain', 'car horn', 'wind', 'bird')")
122
  sfx_duration = gr.Slider(minimum=1, maximum=10, value=3, label="Duration (seconds)")
123
  sfx_button = gr.Button("Generate Sound Effect")
124
  sfx_output = gr.Audio(label="Generated Sound Effect")
 
5
  from gtts import gTTS
6
  from pydub import AudioSegment
7
  from pydub.generators import WhiteNoise, Sine
8
+ import random
9
+
10
def apply_emotion(audio, emotion):
    """Return a version of ``audio`` altered to suggest ``emotion``.

    Args:
        audio: A pydub ``AudioSegment`` containing the synthesized speech.
        emotion: ``"Happy"``, ``"Sad"`` or ``"Angry"``. Any other value
            (e.g. ``"Neutral"``) leaves the audio untouched.

    Returns:
        The processed segment, or ``audio`` itself when no effect applies.
    """

    def _resample(segment, factor):
        # pydub's AudioSegment has no ``pitch_shift`` method, and ``speedup``
        # is only meant for factors above 1.0. Reinterpreting the raw samples
        # at a scaled frame rate changes tempo and pitch together (faster and
        # higher for factor > 1, slower and lower for factor < 1); converting
        # back to the original frame rate keeps the result compatible with
        # the rest of the pipeline.
        retimed = segment._spawn(
            segment.raw_data,
            overrides={"frame_rate": round(segment.frame_rate * factor)},
        )
        return retimed.set_frame_rate(segment.frame_rate)

    if emotion == "Happy":
        return _resample(audio, 1.1)
    if emotion == "Sad":
        return _resample(audio, 0.9)
    if emotion == "Angry":
        # Slightly faster, compressed and louder for a harsher delivery.
        return (
            audio.speedup(playback_speed=1.05)
            .compress_dynamic_range(threshold=-15.0, ratio=4.0)
            .apply_gain(5)
        )
    # Neutral or unrecognized emotion: no processing.
    return audio
19
 
20
  def text_to_speech_with_emotion(text, lang, emotion):
21
  try:
 
24
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
25
  tts.save(fp.name)
26
 
 
27
  audio = AudioSegment.from_mp3(fp.name)
28
+ audio = apply_emotion(audio, emotion)
29
 
30
+ # Improve audio quality
31
+ audio = audio.compress_dynamic_range(threshold=-20.0, ratio=2.0)
32
+ audio = audio.normalize()
 
 
 
 
33
 
34
+ output_path = tempfile.mktemp(suffix=".mp3")
35
+ audio.export(output_path, format="mp3", bitrate="192k")
36
+ return output_path, f"Speech generated successfully with {emotion} emotion"
37
  except Exception as e:
38
  return None, f"Error in speech generation: {str(e)}"
39
 
40
def generate_complex_sound(base_freq, duration_ms, harmonics=3, noise_level=0.1):
    """Synthesize a mono 16-bit tone with overtones and Gaussian noise.

    Args:
        base_freq: Fundamental frequency in Hz.
        duration_ms: Length of the sound in milliseconds.
        harmonics: Highest harmonic to include; harmonic ``i`` is added at
            ``base_freq * i`` with amplitude ``1 / i``.
        noise_level: Standard deviation of the Gaussian noise mixed in.

    Returns:
        A pydub ``AudioSegment`` at 44.1 kHz, 16-bit, single channel. An
        empty segment is returned when ``duration_ms`` yields no samples.
    """
    sample_rate = 44100
    sample_count = int(sample_rate * duration_ms / 1000)
    if sample_count <= 0:
        # np.max() on an empty array raises; hand back an empty segment instead.
        return AudioSegment(b"", frame_rate=sample_rate, sample_width=2, channels=1)

    t = np.linspace(0, duration_ms / 1000, sample_count, False)
    signal = np.sin(2 * np.pi * base_freq * t)

    for i in range(2, harmonics + 1):
        signal += (1 / i) * np.sin(2 * np.pi * (base_freq * i) * t)

    signal += np.random.normal(0, noise_level, signal.shape)

    # Normalize to full scale, but skip the division for an all-zero signal
    # (e.g. base_freq=0 with noise_level=0), which would otherwise produce NaNs.
    peak = np.max(np.abs(signal))
    if peak > 0:
        signal = signal / peak

    pcm = (signal * 32767).astype(np.int16)
    return AudioSegment(pcm.tobytes(), frame_rate=sample_rate, sample_width=2, channels=1)
53
+
54
  def generate_sound_effect(description, duration):
55
  try:
 
 
56
  duration_ms = int(duration * 1000)
57
 
58
  if "rain" in description.lower():
59
+ sound = WhiteNoise().to_audio_segment(duration=duration_ms).apply_gain(-10)
 
60
  elif "car horn" in description.lower():
61
+ sound = generate_complex_sound(440, 200, harmonics=5, noise_level=0.05)
62
+ sound = sound.append(AudioSegment.silent(duration=100))
63
+ sound = sound * 3
64
  elif "wind" in description.lower():
65
+ sound = WhiteNoise().to_audio_segment(duration=duration_ms).apply_gain(-15).low_pass_filter(1000)
 
 
66
  elif "bird" in description.lower():
67
+ chirp = generate_complex_sound(random.uniform(2000, 4000), 100, harmonics=2, noise_level=0.02)
68
+ sound = chirp + AudioSegment.silent(duration=200)
69
+ sound = sound * int(duration * 2)
70
+ elif "ocean" in description.lower():
71
+ sound = WhiteNoise().to_audio_segment(duration=duration_ms).apply_gain(-20).low_pass_filter(500)
72
+ elif "thunder" in description.lower():
73
+ sound = WhiteNoise().to_audio_segment(duration=500).apply_gain(10)
74
+ sound = sound.fade_in(100).fade_out(300)
75
+ sound = sound + AudioSegment.silent(duration=duration_ms - 500)
76
  else:
77
+ # Generate a more complex default sound
78
+ base_freq = random.uniform(200, 800)
79
+ sound = generate_complex_sound(base_freq, duration_ms, harmonics=4, noise_level=0.1)
80
 
81
+ output_path = tempfile.mktemp(suffix=".wav")
82
+ sound.export(output_path, format="wav")
83
+ return output_path, f"Sound effect generated for '{description}'"
84
  except Exception as e:
85
  return None, f"Error in sound effect generation: {str(e)}"
86
 
 
130
 
131
  # Gradio interface
132
  with gr.Blocks() as iface:
133
+ gr.Markdown("# Enhanced TTS and Sound Generation Tool")
134
 
135
  with gr.Tab("Text-to-Speech"):
136
  text_input = gr.Textbox(label="Enter text for speech generation")
 
141
  speech_message = gr.Textbox(label="Message")
142
 
143
  with gr.Tab("Sound Effect Generation"):
144
+ sfx_input = gr.Textbox(label="Enter description for sound effect (e.g., 'rain', 'car horn', 'wind', 'bird', 'ocean', 'thunder')")
145
  sfx_duration = gr.Slider(minimum=1, maximum=10, value=3, label="Duration (seconds)")
146
  sfx_button = gr.Button("Generate Sound Effect")
147
  sfx_output = gr.Audio(label="Generated Sound Effect")