capradeepgujaran committed on
Commit
2ae3aa9
·
verified ·
1 Parent(s): 20f3cab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -17
app.py CHANGED
@@ -15,46 +15,56 @@ def text_to_speech_with_emotion(text, emotion, language='en'):
15
  fp.seek(0)
16
  audio = AudioSegment.from_mp3(fp)
17
 
18
- # Adjust audio based on emotion
19
  if emotion == "Happy":
20
- audio = audio.speedup(playback_speed=1.2)
21
  elif emotion == "Sad":
22
- audio = audio.speedup(playback_speed=0.8)
23
  elif emotion == "Angry":
24
- audio = audio + 5 # Increase volume slightly
 
 
 
25
 
26
  return audio
27
 
28
def generate_emotion_sound(emotion, duration):
    """Synthesize a decaying tone for *emotion*, followed by 0.5 s of silence.

    Args:
        emotion: "Happy", "Sad" or "Angry"; any other value is treated as
            Neutral.
        duration: Length of the tone in seconds (the trailing silence is
            added on top of this).

    Returns:
        A mono, 16-bit, 22050 Hz ``AudioSegment`` containing the tone.
    """
    sr = 22050
    t = np.linspace(0, duration, int(sr * duration), False)

    # emotion -> (frequency in Hz, exponential decay rate, square wave?)
    profiles = {
        "Happy": (440, 0.5, False),  # A4 note
        "Sad": (294, 0.1, False),    # D4 note
        "Angry": (392, 0.3, True),   # G4 note
    }
    freq, decay, square = profiles.get(emotion, (329, 0.2, False))  # Neutral: E4 note

    wave = np.sin(2 * np.pi * freq * t)
    if square:
        wave = np.sign(wave)
    audio = wave * np.exp(-decay * t)

    audio = np.concatenate([audio, np.zeros(int(sr * 0.5))])  # Add 0.5s silence

    # Normalize to [-1, 1]; skip when the signal is empty or all zeros so we
    # never call max() on an empty array or divide by zero.
    peak = np.max(np.abs(audio)) if audio.size else 0.0
    if peak > 0:
        audio = audio / peak

    # AudioSegment reads raw bytes as signed-integer PCM of `sample_width`
    # bytes per sample, so the float samples must be scaled to int16 first.
    # Passing float64 bytes directly would be misread as 16-bit samples.
    pcm = (audio * 32767).astype(np.int16)
    return AudioSegment(pcm.tobytes(), frame_rate=sr, sample_width=2, channels=1)
 
48
 
49
def generate_dialogue_and_sound(text, emotion, language):
    """Render *text* as speech, mix in an emotion tone, and save it as MP3.

    Args:
        text: The dialogue to speak.
        emotion: Emotion label forwarded to the speech and tone generators.
        language: Language code forwarded to the speech generator.

    Returns:
        Path of the generated MP3 file. The file is not deleted
        automatically; the caller owns it.
    """
    speech = text_to_speech_with_emotion(text, emotion, language)
    sound = generate_emotion_sound(emotion, 2)  # 2 seconds of emotion sound

    # Combine speech and sound
    combined = speech.overlay(sound, position=0)

    # Reserve a unique path, then close the handle before exporting: writing
    # to a path that is still open via NamedTemporaryFile fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        out_path = fp.name

    # export() hands back the open output file object; close it so the
    # descriptor is not leaked on every request.
    combined.export(out_path, format="mp3").close()
    return out_path
59
 
60
  # Gradio interface
@@ -66,8 +76,8 @@ iface = gr.Interface(
66
  gr.Dropdown(["en", "es", "fr", "de", "it"], label="Language", value="en")
67
  ],
68
  outputs=gr.Audio(label="Generated Dialogue with Emotion"),
69
- title="Emotional Dialogue and Sound Generator",
70
- description="Generate dialogues with different emotions and matching sound effects."
71
  )
72
 
73
  iface.launch()
 
15
  fp.seek(0)
16
  audio = AudioSegment.from_mp3(fp)
17
 
18
+ # Adjust audio based on emotion (more subtle adjustments)
19
  if emotion == "Happy":
20
+ audio = audio.speedup(playback_speed=1.1)
21
  elif emotion == "Sad":
22
+ audio = audio.speedup(playback_speed=0.9)
23
  elif emotion == "Angry":
24
+ audio = audio + 2 # Increase volume slightly
25
+
26
+ # Apply a high-pass filter to reduce low-frequency noise
27
+ audio = audio.high_pass_filter(80)
28
 
29
  return audio
30
 
31
def generate_subtle_emotion_sound(emotion, duration):
    """Synthesize a short, quickly decaying tone that hints at *emotion*.

    Args:
        emotion: "Happy", "Sad" or "Angry"; any other value is treated as
            Neutral.
        duration: Length of the tone in seconds.

    Returns:
        A mono, 16-bit, 44100 Hz ``AudioSegment`` containing the tone.
    """
    sr = 44100  # Higher sample rate for better quality
    t = np.linspace(0, duration, int(sr * duration), False)

    # emotion -> (frequency in Hz, exponential decay rate, square wave?)
    profiles = {
        "Happy": (440, 3, False),  # A4 note
        "Sad": (294, 2, False),    # D4 note
        "Angry": (392, 4, True),   # G4 note
    }
    freq, decay, square = profiles.get(emotion, (329, 3, False))  # Neutral: E4 note

    wave = np.sin(2 * np.pi * freq * t)
    if square:
        wave = np.sign(wave)
    audio = wave * np.exp(-decay * t)

    # Normalize to [-1, 1]; skip when the signal is empty or all zeros so we
    # never call max() on an empty array or divide by zero.
    peak = np.max(np.abs(audio)) if audio.size else 0.0
    if peak > 0:
        audio = audio / peak

    # AudioSegment reads raw bytes as signed-integer PCM of `sample_width`
    # bytes per sample. float32 bytes with sample_width=4 would be misread as
    # 32-bit integers (noise), so scale to int16 and declare 2-byte samples.
    pcm = (audio * 32767).astype(np.int16)
    return AudioSegment(pcm.tobytes(), frame_rate=sr, sample_width=2, channels=1)
51
 
52
def generate_dialogue_and_sound(text, emotion, language):
    """Render *text* as speech, add a quiet emotion cue, and save it as MP3.

    Args:
        text: The dialogue to speak.
        emotion: Emotion label forwarded to the speech and tone generators.
        language: Language code forwarded to the speech generator.

    Returns:
        Path of the generated MP3 file. The file is not deleted
        automatically; the caller owns it.
    """
    speech = text_to_speech_with_emotion(text, emotion, language)
    sound = generate_subtle_emotion_sound(emotion, 1)  # 1 second of subtle emotion sound

    # Reduce the volume of the sound effect
    sound = sound - 12  # Reduce volume by 12 dB

    # Combine speech and sound. gain_during_overlay is deliberately not used:
    # it changes the level of the *base* segment (the speech) while the
    # overlay plays, which would duck the opening of the dialogue instead of
    # the effect. The effect was already attenuated above.
    combined = speech.overlay(sound, position=0)

    # Apply some final processing
    combined = combined.compress_dynamic_range()  # Smooth out volume differences
    combined = combined.normalize()  # Normalize the final audio

    # Reserve a unique path, then close the handle before exporting: writing
    # to a path that is still open via NamedTemporaryFile fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        out_path = fp.name

    # Higher bitrate for better quality. export() hands back the open output
    # file object; close it so the descriptor is not leaked on every request.
    combined.export(out_path, format="mp3", bitrate="192k").close()
    return out_path
69
 
70
  # Gradio interface
 
76
  gr.Dropdown(["en", "es", "fr", "de", "it"], label="Language", value="en")
77
  ],
78
  outputs=gr.Audio(label="Generated Dialogue with Emotion"),
79
+ title="Improved Emotional Dialogue Generator",
80
+ description="Generate clear dialogues with subtle emotional cues."
81
  )
82
 
83
  iface.launch()