capradeepgujaran committed on
Commit
91c5e4d
·
verified ·
1 Parent(s): a4db718

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -23
app.py CHANGED
@@ -2,44 +2,77 @@ import gradio as gr
2
  from gtts import gTTS
3
  import numpy as np
4
  import io
5
- import base64
 
 
 
6
  import os
7
 
8
  def text_to_speech(text, language='en'):
9
  tts = gTTS(text=text, lang=language)
10
- fp = io.BytesIO()
11
- tts.write_to_fp(fp)
12
- fp.seek(0)
13
- return fp
14
 
15
- def generate_sound(frequency, duration):
16
- # Generate a simple sine wave
17
- t = np.linspace(0, duration, int(44100 * duration), False)
18
  audio = np.sin(2 * np.pi * frequency * t)
19
- return (44100, audio.astype(np.float32))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- def generate_voiceover(text, language):
22
- audio_fp = text_to_speech(text, language)
23
- return audio_fp
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- def generate_sound_effect(frequency, duration):
26
- return generate_sound(frequency, duration)
 
 
 
 
 
 
 
 
27
 
28
  # Gradio interface
29
  iface = gr.Interface(
30
- fn=[generate_voiceover, generate_sound_effect],
31
  inputs=[
32
  gr.Textbox(label="Enter text for speech generation"),
33
  gr.Dropdown(choices=["en", "es", "fr", "de", "it"], label="Select Language", value="en"),
34
- gr.Number(label="Frequency (Hz)", value=440),
35
- gr.Number(label="Duration (seconds)", value=1)
 
36
  ],
37
- outputs=[
38
- gr.Audio(label="Generated Speech"),
39
- gr.Audio(label="Generated Sound Effect")
40
- ],
41
- title="Simple Text-to-Speech and Sound Generation Tool",
42
- description="Generate speech from text and create simple sound effects."
43
  )
44
 
45
  iface.launch()
 
2
  from gtts import gTTS
3
  import numpy as np
4
  import io
5
+ import librosa
6
+ import soundfile as sf
7
+ from pydub import AudioSegment
8
+ import tempfile
9
  import os
10
 
def text_to_speech(text, language='en'):
    """Synthesize ``text`` with gTTS and save it to a temporary MP3 file.

    Args:
        text: Text to speak.
        language: Language code handed to gTTS as ``lang`` (default ``'en'``).

    Returns:
        Path of the generated MP3 file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Any exception raised by ``gTTS`` or ``gTTS.save``; the temporary file
        is removed before the exception propagates.
    """
    tts = gTTS(text=text, lang=language)

    # Reserve a unique path and close our own handle before gTTS writes to
    # it: reopening a still-open NamedTemporaryFile by name is not portable.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        mp3_path = fp.name

    try:
        tts.save(mp3_path)
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        os.unlink(mp3_path)
        raise
    return mp3_path
 
16
 
def generate_sound(frequency, duration, effect, sample_rate=22050):
    """Render a sine tone, optionally apply an effect, and save it as WAV.

    Args:
        frequency: Tone frequency in Hz.
        duration: Tone length in seconds; must yield at least one sample.
        effect: ``"Reverb"``, ``"Pitch Shift"`` or ``"Time Stretch"``. Any
            other value (for example ``"None"``) leaves the tone unprocessed.
        sample_rate: Samples per second used for synthesis, pitch shifting
            and the written file. Defaults to 22050, the rate that was
            previously hard-coded in three places.

    Returns:
        Path of the generated WAV file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        ValueError: If ``duration`` and ``sample_rate`` yield no samples.
    """
    n_samples = int(sample_rate * duration)
    if n_samples <= 0:
        raise ValueError(
            f"duration={duration!r} at sample_rate={sample_rate!r} "
            "produces no audio samples"
        )

    # Generate a simple waveform (endpoint excluded so the spacing between
    # samples is exactly duration / n_samples).
    t = np.linspace(0, duration, n_samples, False)
    audio = np.sin(2 * np.pi * frequency * t)

    # Apply effects
    if effect == "Reverb":
        # NOTE(review): this re-assembles the non-silent intervals of the
        # tone in their original order; it does not add any reverberation
        # tail. Kept as-is to preserve current output - confirm whether a
        # real reverb was intended.
        intervals = librosa.effects.split(audio, top_db=20)
        audio = librosa.effects.remix(audio, intervals=intervals)
    elif effect == "Pitch Shift":
        audio = librosa.effects.pitch_shift(audio, sr=sample_rate, n_steps=4)
    elif effect == "Time Stretch":
        # rate < 1 slows the signal down, so the result is longer than
        # `duration`.
        audio = librosa.effects.time_stretch(audio, rate=0.8)

    # Normalize audio
    audio = librosa.util.normalize(audio)

    # Reserve a unique path and close our own handle before soundfile writes
    # to it: reopening a still-open NamedTemporaryFile by name is not portable.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
        wav_path = fp.name

    try:
        sf.write(wav_path, audio, sample_rate)
    except Exception:
        # Do not leave an empty or partial file behind when writing fails.
        os.unlink(wav_path)
        raise
    return wav_path
36
 
def combine_audio(speech_file, sound_file):
    """Mix a sound file under a speech file and export the result as MP3.

    Args:
        speech_file: Path of the speech audio; it is the base track.
        sound_file: Path of the sound to overlay from the start of the speech.

    Returns:
        Path of the combined MP3 file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Any exception raised by pydub while loading or exporting; the output
        file is removed before an export error propagates.
    """
    speech = AudioSegment.from_file(speech_file)
    sound = AudioSegment.from_file(sound_file)

    # Adjust volumes
    speech = speech - 3  # Reduce speech volume by 3dB
    sound = sound - 6  # Reduce sound volume by 6dB

    # Overlay sound onto speech. pydub keeps the length of the base segment,
    # so any part of the sound that outlasts the speech is dropped.
    combined = speech.overlay(sound, position=0)

    # Reserve a unique path and close our own handle before pydub writes to
    # it: reopening a still-open NamedTemporaryFile by name is not portable.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        out_path = fp.name

    try:
        combined.export(out_path, format="mp3")
    except Exception:
        # Do not leave an empty or partial file behind when export fails.
        os.unlink(out_path)
        raise
    return out_path
51
 
def generate_audio(text, language, frequency, duration, effect):
    """Generate speech and a tone, and return the path of their mix.

    Args:
        text: Text to speak.
        language: Language code for the speech synthesis.
        frequency: Tone frequency in Hz.
        duration: Tone length in seconds.
        effect: Name of the effect applied to the tone.

    Returns:
        Path of the combined audio file produced by ``combine_audio``.

    Raises:
        Any exception raised by ``text_to_speech``, ``generate_sound`` or
        ``combine_audio``. Intermediate files created before the failure are
        still removed.
    """
    intermediate_files = []
    try:
        speech_file = text_to_speech(text, language)
        intermediate_files.append(speech_file)

        sound_file = generate_sound(frequency, duration, effect)
        intermediate_files.append(sound_file)

        return combine_audio(speech_file, sound_file)
    finally:
        # Clean up temporary files on success and on failure alike, so a
        # failing later step does not leak the files of the earlier ones.
        for path in intermediate_files:
            try:
                os.unlink(path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up for this path.
                pass
62
 
# Gradio interface: the input components are listed in the positional order
# of generate_audio's parameters (text, language, frequency, duration, effect).
_INPUT_COMPONENTS = [
    gr.Textbox(label="Enter text for speech generation"),
    gr.Dropdown(
        choices=["en", "es", "fr", "de", "it"],
        label="Select Language",
        value="en",
    ),
    gr.Slider(minimum=20, maximum=2000, value=440, label="Frequency (Hz)"),
    gr.Slider(minimum=0.1, maximum=5, value=1, label="Duration (seconds)"),
    gr.Dropdown(
        choices=["None", "Reverb", "Pitch Shift", "Time Stretch"],
        label="Sound Effect",
        value="None",
    ),
]

# Single audio output: the file path returned by generate_audio.
_OUTPUT_COMPONENT = gr.Audio(label="Generated Audio")

iface = gr.Interface(
    fn=generate_audio,
    inputs=_INPUT_COMPONENTS,
    outputs=_OUTPUT_COMPONENT,
    title="Enhanced Audio Generation Tool",
    description="Generate speech from text and combine it with customizable sound effects.",
)

iface.launch()