capradeepgujaran committed on
Commit
a4db718
·
verified ·
1 Parent(s): 05609a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -47
app.py CHANGED
@@ -1,57 +1,45 @@
1
  import gradio as gr
2
  from gtts import gTTS
 
 
 
3
  import os
4
- import tempfile
5
- from transformers import pipeline
6
- import torch
7
 
8
- # Initialize text-to-audio pipeline for sound generation
9
# Start from the "unavailable" state so the rest of the app can test
# `text_to_audio is None` when the model could not be loaded.
text_to_audio = None
try:
    # Device index handed to the pipeline: 0 when CUDA is available, else -1.
    if torch.cuda.is_available():
        device = 0
    else:
        device = -1
    text_to_audio = pipeline("text-to-audio", model="facebook/musicgen-small", device=device)
except Exception as e:
    print(f"Error initializing text-to-audio pipeline: {e}")
15
 
16
def generate_speech(text, language):
    """Convert *text* to speech with gTTS and hand the result to Gradio.

    Args:
        text: The text to be spoken.
        language: Language code passed to gTTS as ``lang``.

    Returns:
        A ``(audio, message)`` pair. On success ``audio`` is a ``gr.Audio``
        pointing at a temporary ``.mp3`` file; on any failure it is ``None``
        and ``message`` carries the error text.
    """
    try:
        speech = gTTS(text=text, lang=language)
        # delete=False keeps the file on disk after the handle is closed so
        # Gradio can still read it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
            mp3_path = mp3_file.name
            speech.save(mp3_path)
            status = "Speech generated successfully"
            return gr.Audio(value=mp3_path, type="filepath"), status
    except Exception as e:
        return None, f"Error in speech generation: {e!s}"
24
 
25
def generate_sound(text):
    """Generate an audio clip from a text prompt with the text-to-audio pipeline.

    Args:
        text: Prompt describing the sound to generate.

    Returns:
        A ``(audio, message)`` pair. On success ``audio`` is a ``gr.Audio``
        pointing at a temporary 16-bit PCM ``.wav`` file; otherwise it is
        ``None`` and ``message`` explains what went wrong.
    """
    import wave  # local import: `wave` is not in the module's import block

    if text_to_audio is None:
        return None, "Text-to-audio pipeline not available. Check logs for initialization error."

    try:
        result = text_to_audio(text, forward_params={"do_sample": True, "max_new_tokens": 256})

        # The pipeline result holds a NumPy waveform under "audio" and its
        # rate under "sampling_rate"; an ndarray has no .save(), so the WAV
        # file is written explicitly.
        # NOTE(review): squeeze() assumes mono output (true for
        # musicgen-small) -- confirm before switching to a stereo model.
        samples = result["audio"].squeeze()
        pcm = (samples.clip(-1.0, 1.0) * 32767).astype("<i2")

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
            with wave.open(fp, "wb") as wav_out:
                wav_out.setnchannels(1)
                wav_out.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
                wav_out.setframerate(int(result["sampling_rate"]))
                wav_out.writeframes(pcm.tobytes())
        return gr.Audio(value=fp.name, type="filepath"), "Sound generated successfully"
    except Exception as e:
        return None, f"Error in sound generation: {str(e)}"
36
 
37
- # Gradio interface
38
# Two-tab UI: one tab per generator. Components and their click handlers are
# created inside the Blocks context so they are registered on `iface`.
with gr.Blocks() as iface:
    gr.Markdown("# Lightweight Text-to-Speech and Text-to-Sound Generation Tool")

    # Tab 1: text + language in, spoken audio + status message out.
    with gr.Tab("Text-to-Speech"):
        text_input = gr.Textbox(label="Enter text for speech generation")
        language_input = gr.Dropdown(["en", "es", "fr", "de", "it"], label="Select Language", value="en")
        speech_button = gr.Button("Generate Speech")
        speech_output = gr.Audio(label="Generated Speech")
        speech_message = gr.Textbox(label="Message")

    # Tab 2: free-text description in, generated sound + status message out.
    with gr.Tab("Text-to-Sound"):
        sound_input = gr.Textbox(label="Enter text description for sound generation")
        sound_button = gr.Button("Generate Sound")
        sound_output = gr.Audio(label="Generated Sound")
        sound_message = gr.Textbox(label="Message")

    # Each handler returns an (audio, message) pair, matching the two outputs.
    speech_button.click(generate_speech, inputs=[text_input, language_input], outputs=[speech_output, speech_message])
    sound_button.click(generate_sound, inputs=[sound_input], outputs=[sound_output, sound_message])

iface.launch()
 
1
  import gradio as gr
2
  from gtts import gTTS
3
+ import numpy as np
4
+ import io
5
+ import base64
6
  import os
 
 
 
7
 
8
def text_to_speech(text, language='en'):
    """Synthesize *text* with gTTS into an in-memory byte stream.

    Args:
        text: The text to be spoken.
        language: Language code passed to gTTS as ``lang`` (default ``'en'``).

    Returns:
        An ``io.BytesIO`` holding the audio written by gTTS, positioned at
        offset 0 so it can be read immediately.
    """
    buffer = io.BytesIO()
    gTTS(text=text, lang=language).write_to_fp(buffer)
    # Rewind: write_to_fp leaves the position at the end of the data.
    buffer.seek(0)
    return buffer
 
14
 
15
def generate_sound(frequency, duration, sample_rate=44100):
    """Generate a mono sine tone.

    Args:
        frequency: Tone frequency in Hz.
        duration: Tone length in seconds. Must be >= 0; zero yields an
            empty array.
        sample_rate: Samples per second. Defaults to 44100, the value that
            was previously hard-coded.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is a 1-D
        ``float32`` NumPy array with values in [-1, 1].

    Raises:
        ValueError: If ``duration`` is negative or ``sample_rate`` is not
            positive.
    """
    frequency = float(frequency)
    duration = float(duration)
    sample_rate = int(sample_rate)
    if sample_rate <= 0:
        raise ValueError("sample_rate must be a positive integer")
    if duration < 0:
        raise ValueError("duration must be non-negative")

    sample_count = int(sample_rate * duration)
    # Sample times are k / sample_rate. Spreading `sample_count` points over
    # `duration` instead would stretch the spacing (and detune the tone)
    # whenever sample_rate * duration is not a whole number.
    t = np.arange(sample_count) / sample_rate
    audio = np.sin(2 * np.pi * frequency * t)
    return (sample_rate, audio.astype(np.float32))
 
 
 
20
 
21
def generate_voiceover(text, language):
    """Create a speech clip for *text* in a form a ``gr.Audio`` output accepts.

    The in-memory stream from ``text_to_speech`` is written to a temporary
    ``.mp3`` file and its path is returned, because a ``gr.Audio`` output
    cannot take an ``io.BytesIO`` object as its value.

    Args:
        text: The text to be spoken.
        language: Language code forwarded to ``text_to_speech``.

    Returns:
        Path of the temporary ``.mp3`` file, or ``None`` when *text* is
        empty or whitespace-only (nothing to speak; the output is cleared).
    """
    import tempfile  # local import: `tempfile` is not in the module's import block

    if not text or not text.strip():
        return None

    audio_fp = text_to_speech(text, language)
    # delete=False keeps the file on disk after the handle is closed so
    # Gradio can still read it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
        mp3_file.write(audio_fp.read())
    return mp3_file.name
 
 
 
 
 
 
 
 
24
 
25
def generate_sound_effect(frequency, duration):
    """Gradio-facing wrapper that delegates to ``generate_sound``.

    Args:
        frequency: Forwarded unchanged to ``generate_sound``.
        duration: Forwarded unchanged to ``generate_sound``.

    Returns:
        Whatever ``generate_sound`` returns for the same arguments.
    """
    tone = generate_sound(frequency, duration)
    return tone
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
+ # Gradio interface
29
def _generate_all(text, language, frequency, duration):
    """Run both generators for one submit and return their outputs in order.

    ``gr.Interface`` calls a single function with every input and expects one
    return value per output component, so the speech and sound-effect
    generators are combined here and each receives only its own arguments.
    """
    speech = generate_voiceover(text, language)
    effect = generate_sound_effect(frequency, duration)
    return speech, effect


iface = gr.Interface(
    fn=_generate_all,
    inputs=[
        gr.Textbox(label="Enter text for speech generation"),
        gr.Dropdown(choices=["en", "es", "fr", "de", "it"], label="Select Language", value="en"),
        gr.Number(label="Frequency (Hz)", value=440),
        gr.Number(label="Duration (seconds)", value=1),
    ],
    outputs=[
        gr.Audio(label="Generated Speech"),
        gr.Audio(label="Generated Sound Effect"),
    ],
    title="Simple Text-to-Speech and Sound Generation Tool",
    description="Generate speech from text and create simple sound effects.",
)

iface.launch()