Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from asr import transcribe_auto # Import ASR function
|
3 |
+
from ttsmms import download, TTS
|
4 |
+
from langdetect import detect
|
5 |
+
|
6 |
+
# Fetch the Swahili ("swh") and English ("eng") TTS model files into ./data
# (Swahili first), then build one TTS engine per language from the
# directories returned by download(). This runs once, at import time.
swahili_dir, english_dir = (
    download("swh", "./data/swahili"),
    download("eng", "./data/english"),
)
swahili_tts, english_tts = TTS(swahili_dir), TTS(english_dir)
|
12 |
+
|
13 |
+
# Function to handle ASR → TTS
def asr_to_tts(audio):
    """Transcribe recorded speech and synthesize the transcription as audio.

    Args:
        audio: The value handed over by the audio input component, forwarded
            unchanged to ``transcribe_auto``. ``None`` means the user
            submitted without recording anything.

    Returns:
        A ``(transcribed_text, wav_path)`` tuple. ``wav_path`` is the path of
        the generated WAV file, or ``None`` when there was nothing to
        synthesize (no recording, or an empty transcription).
    """
    # Guard: clicking the button without a recording passes None through.
    if audio is None:
        return "", None

    # Imported locally so this block does not depend on edits to the
    # file-level import list.
    import tempfile

    from langdetect import LangDetectException

    # Step 1: Transcribe Speech
    transcribed_text = transcribe_auto(audio)
    if not transcribed_text or not transcribed_text.strip():
        # Nothing to detect or speak; detect() would raise on empty text.
        return transcribed_text or "", None

    # Step 2: Detect Language & Generate Speech
    try:
        lang = detect(transcribed_text)
    except LangDetectException:
        # Raised when the text has no usable features (e.g. digits only);
        # fall through to the English default below.
        lang = None

    # A unique file per request: a single shared "./output.wav" would be
    # overwritten when two requests overlap. The file is left on disk
    # (delete=False) so the caller can read it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    # Swahili when detected as "sw"; everything else defaults to English.
    tts = swahili_tts if lang == "sw" else english_tts
    tts.synthesis(transcribed_text, wav_path=wav_path)

    return transcribed_text, wav_path  # Return both text & generated speech
|
28 |
+
|
29 |
+
# Gradio Interface
def _microphone_input(**kwargs):
    """Create a microphone-backed ``gr.Audio`` across Gradio major versions.

    Older Gradio releases take ``source="microphone"``; newer ones replaced
    it with ``sources=[...]`` and reject the old keyword with ``TypeError``.
    The old spelling is tried first because it is what this app was written
    against; the new spelling is the fallback.
    """
    try:
        return gr.Audio(source="microphone", **kwargs)
    except TypeError:
        return gr.Audio(sources=["microphone"], **kwargs)


# NOTE(review): the non-ASCII characters in the labels below look like
# mis-decoded emoji. They are runtime strings and are kept exactly as found;
# confirm against the original file before changing them.
with gr.Blocks() as demo:
    gr.Markdown("<h2 style='text-align: center;'>Multilingual Speech-to-Text & Text-to-Speech</h2>")

    # Input recording, transcription text and synthesized audio side by side.
    with gr.Row():
        audio_input = _microphone_input(type="filepath", label="π€ Speak Here")
        text_output = gr.Textbox(label="π Transcription", interactive=False)
        audio_output = gr.Audio(label="π Generated Speech")

    submit_button = gr.Button("Transcribe & Speak π")

    # asr_to_tts returns (text, wav_path), matching the two outputs in order.
    submit_button.click(fn=asr_to_tts, inputs=[audio_input], outputs=[text_output, audio_output])

# Run the App
if __name__ == "__main__":
    demo.launch()
|
45 |
+
|