# Futuresony's picture
# Create app.py
# 3da6c4d verified
# raw
# history blame
# 1.51 kB
import gradio as gr
from asr import transcribe_auto # Import ASR function
from ttsmms import download, TTS
from langdetect import detect
# Fetch the Swahili ("swh") and English ("eng") TTS model files into ./data,
# then build one synthesizer per language. This runs once, at import time.
swahili_dir, english_dir = (
    download("swh", "./data/swahili"),
    download("eng", "./data/english"),
)
swahili_tts, english_tts = TTS(swahili_dir), TTS(english_dir)
# Function to handle ASR -> TTS
def asr_to_tts(audio):
    """Transcribe recorded speech and synthesize the transcription as audio.

    Args:
        audio: The value delivered by the audio input component (configured
            with ``type="filepath"``), or ``None``/empty when nothing was
            recorded.

    Returns:
        A ``(transcribed_text, wav_path)`` tuple. When there is no audio or
        the transcription is blank, returns ``("", None)`` so the UI shows
        an empty result instead of an error.
    """
    # Nothing recorded: skip ASR instead of handing it an empty input.
    if not audio:
        return "", None

    # Step 1: Transcribe speech
    transcribed_text = transcribe_auto(audio)
    if not transcribed_text or not transcribed_text.strip():
        # Nothing to detect a language from or to synthesize.
        return "", None

    # Step 2: Detect language & generate speech
    # Imported here so the block is self-contained; langdetect raises this
    # when the text has no usable features (e.g. only digits/punctuation).
    from langdetect import LangDetectException

    try:
        lang = detect(transcribed_text)
    except LangDetectException:
        lang = "en"  # fall back to the English default below

    # NOTE(review): every call writes to this same path, so a later request
    # overwrites the file produced by an earlier one.
    wav_path = "./output.wav"

    # Swahili gets its own voice; every other detected language uses English.
    tts = swahili_tts if lang == "sw" else english_tts
    tts.synthesis(transcribed_text, wav_path=wav_path)

    return transcribed_text, wav_path  # Return both text & generated speech
# Gradio interface: record speech, show the transcription, play it back.
# NOTE(review): the original indentation was lost; the nesting below (three
# components side by side in one row, button underneath) is reconstructed and
# should be confirmed against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("<h2 style='text-align: center;'>Multilingual Speech-to-Text & Text-to-Speech</h2>")

    with gr.Row():
        # NOTE(review): `source=` is version-dependent; newer Gradio releases
        # expect `sources=["microphone"]` — confirm against the pinned version.
        audio_input = gr.Audio(source="microphone", type="filepath", label="🎤 Speak Here")
        text_output = gr.Textbox(label="📝 Transcription", interactive=False)
        audio_output = gr.Audio(label="🔊 Generated Speech")

    submit_button = gr.Button("Transcribe & Speak 🔄")

    # The click handler must be registered inside the Blocks context.
    # asr_to_tts returns (text, wav_path), matching the two outputs in order.
    submit_button.click(fn=asr_to_tts, inputs=[audio_input], outputs=[text_output, audio_output])


# Run the app
if __name__ == "__main__":
    demo.launch()