import gradio as gr
from asr import transcribe_auto  # Import ASR function
from ttsmms import download, TTS
from langdetect import detect

# Download and load TTS models for Swahili and English
swahili_dir = download("swh", "./data/swahili")
english_dir = download("eng", "./data/english")

swahili_tts = TTS(swahili_dir)
english_tts = TTS(english_dir)

# Function to handle ASR → TTS
def asr_to_tts(audio):
    # Step 1: Transcribe speech
    transcribed_text = transcribe_auto(audio)

    # Step 2: Detect language & generate speech
    lang = detect(transcribed_text)
    wav_path = "./output.wav"

    if lang == "sw":  # Swahili
        swahili_tts.synthesis(transcribed_text, wav_path=wav_path)
    else:  # Default to English
        english_tts.synthesis(transcribed_text, wav_path=wav_path)

    return transcribed_text, wav_path  # Return both text & generated speech

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("<h2 style='text-align: center;'>Multilingual Speech-to-Text & Text-to-Speech</h2>")

    with gr.Row():
        audio_input = gr.Audio(source="microphone", type="filepath", label="Speak Here")

    text_output = gr.Textbox(label="Transcription", interactive=False)
    audio_output = gr.Audio(label="Generated Speech")

    submit_button = gr.Button("Transcribe & Speak")

    submit_button.click(fn=asr_to_tts, inputs=[audio_input], outputs=[text_output, audio_output])

# Run the App
if __name__ == "__main__":
    demo.launch()
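
# ---------------------------------------------------------------------------
# The asr module imported above is not included in this file. The sketch
# below shows one minimal, hypothetical way to implement transcribe_auto,
# assuming the Hugging Face transformers ASR pipeline with a multilingual
# Whisper model; the model choice, pipeline usage, and function body are
# assumptions, not the Space's actual asr.py.
# ---------------------------------------------------------------------------
# asr.py (hypothetical sketch)
from transformers import pipeline

# Whisper is multilingual, so a single model can transcribe both Swahili and
# English speech without an explicit language argument.
_asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-small")

def transcribe_auto(audio_path: str) -> str:
    """Transcribe the recorded audio file (path from gr.Audio) to plain text."""
    return _asr_pipeline(audio_path)["text"].strip()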