import gradio as gr
from asr import transcribe_auto  # ASR helper from the local asr module (a possible implementation is sketched below)
from ttsmms import download, TTS
from langdetect import detect

# Download and load MMS TTS models for Swahili and English
swahili_dir = download("swh", "./data/swahili")
english_dir = download("eng", "./data/english")
swahili_tts = TTS(swahili_dir)
english_tts = TTS(english_dir)

# Function to handle ASR → TTS
def asr_to_tts(audio):
    # Step 1: Transcribe the recorded speech
    transcribed_text = transcribe_auto(audio)

    # Step 2: Detect the language and synthesize speech with the matching voice
    lang = detect(transcribed_text)
    wav_path = "./output.wav"
    if lang == "sw":  # Swahili
        swahili_tts.synthesis(transcribed_text, wav_path=wav_path)
    else:  # Default to English
        english_tts.synthesis(transcribed_text, wav_path=wav_path)

    return transcribed_text, wav_path  # Return both the text and the generated speech

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Multilingual Speech-to-Text & Text-to-Speech")

    with gr.Row():
        audio_input = gr.Audio(source="microphone", type="filepath", label="🎤 Speak Here")
        text_output = gr.Textbox(label="📝 Transcription", interactive=False)

    audio_output = gr.Audio(label="🔊 Generated Speech")

    submit_button = gr.Button("Transcribe & Speak 🔄")
    submit_button.click(fn=asr_to_tts, inputs=[audio_input], outputs=[text_output, audio_output])

# Run the app
if __name__ == "__main__":
    demo.launch()