# NOTE(review): removed web file-viewer chrome that was scraped along with this
# file ("Spaces", "Running", file size, commit hash, and a line-number gutter).
import gradio as gr
from asr import transcribe_auto # Import ASR function
from ttsmms import download, TTS
from langdetect import detect
# Download and load TTS models for Swahili and English.
# NOTE: download() fetches the MMS model files on first run (network I/O at
# import time) and returns the local model directory; subsequent runs reuse
# the cached ./data/* copies.
swahili_dir = download("swh", "./data/swahili")
english_dir = download("eng", "./data/english")
# Both TTS instances are module-level singletons shared by every request.
swahili_tts = TTS(swahili_dir)
english_tts = TTS(english_dir)
# Function to handle ASR → TTS
def asr_to_tts(audio):
    """Transcribe speech from *audio*, then synthesize the text back to speech.

    Parameters
    ----------
    audio : str
        Filepath to the recorded audio clip (Gradio ``type="filepath"``).

    Returns
    -------
    tuple[str, str]
        ``(transcribed_text, wav_path)`` — the transcription and the path of
        the generated speech file, matching the two Gradio outputs.
    """
    # Step 1: Transcribe Speech (project-local ASR helper)
    transcribed_text = transcribe_auto(audio)

    # Step 2: Detect Language & Generate Speech.
    # langdetect raises on empty/undetectable text, so fall back to English
    # rather than crashing the request (best-effort, matches the else-branch
    # default below).
    try:
        lang = detect(transcribed_text) if transcribed_text.strip() else "en"
    except Exception:
        lang = "en"

    wav_path = "./output.wav"
    if lang == "sw":  # Swahili
        swahili_tts.synthesis(transcribed_text, wav_path=wav_path)
    else:  # Default to English
        english_tts.synthesis(transcribed_text, wav_path=wav_path)
    return transcribed_text, wav_path  # Return both text & generated speech
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("<h2 style='text-align: center;'>Multilingual Speech-to-Text & Text-to-Speech</h2>")
    # NOTE(review): the original file's indentation was flattened by the
    # scraper, so the exact nesting under gr.Row() is a best guess — confirm
    # whether the output widgets belong inside the row.
    # NOTE(review): label emoji were reconstructed from mojibake ("π€" etc.)
    # — confirm the intended characters against the deployed app.
    with gr.Row():
        audio_input = gr.Audio(source="microphone", type="filepath", label="🎤 Speak Here")
        text_output = gr.Textbox(label="📝 Transcription", interactive=False)
    audio_output = gr.Audio(label="🔊 Generated Speech")
    submit_button = gr.Button("Transcribe & Speak 🚀")
    # Wire the button: one audio input → (transcription text, synthesized wav).
    submit_button.click(fn=asr_to_tts, inputs=[audio_input], outputs=[text_output, audio_output])

# Run the App
if __name__ == "__main__":
    demo.launch()
# NOTE(review): stray line-number gutter character ("|") from the scraper,
# commented out so the module parses.