import gradio as gr
from asr import transcribe_auto  # ASR helper from the local asr module (a possible implementation is sketched below)
from ttsmms import download, TTS
from langdetect import detect

# Download and load MMS TTS models for Swahili and English
swahili_dir = download("swh", "./data/swahili")
english_dir = download("eng", "./data/english")
swahili_tts = TTS(swahili_dir)
english_tts = TTS(english_dir)

# Function to handle ASR → TTS
def asr_to_tts(audio):
    # Step 1: Transcribe the recorded speech
    transcribed_text = transcribe_auto(audio)

    # Step 2: Detect the language and synthesize speech with the matching voice
    lang = detect(transcribed_text)
    wav_path = "./output.wav"
    if lang == "sw":  # Swahili
        swahili_tts.synthesis(transcribed_text, wav_path=wav_path)
    else:  # Default to English
        english_tts.synthesis(transcribed_text, wav_path=wav_path)

    return transcribed_text, wav_path  # Return both the text and the generated speech

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Multilingual Speech-to-Text & Text-to-Speech")

    with gr.Row():
        audio_input = gr.Audio(source="microphone", type="filepath", label="🎤 Speak Here")
        text_output = gr.Textbox(label="📝 Transcription", interactive=False)

    audio_output = gr.Audio(label="🔊 Generated Speech")

    submit_button = gr.Button("Transcribe & Speak 🔄")
    submit_button.click(fn=asr_to_tts, inputs=[audio_input], outputs=[text_output, audio_output])

# Run the app
if __name__ == "__main__":
    demo.launch()