File size: 1,509 Bytes
3da6c4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gradio as gr
from asr import transcribe_auto  # Import ASR function
from ttsmms import download, TTS
from langdetect import detect

# Fetch the MMS TTS checkpoints ("swh" = Swahili, "eng" = English) into
# ./data/<language> and keep the directories returned by download().
swahili_dir, english_dir = (
    download("swh", "./data/swahili"),
    download("eng", "./data/english"),
)

# One synthesizer per language, built once at import time and reused by
# every request.
swahili_tts, english_tts = TTS(swahili_dir), TTS(english_dir)

# Function to handle ASR → TTS
def asr_to_tts(audio):
    """Transcribe recorded speech and read the transcription back aloud.

    Args:
        audio: The recording to transcribe, passed unchanged to
            ``transcribe_auto``. ``None`` means nothing was recorded.

    Returns:
        A ``(transcribed_text, wav_path)`` tuple. ``wav_path`` is the path
        of the synthesized WAV file, or ``None`` when there was no audio or
        no text to synthesize.
    """
    # Nothing was recorded: there is nothing to transcribe or synthesize.
    if audio is None:
        return "", None

    # Imported locally so this function brings its own dependencies.
    import tempfile

    from langdetect import LangDetectException

    # Step 1: Transcribe Speech
    transcribed_text = transcribe_auto(audio)

    # An empty transcription cannot be language-detected or spoken.
    if not transcribed_text or not transcribed_text.strip():
        return transcribed_text or "", None

    # Step 2: Detect Language & Generate Speech
    try:
        lang = detect(transcribed_text)
    except LangDetectException:
        # Raised when the text has no usable features (e.g. only digits or
        # punctuation); fall through to the English default below.
        lang = None

    # Swahili gets its own voice; everything else defaults to English.
    tts = swahili_tts if lang == "sw" else english_tts

    # Use a unique file per request so concurrent users do not overwrite
    # each other's output. delete=False because the path is returned and
    # the file must outlive this call.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    tts.synthesis(transcribed_text, wav_path=wav_path)

    return transcribed_text, wav_path  # Return both text & generated speech

# Gradio Interface
with gr.Blocks() as demo:
    # Page title, written as centered HTML inside the Markdown component.
    gr.Markdown("<h2 style='text-align: center;'>Multilingual Speech-to-Text & Text-to-Speech</h2>")

    with gr.Row():
        # Microphone input; type="filepath" hands the callback a file path.
        # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4+ uses
        # `sources=["microphone"]` — confirm against the installed version.
        audio_input = gr.Audio(source="microphone", type="filepath", label="🎤 Speak Here")
        text_output = gr.Textbox(label="📝 Transcription", interactive=False)
        audio_output = gr.Audio(label="🔊 Generated Speech")

    submit_button = gr.Button("Transcribe & Speak 🔄")

    # On click, run asr_to_tts on the recording and show its two return
    # values in the transcription box and the audio player, in that order.
    submit_button.click(fn=asr_to_tts, inputs=[audio_input], outputs=[text_output, audio_output])

# Run the App
# Start the Gradio server only when this file is executed as a script, so
# importing the module does not launch the UI.
if __name__ == "__main__":
    demo.launch()