"""Gradio demo: speech/text translation using Whisper ASR and a MarianMT model."""

import asyncio
from typing import Optional, Tuple

import edge_tts
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer, pipeline

# Load Whisper for English ASR
# NOTE(review): the translation model below is named "yo-en" while the UI
# title says "English ↔ Yoruba" — confirm which language the speech input is
# expected to be in and that the ASR/MT pair matches that direction.
asr_en = pipeline("automatic-speech-recognition", model="openai/whisper-base")

# Load MarianMT for Yoruba-English (can switch for Igbo/Hausa)
mt_model_name = "Helsinki-NLP/opus-mt-yo-en"
tokenizer = MarianTokenizer.from_pretrained(mt_model_name)
model = MarianMTModel.from_pretrained(mt_model_name)


def translate_text(text: Optional[str]) -> str:
    """Translate *text* with the loaded MarianMT model.

    Args:
        text: Source-language text. ``None``, empty, or whitespace-only input
            is returned as an empty string without running the model.

    Returns:
        The decoded translation with special tokens removed.
    """
    if not text or not text.strip():
        return ""

    # truncation=True keeps over-long input within the model's maximum
    # sequence length instead of failing inside generate().
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated_tokens = model.generate(**inputs)
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)


def translate_speech(audio: Optional[str]) -> Tuple[str, str]:
    """Transcribe a recorded audio file and translate the transcription.

    Args:
        audio: Path to the recorded audio file (the Audio component is
            configured with ``type="filepath"``), or ``None`` when nothing
            has been recorded.

    Returns:
        A ``(transcription, translation)`` pair; both are empty strings when
        no audio was provided.
    """
    if not audio:
        # Button clicked without a recording: nothing to transcribe.
        return "", ""

    transcription = asr_en(audio)["text"].strip()
    # Reuse the text path so both tabs share one translation implementation.
    return transcription, translate_text(transcription)


with gr.Blocks() as demo:
    gr.Markdown("### Multilingual Realtime Translator - English ↔ Yoruba (V1)")

    with gr.Tab("Speech Input"):
        mic = gr.Audio(type="filepath", label="🎙️ Click to record", interactive=True)
        transcription = gr.Textbox(label="Transcription")
        translation = gr.Textbox(label="Translation")
        translate_btn = gr.Button("Translate Speech")
        translate_btn.click(
            translate_speech,
            inputs=mic,
            outputs=[transcription, translation],
        )

    with gr.Tab("Text Input"):
        input_text = gr.Textbox(label="Enter text")
        translated_text = gr.Textbox(label="Translated text")
        # Pressing Enter in the input box triggers the translation.
        input_text.submit(translate_text, inputs=input_text, outputs=translated_text)


if __name__ == "__main__":
    demo.launch()