# Spaces: Running
import asyncio

import edge_tts
import gradio as gr
from transformers import pipeline, MarianMTModel, MarianTokenizer
# English speech recognition: Whisper (base checkpoint) wrapped in the
# transformers ASR pipeline.
asr_en = pipeline(task="automatic-speech-recognition", model="openai/whisper-base")

# MarianMT checkpoint for the Yoruba-English pair (swap the name to target
# Igbo/Hausa instead).
# NOTE(review): the checkpoint name reads Yoruba -> English, yet the functions
# below feed it the output of the English ASR model - confirm the intended
# translation direction.
mt_model_name = "Helsinki-NLP/opus-mt-yo-en"
model = MarianMTModel.from_pretrained(mt_model_name)
tokenizer = MarianTokenizer.from_pretrained(mt_model_name)
def translate_speech(audio):
    """Transcribe a recording and translate the transcription.

    Args:
        audio: Audio input accepted by the ``asr_en`` pipeline (the UI passes
            a file path). ``None`` means nothing was recorded.

    Returns:
        A ``(transcription, translation)`` tuple of strings. Both are ``""``
        when there is no audio or the recogniser produced no text.
    """
    # The UI can invoke this handler with no recording; never hand None to
    # the ASR pipeline.
    if audio is None:
        return "", ""

    # Strip surrounding whitespace so an all-blank result counts as empty.
    transcription = asr_en(audio)["text"].strip()
    if not transcription:
        return "", ""

    # Reuse the text path so both tabs translate in exactly the same way.
    return transcription, translate_text(transcription)
def translate_text(text):
    """Translate *text* with the module-level MarianMT model.

    Args:
        text: String to translate. ``None`` and whitespace-only input are
            treated as empty.

    Returns:
        The decoded translation, or ``""`` when there is nothing to translate.
    """
    # Skip the model entirely for blank input instead of generating from an
    # empty sequence.
    if text is None or not text.strip():
        return ""

    # truncation=True clips over-long input to the tokenizer's maximum length
    # so it cannot exceed what the model accepts.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated_tokens = model.generate(**inputs)
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
# Two-tab UI: microphone input (transcribe + translate) and plain text input
# (translate only).
with gr.Blocks() as demo:
    # NOTE(review): the "β" in the heading looks like a mis-encoded arrow
    # character - confirm the intended symbol before changing the string.
    gr.Markdown("### Multilingual Realtime Translator - English β Yoruba (V1)")

    with gr.Tab("Speech Input"):
        # NOTE(review): `source=` is the Gradio 3.x spelling; Gradio 4+ expects
        # `sources=["microphone"]` - confirm against the pinned Gradio version.
        mic = gr.Audio(source="microphone", type="filepath", label="Speak Now")
        transcription = gr.Textbox(label="Transcription")
        translation = gr.Textbox(label="Translation")
        # gr.Audio has no `submit` event (calling it raises AttributeError
        # while the UI is being built), so react to value changes instead.
        # `change` also fires when the recording is cleared, in which case the
        # handler receives None.
        mic.change(translate_speech, inputs=mic, outputs=[transcription, translation])

    with gr.Tab("Text Input"):
        input_text = gr.Textbox(label="Enter text")
        translated_text = gr.Textbox(label="Translated text")
        # Textbox `submit` fires when the user presses Enter.
        input_text.submit(translate_text, inputs=input_text, outputs=translated_text)

demo.launch()