# Provenance (Hugging Face page residue): ubiodee — "Upload 3 files", commit c410fd5 (verified), 1.7 kB
import gradio as gr
from transformers import pipeline, MarianMTModel, MarianTokenizer
import edge_tts
import asyncio
# Load Whisper for English ASR (speech -> text).
# NOTE(review): whisper-base auto-detects the spoken language; the UI wording
# implies English input — confirm the intended direction with the MT model below.
asr_en = pipeline("automatic-speech-recognition", model="openai/whisper-base")
# Load MarianMT for Yoruba->English translation (can switch checkpoint for Igbo/Hausa).
# Both `tokenizer` and `model` are shared by translate_speech() and translate_text().
mt_model_name = "Helsinki-NLP/opus-mt-yo-en"
tokenizer = MarianTokenizer.from_pretrained(mt_model_name)
model = MarianMTModel.from_pretrained(mt_model_name)
def translate_speech(audio):
    """Transcribe an audio file, then translate the transcription.

    Runs the module-level Whisper ASR pipeline on *audio* (a filepath,
    as produced by the gr.Audio component), feeds the resulting text
    through the shared MarianMT tokenizer/model, and returns the pair
    (transcription, translated_text).
    """
    spoken_text = asr_en(audio)["text"]
    encoded = tokenizer(spoken_text, return_tensors="pt", padding=True)
    generated_ids = model.generate(**encoded)
    result = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return spoken_text, result
def translate_text(text):
    """Translate *text* with the shared MarianMT tokenizer/model.

    Returns the decoded translation of the first (and only) generated
    sequence, with special tokens stripped.
    """
    encoded = tokenizer(text, return_tensors="pt", padding=True)
    generated_ids = model.generate(**encoded)
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
# Build the web UI: one tab for spoken input, one for typed input.
with gr.Blocks() as demo:
    gr.Markdown("### Multilingual Realtime Translator - English ↔ Yoruba (V1)")
    with gr.Tab("Speech Input"):
        mic = gr.Audio(source="microphone", type="filepath", label="Speak Now")
        transcription = gr.Textbox(label="Transcription")
        translation = gr.Textbox(label="Translation")
        # BUG FIX: gr.Audio exposes no `.submit` event (only textbox-like
        # components do), so `mic.submit(...)` raised AttributeError at
        # startup. `.change` fires once a recording is finalized/cleared,
        # which is the intended trigger here.
        mic.change(translate_speech, inputs=mic, outputs=[transcription, translation])
    with gr.Tab("Text Input"):
        input_text = gr.Textbox(label="Enter text")
        translated_text = gr.Textbox(label="Translated text")
        # Enter key in the textbox triggers the translation.
        input_text.submit(translate_text, inputs=input_text, outputs=translated_text)

demo.launch()