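The snippet below chains three models into a live Yoruba → English voice translator: Whisper transcribes microphone audio, a MarianMT checkpoint translates the transcript, and Edge TTS speaks the English result. A rough dependency list, stated as an assumption about the environment rather than something given in the original:

# Likely prerequisites (an assumption; the exact set varies by platform):
#   pip install SpeechRecognition PyAudio transformers torch edge-tts
# ffmpeg must also be on PATH so the ASR pipeline can decode raw WAV bytes.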
import asyncio

import speech_recognition as sr
import edge_tts
from transformers import pipeline

# Load the Whisper ASR model (small multilingual checkpoint)
asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
# Load the translation model (Yoruba → English)
from transformers import MarianMTModel, MarianTokenizer

mt_model_name = "Helsinki-NLP/opus-mt-yo-en"
tokenizer = MarianTokenizer.from_pretrained(mt_model_name)
model = MarianMTModel.from_pretrained(mt_model_name)
# TTS: synthesize speech with Edge TTS and save it to output.mp3
async def speak(text):
    communicate = edge_tts.Communicate(text, "en-US-GuyNeural")
    await communicate.save("output.mp3")
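Note that speak() only writes output.mp3 to disk; nothing in the listing plays it. One way to audition the file (the playsound dependency is an assumption, not part of the original) is:

# Hypothetical playback step using the playsound package
from playsound import playsound

asyncio.run(speak("Hello from Edge TTS"))
playsound("output.mp3")  # blocks until the clip finishes playing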
# Translate Yoruba text to English with the MarianMT model
def translate_text(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
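A quick way to smoke-test the translation path on its own (the sample sentence is illustrative and the output depends on the checkpoint):

# Illustrative check of the MT model; the input sentence is an assumption
print(translate_text("Bawo ni?"))  # a common Yoruba greeting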
# Real-time mic input: listen() blocks until the speaker pauses
recognizer = sr.Recognizer()
with sr.Microphone() as source:
    print("Speak now...")
    audio = recognizer.listen(source)

print("Processing...")
# Speech to text: the pipeline accepts raw WAV bytes (requires ffmpeg);
# pinning the language avoids Whisper misdetecting Yoruba
result = asr(audio.get_wav_data(), generate_kwargs={"language": "yoruba"})["text"]
print("Transcribed:", result)
# Translate
translation = translate_text(result)
print("Translated:", translation)

# Speak the English translation (written to output.mp3)
asyncio.run(speak(translation))
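As written, the script handles a single utterance and exits. For genuinely real-time use, the capture, transcribe, translate, speak cycle can be looped; a minimal sketch, assuming the objects defined above and that Ctrl+C is an acceptable stop signal:

# Continuous loop: one utterance per iteration until interrupted
# (a sketch built on the objects above, not part of the original script)
try:
    while True:
        with sr.Microphone() as source:
            print("Speak now...")
            audio = recognizer.listen(source)
        text = asr(audio.get_wav_data(), generate_kwargs={"language": "yoruba"})["text"]
        print("Transcribed:", text)
        translation = translate_text(text)
        print("Translated:", translation)
        asyncio.run(speak(translation))
except KeyboardInterrupt:
    print("Stopped.")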