import asyncio

import speech_recognition as sr
from transformers import MarianMTModel, MarianTokenizer, pipeline
import edge_tts

# Load the Whisper model for speech recognition
asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")

# Load the translation model (Yoruba -> English)
mt_model_name = "Helsinki-NLP/opus-mt-yo-en"
tokenizer = MarianTokenizer.from_pretrained(mt_model_name)
model = MarianMTModel.from_pretrained(mt_model_name)


# TTS: synthesize the text with edge-tts and save it as an MP3 file
async def speak(text):
    communicate = edge_tts.Communicate(text, "en-US-GuyNeural")
    await communicate.save("output.mp3")


def translate_text(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)


# Real-time mic input
recognizer = sr.Recognizer()
with sr.Microphone() as source:
    print("Speak now...")
    audio = recognizer.listen(source)

print("Processing...")

# Speech to text: the pipeline accepts raw WAV bytes (decoding requires ffmpeg).
# Hinting the source language keeps Whisper from auto-detecting it incorrectly.
result = asr(audio.get_wav_data(), generate_kwargs={"language": "yoruba"})["text"]
print("Transcribed:", result)

# Translate Yoruba text to English
translation = translate_text(result)
print("Translated:", translation)

# Speak the English translation (written to output.mp3)
asyncio.run(speak(translation))
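
# Note: speak() above only writes output.mp3 to disk; nothing plays it.
# A minimal playback sketch, assuming the pydub package (plus ffmpeg and a
# playback backend such as simpleaudio) is installed -- pydub is not part of
# the pipeline above, just one way to hear the result.
from pydub import AudioSegment
from pydub.playback import play

play(AudioSegment.from_mp3("output.mp3"))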