Spaces:
Runtime error
Runtime error
File size: 1,701 Bytes
da5250a 05fd694 13b10f1 d7dfa49 292172d bda48ea 726d965 292172d 726d965 05fd694 d7dfa49 292172d 05fd694 292172d d7dfa49 726d965 292172d 05fd694 292172d 05fd694 292172d 05fd694 292172d 05fd694 292172d 6cfff67 0856a96 b2604a4 05fd694 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import os
import gradio as gr
import whisper
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from gtts import gTTS
# Speech-to-text: the Whisper checkpoint is loaded once at import time and
# shared by every request.
_WHISPER_CHECKPOINT = "base"
whisper_model = whisper.load_model(_WHISPER_CHECKPOINT)

# Text translation: tokenizer and seq2seq model are both loaded from the same
# pretrained checkpoint name so they stay in sync.
_TRANSLATION_CHECKPOINT = "alirezamsh/small100"
tokenizer = AutoTokenizer.from_pretrained(_TRANSLATION_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_TRANSLATION_CHECKPOINT)
def translate_speech(audio):
audio = audio[0]
audio = whisper.pad_or_trim(audio)
mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
_, probs = whisper_model.detect_language(mel)
options = whisper.DecodingOptions(fp16=False)
result = whisper.decode(whisper_model, mel, options)
text = result.text
# Translate text
tokenizer.src_lang = 'en' # Assuming the input is always in English
encoded_text = tokenizer(text, return_tensors="pt")
generated_tokens = model.generate(**encoded_text)
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
# Text-to-speech (TTS)
tts = gTTS(text=translated_text, lang='en') # Assuming the target language is English
audio_path = "translated_audio.mp3"
tts.save(audio_path)
return audio_path
def translate_speech_interface(audio):
    """Gradio callback: translate the recording and return the MP3 file's bytes.

    Args:
        audio: Value delivered by the audio input component; passed unchanged
            to ``translate_speech``.

    Returns:
        The raw bytes of the MP3 file produced by ``translate_speech``.
    """
    translated_audio = translate_speech(audio)
    # Context manager so the file handle is closed even if the read fails.
    with open(translated_audio, "rb") as mp3_file:
        # NOTE(review): the output component is declared with type="numpy"
        # while this returns encoded MP3 bytes — confirm the two agree.
        return mp3_file.read()
# Input widget: microphone recording handed to the callback in "numpy" form.
audio_recording = gr.inputs.Audio(
    source="microphone",
    type="numpy",
    label="Record your speech",
)

# Output widget: audio player for the translated speech.
output_audio = gr.outputs.Audio(type="numpy", label="Translated Audio")

# Wire the callback to the widgets and start the web UI.
iface = gr.Interface(
    fn=translate_speech_interface,
    inputs=audio_recording,
    outputs=output_audio,
    title="Speech Translator",
)
iface.launch()
|