Spaces:
Runtime error
Runtime error
Commit
·
b9553d2
1
Parent(s):
bda48ea
Update app.py
Browse files
app.py
CHANGED
@@ -11,9 +11,9 @@ whisper_model = whisper.load_model("base")
|
|
11 |
tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
|
12 |
model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
|
13 |
|
14 |
-
def translate_speech(audio):
|
15 |
-
audio = audio
|
16 |
-
audio = whisper.pad_or_trim(audio)
|
17 |
mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
|
18 |
_, probs = whisper_model.detect_language(mel)
|
19 |
options = whisper.DecodingOptions(fp16=False)
|
@@ -21,26 +21,30 @@ def translate_speech(audio):
|
|
21 |
text = result.text
|
22 |
|
23 |
# Translate text
|
24 |
-
tokenizer.src_lang =
|
25 |
encoded_text = tokenizer(text, return_tensors="pt")
|
26 |
generated_tokens = model.generate(**encoded_text)
|
27 |
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
28 |
|
29 |
# Text-to-speech (TTS)
|
30 |
-
tts = gTTS(text=translated_text, lang=
|
31 |
audio_path = "translated_audio.mp3"
|
32 |
tts.save(audio_path)
|
33 |
|
34 |
return audio_path
|
35 |
|
36 |
-
def translate_speech_interface(audio):
|
37 |
-
translated_audio = translate_speech(audio)
|
38 |
translated_audio_bytes = open(translated_audio, "rb").read()
|
39 |
|
40 |
return translated_audio_bytes
|
41 |
|
42 |
audio_recording = gr.inputs.Audio(source="microphone", type="numpy", label="Record your speech")
|
|
|
|
|
43 |
output_audio = gr.outputs.Audio(type="numpy", label="Translated Audio")
|
44 |
|
45 |
-
iface = gr.Interface(fn=translate_speech_interface, inputs=audio_recording, outputs=output_audio, title="Speech Translator")
|
46 |
iface.launch()
|
|
|
|
|
|
# Load the SMaLL-100 translation model and its tokenizer once at startup,
# so every request reuses the same weights instead of re-downloading them.
# (SMaLL-100 is a compact many-to-many multilingual MT model.)
tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
14 |
+
def translate_speech(audio, target_lang):
|
15 |
+
audio = audio.astype("float32")
|
16 |
+
audio = whisper.pad_or_trim(audio, whisper_model.audio_config.sample_rate)
|
17 |
mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
|
18 |
_, probs = whisper_model.detect_language(mel)
|
19 |
options = whisper.DecodingOptions(fp16=False)
|
|
|
21 |
text = result.text
|
22 |
|
23 |
# Translate text
|
24 |
+
tokenizer.src_lang = target_lang # Assuming the input is always in English
|
25 |
encoded_text = tokenizer(text, return_tensors="pt")
|
26 |
generated_tokens = model.generate(**encoded_text)
|
27 |
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
28 |
|
29 |
# Text-to-speech (TTS)
|
30 |
+
tts = gTTS(text=translated_text, lang=target_lang)
|
31 |
audio_path = "translated_audio.mp3"
|
32 |
tts.save(audio_path)
|
33 |
|
34 |
return audio_path
|
35 |
|
36 |
+
def translate_speech_interface(audio, target_lang):
    """Gradio callback: translate recorded speech, return the MP3 audio bytes.

    Parameters:
        audio: recorded audio as delivered by the Gradio microphone component.
        target_lang: language code the speech should be translated into.

    Returns:
        bytes: the contents of the generated translated-audio MP3 file.
    """
    translated_path = translate_speech(audio, target_lang)
    # Use a context manager so the file handle is closed even if read() raises;
    # the original `open(path, "rb").read()` leaked the descriptor.
    with open(translated_path, "rb") as audio_file:
        return audio_file.read()
|
41 |
|
42 |
# --- Gradio UI wiring (top-level script statements) ---
# NOTE(review): gr.inputs / gr.outputs are the legacy pre-Gradio-3 namespaces;
# current Gradio uses gr.Audio / gr.Dropdown directly — confirm the pinned
# gradio version before migrating.
audio_recording = gr.inputs.Audio(source="microphone", type="numpy", label="Record your speech")

# Languages offered to the user for translation.
lang_choices = ["ru", "fr", "en", "de"]
lang_dropdown = gr.inputs.Dropdown(lang_choices, label="Select Language to Translate")

# NOTE(review): type="numpy" makes Gradio expect a (sample_rate, ndarray)
# tuple, but the callback returns raw MP3 bytes — a likely cause of the
# Space's runtime error; type="filepath" plus returning the saved path
# would match. Left unchanged here to preserve behavior.
output_audio = gr.outputs.Audio(type="numpy", label="Translated Audio")

iface = gr.Interface(
    fn=translate_speech_interface,
    inputs=[audio_recording, lang_dropdown],
    outputs=output_audio,
    title="Speech Translator",
)
iface.launch()
|
49 |
+
|
50 |
+
|