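"""Voice-to-voice translation demo: Whisper (speech-to-text) -> SMALL-100
(alirezamsh/small100, translation) -> gTTS (text-to-speech), served through a
Gradio interface."""
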
import os
import gradio as gr
import whisper
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from gtts import gTTS
import sentencepiece  # noqa: F401  (dependency of the SMALL-100 tokenizer)
import sounddevice as sd
import soundfile as sf
import numpy as np
import tempfile
def translate_voice(audio, target_lang):
    # Gradio's microphone input (type="numpy") delivers a (sample_rate, data) tuple;
    # write it to a temporary WAV file so Whisper can load it.
    sample_rate, data = audio
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
        temp_filename = temp_audio.name
    sf.write(temp_filename, data, sample_rate)

    # Transcribe with Whisper and detect the spoken language.
    model = whisper.load_model("base").float()
    audio = whisper.load_audio(temp_filename)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device).float()
    _, probs = model.detect_language(mel)
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    text = result.text
    lang = max(probs, key=probs.get)  # detected source language (informational only)

    # Translate with SMALL-100. Setting src_lang to the target code makes the
    # M2M100-style tokenizer prefix the source with the target-language token,
    # which is how SMALL-100 is conditioned on the output language.
    tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
    model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
    tokenizer.src_lang = target_lang
    encoded = tokenizer(text, return_tensors="pt")
    generated_tokens = model.generate(**encoded)
    translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

    # Synthesize the translation with gTTS.
    tts = gTTS(text=translated_text, lang=target_lang)
    filename = "to_speech.mp3"
    tts.save(filename)

    return filename, text, translated_text, target_lang


def toggle_record(start):
    """Start/stop switch for the optional local (sounddevice) capture path below:
    call with True to begin recording and False to stop."""
    global is_recording
    is_recording = bool(start)


def record_audio():
    """Optional local capture helper (sounddevice): record one-second blocks from the
    default microphone while is_recording is set, then return them as one flat array."""
    global is_recording
    fs = 16000
    blocks = []
    while is_recording:
        block = sd.rec(int(fs), samplerate=fs, channels=1)
        sd.wait()  # wait for this one-second chunk to finish recording
        blocks.append(block)
    if not blocks:
        return np.zeros(0, dtype="float32")
    return np.concatenate(blocks).flatten()


is_recording = False


# The microphone Audio component records in the browser; translate_voice receives
# the capture as a (sample_rate, data) tuple.
iface = gr.Interface(
    fn=translate_voice,
    inputs=[
        gr.Audio(source="microphone", type="numpy", label="Speak"),
        gr.Dropdown(choices=["en", "ru", "de", "fr"], label="Target Language"),
    ],
    outputs=[
        gr.Audio(type="filepath", label="Translated Audio"),
        gr.Textbox(label="Original Text"),
        gr.Textbox(label="Translated Text"),
        gr.Textbox(label="Target Language"),
    ],
)
iface.launch()