# s2s / app.py
# frogcho123's picture
# Update app.py
# 8ba1b29
# raw
# history blame
# 1.56 kB
import os
import gradio as gr
import soundfile as sf
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from gtts import gTTS
# Load the translation model
# Both objects come from the same "alirezamsh/small100" checkpoint; the
# tokenizer is fetched first, then the seq2seq weights.
translation_tokenizer, translation_model = (
    AutoTokenizer.from_pretrained("alirezamsh/small100"),
    AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100"),
)
# Available target languages
# Maps the name shown in the UI dropdown to the two-letter language code
# handed to the translation tokenizer.
available_languages = {
    'Russian': 'ru',
    'Spanish': 'es',
    'English': 'en',
    # Greek is 'el' in ISO 639-1; 'gr' is the country code, not a language code.
    'Greek': 'el',
}
# Speech-recognition checkpoint used to turn the uploaded audio into text.
# NOTE(review): this checkpoint is a reviewer choice; swap it if the
# deployment prefers a different recognizer.
_ASR_MODEL_NAME = "openai/whisper-small"
# Created on first use so importing this module does not load the recognizer.
_asr_pipeline = None


def _load_mono_audio(audio_file):
    """Return ``(samples, sample_rate)`` as mono float32 for any audio payload."""
    if isinstance(audio_file, tuple):
        # Gradio's "numpy" audio type delivers (sample_rate, data).
        sample_rate, samples = audio_file
        if samples.dtype.kind == "i":
            # Signed-integer PCM -> floats in [-1, 1].
            samples = samples / float(2 ** (8 * samples.dtype.itemsize - 1))
    else:
        # Either a path, or a file-like object exposing its path as ``.name``.
        if isinstance(audio_file, (str, os.PathLike)):
            path = audio_file
        else:
            path = audio_file.name
        samples, sample_rate = sf.read(path)
    if samples.ndim > 1:
        # Down-mix (frames, channels) to a single channel.
        samples = samples.mean(axis=1)
    return samples.astype("float32"), int(sample_rate)


def _transcribe(samples, sample_rate):
    """Run speech recognition on mono samples and return the recognized text."""
    global _asr_pipeline
    if _asr_pipeline is None:
        from transformers import pipeline

        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model=_ASR_MODEL_NAME,
            chunk_length_s=30,  # process long recordings in 30 s windows
        )
    # Passing the sampling rate lets the pipeline resample when it differs
    # from what the recognizer expects.
    # NOTE(review): that resampling path needs torchaudio installed - confirm.
    result = _asr_pipeline({"raw": samples, "sampling_rate": sample_rate})
    return result["text"].strip()


# Function to translate the audio
def translate_audio(audio_file, target_language):
    """Translate spoken audio into speech in ``target_language``.

    The audio is transcribed to text, the text is translated with the
    module-level translation model, and the translation is synthesized with
    gTTS.

    Args:
        audio_file: A path, a file-like object with a ``.name`` path, or a
            ``(sample_rate, samples)`` tuple.
        target_language: A key of ``available_languages``.

    Returns:
        Path of the MP3 file containing the spoken translation.

    Raises:
        ValueError: If no audio is given or no speech is recognized.
        KeyError: If ``target_language`` is not in ``available_languages``.
    """
    if audio_file is None:
        raise ValueError("No audio was provided.")
    to_lang = available_languages[target_language]

    # Load audio
    samples, sample_rate = _load_mono_audio(audio_file)

    # Speech -> text (the translation model works on text, not on samples)
    source_text = _transcribe(samples, sample_rate)
    if not source_text:
        raise ValueError("No speech could be recognized in the audio.")

    # Translate the text
    # NOTE(review): the target code is deliberately set as ``src_lang``;
    # presumably this puts the target-language token on the encoder input as
    # the small100 checkpoint expects - confirm against the model card.
    translation_tokenizer.src_lang = to_lang
    encoded = translation_tokenizer(
        source_text, return_tensors="pt", padding=True, truncation=True
    )
    generated_tokens = translation_model.generate(**encoded)
    translated_text = translation_tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )[0]

    # Save translated audio
    # gTTS emits MP3 data, so the file carries an .mp3 extension.
    output_file = "translated_audio.mp3"
    gTTS(text=translated_text, lang=to_lang).save(output_file)
    return output_file
# Gradio interface
# Two inputs (the uploaded audio and the target-language choice) feed
# translate_audio; its return value is shown in the audio output component.
# NOTE(review): gr.inputs / gr.outputs are legacy component namespaces -
# confirm the pinned Gradio version still provides them.
audio_input = gr.inputs.Audio(label="Upload audio file")
language_dropdown = gr.inputs.Dropdown(
    label="Select Target Language",
    choices=list(available_languages),
)
audio_output = gr.outputs.Audio(label="Translated audio file")
iface = gr.Interface(
    title="Audio Translation Demo",
    fn=translate_audio,
    inputs=[audio_input, language_dropdown],
    outputs=audio_output,
)
iface.launch()