# app.py — "test_gradio" Hugging Face Space (commit 8d609d2, ~2.04 kB)
# by Pranjal12345; file-viewer chrome ("raw / history / blame") removed so the
# module is importable.
import gradio as gr
from faster_whisper import WhisperModel
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from utils import lang_ids
# Speech-to-text model configuration. The Whisper "medium" checkpoint is
# loaded once at import time; int8 quantization keeps the CPU memory
# footprint manageable on a Space without a GPU.
model_size = "medium"
ts_model = WhisperModel(model_size, device = "cpu", compute_type = "int8")
# Human-readable language names offered in the UI dropdown; lang_ids maps
# each name to an MBart language code (defined in utils.py).
lang_list = list(lang_ids.keys())
# Lazily-initialized MBart translation assets, shared across requests so the
# multi-GB checkpoint is not re-downloaded/re-loaded on every call.
_mbart_model = None
_mbart_tokenizer = None


def _load_translation_assets():
    """Load and cache the MBart many-to-many model and tokenizer.

    Loading from the Hub is expensive; the original code did it on every
    request. Returns the cached ``(model, tokenizer)`` pair.
    """
    global _mbart_model, _mbart_tokenizer
    if _mbart_model is None:
        _mbart_model = MBartForConditionalGeneration.from_pretrained(
            "sanjitaa/mbart-many-to-many"
        )
        _mbart_tokenizer = MBart50TokenizerFast.from_pretrained(
            "sanjitaa/mbart-many-to-many"
        )
    return _mbart_model, _mbart_tokenizer


def translate_audio(inputs, target_language):
    """Transcribe an audio file to English, then optionally translate it.

    Args:
        inputs: Filesystem path of the uploaded audio file, or ``None``
            when the user submitted without uploading anything.
        target_language: Human-readable language name; must be a key of
            ``lang_ids``.

    Returns:
        The transcript translated into ``target_language`` as one string.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload an audio file before submitting your request.")

    # Whisper's "translate" task always produces English text, whatever the
    # spoken language of the recording.
    segments, _ = ts_model.transcribe(inputs, task="translate")
    target_lang = lang_ids[target_language]

    if target_language == 'English':
        # join() avoids quadratic += concatenation over many segments.
        return ''.join(segment.text for segment in segments)

    model, tokenizer = _load_translation_assets()
    tokenizer.src_lang = "en_XX"  # the Whisper output we feed in is English
    translated_chunks = []
    for segment in segments:
        encoded_chunk = tokenizer(segment.text, return_tensors="pt")
        generated_tokens = model.generate(
            **encoded_chunk,
            # Force decoding to start in the requested target language.
            forced_bos_token_id=tokenizer.lang_code_to_id[target_lang],
        )
        decoded = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
        translated_chunks.append(decoded[0])
    return ''.join(translated_chunks)
# Gradio UI wiring. NOTE: the original mixed the removed Gradio 1/2 namespace
# (gr.inputs.Audio(source=...)) with the modern top-level gr.Dropdown, and
# passed layout=/theme= string kwargs that gr.Interface no longer accepts;
# both are fixed here. "upload" is the default audio source, so it need not
# be spelled out (keeps compatibility across Gradio 3.x and 4.x).
translation_interface = gr.Interface(
    fn=translate_audio,
    inputs=[
        gr.Audio(type="filepath", label="Audio file"),
        gr.Dropdown(lang_list, value="English", label="Target Language"),
    ],
    outputs="text",
    title="Translate Audio",
    description=(
        "Transcribe an uploaded audio file with Whisper and translate the "
        "result into the selected target language with MBart."
    ),
    allow_flagging="never",
)

if __name__ == "__main__":
    translation_interface.launch()