Spaces:
Sleeping
Sleeping
File size: 2,122 Bytes
d1e1d4f 174a3fe d1e1d4f 7ce1960 174a3fe 33a55a6 174a3fe d1e1d4f 33a55a6 d1e1d4f 174a3fe d1e1d4f 174a3fe d1e1d4f 33a55a6 d1e1d4f 174a3fe d1e1d4f 174a3fe 4c1a6a6 174a3fe d1e1d4f 174a3fe d1e1d4f 174a3fe d1e1d4f 9576bde |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import gradio as gr
from transformers import pipeline
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from utils import lang_ids
# Whisper checkpoint used for speech recognition + translation to English.
MODEL_NAME = "Pranjal12345/pranjal_whisper_medium"
# Batch size passed to the ASR pipeline call.
BATCH_SIZE = 8
# mBART many-to-many model/tokenizer used for English -> target-language
# text translation after transcription.
model = MBartForConditionalGeneration.from_pretrained("sanjitaa/mbart-many-to-many")
tokenizer = MBart50TokenizerFast.from_pretrained("sanjitaa/mbart-many-to-many")
# ASR pipeline; chunk_length_s=30 enables long-form audio via chunking.
# NOTE(review): device='cpu' — presumably the Space has no GPU; confirm.
pipe = pipeline(
task="automatic-speech-recognition",
model=MODEL_NAME,
chunk_length_s=30,
device='cpu',
)
# Display names for the target-language dropdown (keys of utils.lang_ids).
lang_list = list(lang_ids.keys())
def split_into_sentences(text):
    """Split *text* into sentence fragments.

    '?' and '!' are normalized to '.' so a single split suffices; terminal
    punctuation is therefore removed from the fragments.

    Args:
        text: the input string (may be empty).

    Returns:
        A list of stripped, non-empty sentence fragments.
    """
    normalized = text.replace('?', '.').replace('!', '.')
    # Strip BEFORE filtering: the original filtered on the raw fragment,
    # so a whitespace-only fragment (e.g. from "A. . B") passed the filter
    # and ended up as an empty string in the result.
    return [part.strip() for part in normalized.split('.') if part.strip()]
def translate_audio(inputs, target_language):
    """Transcribe audio to English with Whisper, then translate the text
    into *target_language* with mBART.

    Args:
        inputs: audio accepted by the ASR pipeline (file path or array);
            ``None`` means the user submitted nothing.
        target_language: display name, a key of ``lang_ids``
            (e.g. ``'English'``).

    Returns:
        The translated text as a single string (the raw English
        transcription when the target is English).

    Raises:
        gr.Error: if no audio file was provided.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload an audio file before submitting your request.")

    # Whisper's "translate" task always emits English text, whatever the
    # spoken language was.
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "translate"}, return_timestamps=True)["text"]

    # English needs no second translation pass.
    if target_language == 'English':
        return text

    # Only look up the mBART language code when we actually translate
    # (the original did this lookup even on the English path).
    target_lang = lang_ids[target_language]
    tokenizer.src_lang = "en_XX"

    # Translate sentence-by-sentence to keep each chunk within the
    # model's context window.
    translated_chunks = []
    for segment in split_into_sentences(text):
        encoded_chunk = tokenizer(segment, return_tensors="pt")
        generated_tokens = model.generate(
            **encoded_chunk,
            forced_bos_token_id=tokenizer.lang_code_to_id[target_lang],
        )
        translated_chunks.append(
            tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
        )

    # Join with spaces: the original concatenated chunks directly (and via
    # quadratic string +=), running sentences together with no separator
    # because the splitter strips terminal punctuation.
    return ' '.join(translated_chunks)
# Gradio UI wiring: audio upload + target-language dropdown -> text output.
inputs = [
    gr.Audio(label="Audio file"),
    gr.Dropdown(lang_list, value="English", label="Target Language"),
]

description = "Audio translation"

translation_interface = gr.Interface(
    fn=translate_audio,
    inputs=inputs,
    outputs="text",
    title="Speech Translation",
    description=description,
)

# Blocking call: starts the Gradio server for this Space.
# (The stray trailing '|' after launch() in the original was scrape
# residue and a syntax error; removed.)
translation_interface.launch()