Pranjal12345 committed on
Commit 5a4d77d
1 Parent(s): 8d609d2

Upload main.py

Files changed (1)
  1. main.py +67 -0
main.py ADDED
@@ -0,0 +1,67 @@
+ import gradio as gr
+ from transformers import pipeline
+ from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
+ from utils import lang_ids
+ import nltk
+ nltk.download('punkt')
+
+ MODEL_NAME = "openai/whisper-medium"
+ BATCH_SIZE = 8
+ FILE_LIMIT_MB = 1000
+
+ pipe = pipeline(
+     task="automatic-speech-recognition",
+     model=MODEL_NAME,
+     chunk_length_s=30,
+     device='cpu',
+ )
+
+ lang_list = list(lang_ids.keys())
+
+ def translate_audio(inputs, target_language):
+     if inputs is None:
+         raise gr.Error("No audio file submitted! Please upload an audio file before submitting your request.")
+
+     text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "translate"}, return_timestamps=True)["text"]
+
+     target_lang = lang_ids[target_language]
+
+     if target_language == 'English':
+         return text
+
+     else:
+         model = MBartForConditionalGeneration.from_pretrained("sanjitaa/mbart-many-to-many")
+         tokenizer = MBart50TokenizerFast.from_pretrained("sanjitaa/mbart-many-to-many")
+
+         tokenizer.src_lang = "en_XX"
+         chunks = nltk.tokenize.sent_tokenize(text)
+         translated_text = ''
+
+         for segment in chunks:
+             encoded_chunk = tokenizer(segment, return_tensors="pt")
+             generated_tokens = model.generate(
+                 **encoded_chunk,
+                 forced_bos_token_id=tokenizer.lang_code_to_id[target_lang]
+             )
+             translated_chunk = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
+             translated_text = translated_text + translated_chunk[0]
+         return translated_text
+
+ inputs = [
+     gr.inputs.Audio(source="upload", type="filepath", label="Audio file"),
+     gr.Dropdown(lang_list, value="English", label="Target Language"),
+ ]
+ description = "Audio translation"
+
+
+ translation_interface = gr.Interface(
+     fn=translate_audio,
+     inputs=inputs,
+     outputs="text",
+     title="Speech Translation",
+     description=description
+ )
+
+ if __name__ == "__main__":
+     translation_interface.launch()
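
Note: main.py imports `lang_ids` from a `utils` module that is not part of this commit. The following is a minimal, hypothetical sketch of what utils.py might contain, assuming `lang_ids` maps the Dropdown's display names to mBART-50 language codes (the values are looked up in `tokenizer.lang_code_to_id`); the actual names and codes in the repository may differ.

# Hypothetical sketch of utils.py (not included in this commit).
# Assumption: keys are the language names shown in the Gradio Dropdown,
# values are mBART-50 language codes accepted by MBart50TokenizerFast.
lang_ids = {
    "English": "en_XX",
    "French": "fr_XX",
    "German": "de_DE",
    "Spanish": "es_XX",
    "Hindi": "hi_IN",
    "Nepali": "ne_NP",
    "Chinese": "zh_CN",
    "Japanese": "ja_XX",
    "Russian": "ru_RU",
    "Arabic": "ar_AR",
}

With a mapping like this, `lang_list` supplies the Dropdown choices and `lang_ids[target_language]` gives the forced BOS token's language code for generation; any language added to the dict must use a code the mBART-50 tokenizer recognizes.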