Pranjal12345 committed
Commit 174a3fe
1 Parent(s): 5a4d77d

Update app.py

Files changed (1)
app.py +28 -21
app.py CHANGED
@@ -1,10 +1,20 @@
 import gradio as gr
-from faster_whisper import WhisperModel
+from transformers import pipeline
 from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
 from utils import lang_ids
+import nltk
+nltk.download('punkt')
 
-model_size = "medium"
-ts_model = WhisperModel(model_size, device = "cpu", compute_type = "int8")
+MODEL_NAME = "openai/whisper-medium"
+BATCH_SIZE = 8
+FILE_LIMIT_MB = 1000
+
+pipe = pipeline(
+    task="automatic-speech-recognition",
+    model=MODEL_NAME,
+    chunk_length_s=30,
+    device='cpu',
+)
 
 lang_list = list(lang_ids.keys())
@@ -12,25 +22,23 @@ def translate_audio(inputs,target_language):
     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload an audio file before submitting your request.")
 
-    segments, _ = ts_model.transcribe(inputs, task="translate")
+    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "translate"}, return_timestamps=True)["text"]
 
     target_lang = lang_ids[target_language]
 
     if target_language == 'English':
-        lst = ''
-        for segment in segments:
-            lst = lst + segment.text
-        return lst
+        return text
 
     else:
         model = MBartForConditionalGeneration.from_pretrained("sanjitaa/mbart-many-to-many")
         tokenizer = MBart50TokenizerFast.from_pretrained("sanjitaa/mbart-many-to-many")
 
         tokenizer.src_lang = "en_XX"
+        chunks = nltk.tokenize.sent_tokenize(text)
         translated_text = ''
 
-        for segment in segments:
-            encoded_chunk = tokenizer(segment.text, return_tensors="pt")
+        for segment in chunks:
+            encoded_chunk = tokenizer(segment, return_tensors="pt")
             generated_tokens = model.generate(
                 **encoded_chunk,
@@ -40,20 +48,19 @@ def translate_audio(inputs,target_language):
             translated_text = translated_text + translated_chunk[0]
         return translated_text
 
+inputs=[
+    gr.inputs.Audio(source="upload", type="filepath", label="Audio file"),
+    gr.Dropdown(lang_list, value="English", label="Target Language"),
+]
+description = "Audio translation"
+
+
 translation_interface = gr.Interface(
     fn=translate_audio,
-    inputs=[
-        gr.inputs.Audio(source="upload", type="filepath", label="Audio file"),
-        gr.Dropdown(lang_list, value="English", label="Target Language"),
-    ],
+    inputs= inputs,
     outputs="text",
-    layout="horizontal",
-    theme="huggingface",
-    title="Translate Audio to English",
-    description=(
-        "Translate audio inputs to English using the"
-    ),
-    allow_flagging="never",
+    title="Speech Translation",
+    description= description
 )
 
 if __name__ == "__main__":
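
For orientation, here is a minimal runnable sketch of the translate_audio flow as it stands after this commit. The hunks cut off mid-call, so the remaining generate kwargs, the decode step that produces translated_chunk, and anything below the last hunk are assumptions; the hardcoded "fr_XX" target code likewise stands in for the lang_ids lookup imported from utils.

import gradio as gr
import nltk
from transformers import pipeline, MBartForConditionalGeneration, MBart50TokenizerFast

nltk.download('punkt')

# Stage 1: Whisper transcribes the audio and translates it into English.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-medium",  # MODEL_NAME in the commit
    chunk_length_s=30,              # long audio is decoded in 30-second windows
    device='cpu',
)

def translate_audio(inputs, target_language):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload an audio file before submitting your request.")

    text = pipe(inputs, batch_size=8,
                generate_kwargs={"task": "translate"},
                return_timestamps=True)["text"]

    if target_language == 'English':
        return text

    # Stage 2: MBart translates the English text sentence by sentence.
    model = MBartForConditionalGeneration.from_pretrained("sanjitaa/mbart-many-to-many")
    tokenizer = MBart50TokenizerFast.from_pretrained("sanjitaa/mbart-many-to-many")
    tokenizer.src_lang = "en_XX"

    translated_text = ''
    for sentence in nltk.tokenize.sent_tokenize(text):
        encoded_chunk = tokenizer(sentence, return_tensors="pt")
        generated_tokens = model.generate(
            **encoded_chunk,
            # Assumption: the kwargs hidden by the hunk boundary force decoding
            # into the target language; "fr_XX" stands in for lang_ids[target_language].
            forced_bos_token_id=tokenizer.lang_code_to_id["fr_XX"],
        )
        translated_text += tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    return translated_text

The switch from faster-whisper to the transformers ASR pipeline buys built-in long-form handling (chunk_length_s=30) and batched decoding via batch_size, at the cost of redownloading Whisper weights in the pipeline format. Note that gr.inputs.Audio(source="upload", ...) is the legacy Gradio 3 namespace; on Gradio 4+ the equivalent would be gr.Audio(sources=["upload"], type="filepath").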