DebasishDhal99 committed on
Commit
1571261
·
1 Parent(s): 5d4c1da

Add model size options for whisper

Browse files
Files changed (2) hide show
  1. app.py +6 -1
  2. backend/audio_to_tgt.py +2 -2
app.py CHANGED
@@ -45,7 +45,12 @@ description_audio = "Upload an audio file to extract text and translate it to En
45
 
46
  audio_interface = gr.Interface(
47
  fn=src_audio_to_eng_translator,
48
- inputs=gr.Audio(label="Upload an Audio file", type="filepath"),
 
 
 
 
 
49
  outputs=gr.Textbox(label="Translated Text in English"),
50
  title=heading_audio,
51
  description=description_audio
 
45
 
46
  audio_interface = gr.Interface(
47
  fn=src_audio_to_eng_translator,
48
+ inputs=[gr.Audio(label="Upload an Audio file", type="filepath"),
49
+ gr.Dropdown(
50
+ choices=["turbo", "base", "tiny", "small", "medium", "large"],
51
+ label="Select Whisper Model size",
52
+ )
53
+ ],
54
  outputs=gr.Textbox(label="Translated Text in English"),
55
  title=heading_audio,
56
  description=description_audio
backend/audio_to_tgt.py CHANGED
@@ -10,10 +10,10 @@ def audio_to_numpy(audio_file_input):
10
 
11
  return samples / np.iinfo(audio.array_type).max
12
 
13
- def src_audio_to_eng_translator(audio_file_input):
14
  audio_data = audio_to_numpy(audio_file_input)
15
 
16
- model = whisper.load_model("turbo")
17
  result = model.transcribe(audio_data)
18
 
19
  translated_text = GoogleTranslator(source='auto', target='en').translate(result["text"])
 
10
 
11
  return samples / np.iinfo(audio.array_type).max
12
 
13
+ def src_audio_to_eng_translator(audio_file_input, model_size = "turbo"):
14
  audio_data = audio_to_numpy(audio_file_input)
15
 
16
+ model = whisper.load_model(model_size)
17
  result = model.transcribe(audio_data)
18
 
19
  translated_text = GoogleTranslator(source='auto', target='en').translate(result["text"])