Commit
·
1571261
1
Parent(s):
5d4c1da
Add model size options for Whisper
Browse files
- app.py +6 -1
- backend/audio_to_tgt.py +2 -2
app.py
CHANGED
@@ -45,7 +45,12 @@ description_audio = "Upload an audio file to extract text and translate it to En
|
|
45 |
|
46 |
audio_interface = gr.Interface(
|
47 |
fn=src_audio_to_eng_translator,
|
48 |
-
inputs=gr.Audio(label="Upload an Audio file", type="filepath"),
|
|
|
|
|
|
|
|
|
|
|
49 |
outputs=gr.Textbox(label="Translated Text in English"),
|
50 |
title=heading_audio,
|
51 |
description=description_audio
|
|
|
45 |
|
46 |
audio_interface = gr.Interface(
|
47 |
fn=src_audio_to_eng_translator,
|
48 |
+
inputs=[gr.Audio(label="Upload an Audio file", type="filepath"),
|
49 |
+
gr.Dropdown(
|
50 |
+
choices=["turbo", "base", "tiny", "small", "medium", "large"],
|
51 |
+
label="Select Whisper Model size",
|
52 |
+
)
|
53 |
+
],
|
54 |
outputs=gr.Textbox(label="Translated Text in English"),
|
55 |
title=heading_audio,
|
56 |
description=description_audio
|
backend/audio_to_tgt.py
CHANGED
@@ -10,10 +10,10 @@ def audio_to_numpy(audio_file_input):
|
|
10 |
|
11 |
return samples / np.iinfo(audio.array_type).max
|
12 |
|
13 |
-
def src_audio_to_eng_translator(audio_file_input):
|
14 |
audio_data = audio_to_numpy(audio_file_input)
|
15 |
|
16 |
-
model = whisper.load_model(
|
17 |
result = model.transcribe(audio_data)
|
18 |
|
19 |
translated_text = GoogleTranslator(source='auto', target='en').translate(result["text"])
|
|
|
10 |
|
11 |
return samples / np.iinfo(audio.array_type).max
|
12 |
|
13 |
+
def src_audio_to_eng_translator(audio_file_input, model_size = "turbo"):
|
14 |
audio_data = audio_to_numpy(audio_file_input)
|
15 |
|
16 |
+
model = whisper.load_model(model_size)
|
17 |
result = model.transcribe(audio_data)
|
18 |
|
19 |
translated_text = GoogleTranslator(source='auto', target='en').translate(result["text"])
|