Spaces:
Runtime error
Runtime error
+ model size selection
Browse files
app.py
CHANGED
@@ -4,12 +4,17 @@ import whisper
|
|
4 |
from whisper import tokenizer
|
5 |
import time
|
6 |
|
7 |
-
|
|
|
8 |
AUTO_DETECT_LANG = "Auto Detect"
|
9 |
|
10 |
-
def transcribe(audio, state={}, delay=1.2, lang=None, translate=False):
|
11 |
time.sleep(delay - 1)
|
12 |
|
|
|
|
|
|
|
|
|
13 |
transcription = model.transcribe(
|
14 |
audio,
|
15 |
language = lang if lang != AUTO_DETECT_LANG else None
|
@@ -27,11 +32,13 @@ def transcribe(audio, state={}, delay=1.2, lang=None, translate=False):
|
|
27 |
state['translation'] += translation.text + " "
|
28 |
|
29 |
return state['transcription'], state['translation'], state, f"detected language: {transcription['language']}"
|
30 |
-
|
31 |
|
32 |
title = "OpenAI's Whisper Real-time Demo"
|
33 |
description = "A simple demo of OpenAI's [**Whisper**](https://github.com/openai/whisper) speech recognition model."
|
34 |
|
|
|
|
|
35 |
delay_slider = gr.inputs.Slider(minimum=1, maximum=5, default=1.2, label="Rate of transcription")
|
36 |
|
37 |
available_languages = sorted(tokenizer.TO_LANGUAGE_CODE.keys())
|
@@ -58,6 +65,7 @@ gr.Interface(
|
|
58 |
inputs=[
|
59 |
gr.Audio(source="microphone", type="filepath", streaming=True),
|
60 |
state,
|
|
|
61 |
delay_slider,
|
62 |
lang_dropdown,
|
63 |
translate_checkbox
|
|
|
4 |
from whisper import tokenizer
|
5 |
import time
|
6 |
|
7 |
+
current_size = 'base'
|
8 |
+
model = whisper.load_model(current_size)
|
9 |
AUTO_DETECT_LANG = "Auto Detect"
|
10 |
|
11 |
+
def transcribe(audio, state={}, model_size='base', delay=1.2, lang=None, translate=False):
|
12 |
time.sleep(delay - 1)
|
13 |
|
14 |
+
if model_size != current_size:
|
15 |
+
model = whisper.load_model(model_size)
|
16 |
+
current_size = model_size
|
17 |
+
|
18 |
transcription = model.transcribe(
|
19 |
audio,
|
20 |
language = lang if lang != AUTO_DETECT_LANG else None
|
|
|
32 |
state['translation'] += translation.text + " "
|
33 |
|
34 |
return state['transcription'], state['translation'], state, f"detected language: {transcription['language']}"
|
35 |
+
|
36 |
|
37 |
title = "OpenAI's Whisper Real-time Demo"
|
38 |
description = "A simple demo of OpenAI's [**Whisper**](https://github.com/openai/whisper) speech recognition model."
|
39 |
|
40 |
+
model_size = gr.Dropdown(label="Model size", choices=['base', 'tiny', 'small', 'medium', 'large'], value='base')
|
41 |
+
|
42 |
delay_slider = gr.inputs.Slider(minimum=1, maximum=5, default=1.2, label="Rate of transcription")
|
43 |
|
44 |
available_languages = sorted(tokenizer.TO_LANGUAGE_CODE.keys())
|
|
|
65 |
inputs=[
|
66 |
gr.Audio(source="microphone", type="filepath", streaming=True),
|
67 |
state,
|
68 |
+
model_size,
|
69 |
delay_slider,
|
70 |
lang_dropdown,
|
71 |
translate_checkbox
|