anzorq commited on
Commit
492c47b
·
1 Parent(s): 2bf0c14

+ model size selection

Browse files
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -4,12 +4,17 @@ import whisper
4
  from whisper import tokenizer
5
  import time
6
 
7
- model = whisper.load_model("base")
 
8
  AUTO_DETECT_LANG = "Auto Detect"
9
 
10
- def transcribe(audio, state={}, delay=1.2, lang=None, translate=False):
11
  time.sleep(delay - 1)
12
 
 
 
 
 
13
  transcription = model.transcribe(
14
  audio,
15
  language = lang if lang != AUTO_DETECT_LANG else None
@@ -27,11 +32,13 @@ def transcribe(audio, state={}, delay=1.2, lang=None, translate=False):
27
  state['translation'] += translation.text + " "
28
 
29
  return state['transcription'], state['translation'], state, f"detected language: {transcription['language']}"
30
-
31
 
32
  title = "OpenAI's Whisper Real-time Demo"
33
  description = "A simple demo of OpenAI's [**Whisper**](https://github.com/openai/whisper) speech recognition model."
34
 
 
 
35
  delay_slider = gr.inputs.Slider(minimum=1, maximum=5, default=1.2, label="Rate of transcription")
36
 
37
  available_languages = sorted(tokenizer.TO_LANGUAGE_CODE.keys())
@@ -58,6 +65,7 @@ gr.Interface(
58
  inputs=[
59
  gr.Audio(source="microphone", type="filepath", streaming=True),
60
  state,
 
61
  delay_slider,
62
  lang_dropdown,
63
  translate_checkbox
 
4
  from whisper import tokenizer
5
  import time
6
 
7
+ current_size = 'base'
8
+ model = whisper.load_model(current_size)
9
  AUTO_DETECT_LANG = "Auto Detect"
10
 
11
+ def transcribe(audio, state={}, model_size='base', delay=1.2, lang=None, translate=False):
12
  time.sleep(delay - 1)
13
 
14
+ if model_size != current_size:
15
+ model = whisper.load_model(model_size)
16
+ current_size = model_size
17
+
18
  transcription = model.transcribe(
19
  audio,
20
  language = lang if lang != AUTO_DETECT_LANG else None
 
32
  state['translation'] += translation.text + " "
33
 
34
  return state['transcription'], state['translation'], state, f"detected language: {transcription['language']}"
35
+
36
 
37
  title = "OpenAI's Whisper Real-time Demo"
38
  description = "A simple demo of OpenAI's [**Whisper**](https://github.com/openai/whisper) speech recognition model."
39
 
40
+ model_size = gr.Dropdown(label="Model size", choices=['base', 'tiny', 'small', 'medium', 'large'], value='base')
41
+
42
  delay_slider = gr.inputs.Slider(minimum=1, maximum=5, default=1.2, label="Rate of transcription")
43
 
44
  available_languages = sorted(tokenizer.TO_LANGUAGE_CODE.keys())
 
65
  inputs=[
66
  gr.Audio(source="microphone", type="filepath", streaming=True),
67
  state,
68
+ model_size,
69
  delay_slider,
70
  lang_dropdown,
71
  translate_checkbox