anzorq committed on
Commit
9b33f31
·
1 Parent(s): d411901

Update app.py

Browse files

load lang list dynamically from tokenizer

Files changed (1) hide show
  1. app.py +9 -37
app.py CHANGED
@@ -1,9 +1,11 @@
1
  import os
2
  import gradio as gr
3
  import whisper
 
4
  import time
5
 
6
  model = whisper.load_model("base")
 
7
 
8
  def transcribe(audio, state={}, delay=0.2, lang=None, translate=False):
9
  time.sleep(delay)
@@ -31,43 +33,13 @@ title = "OpenAI's Whisper Real-time Demo"
31
  description = "A simple demo of OpenAI's [**Whisper**](https://github.com/openai/whisper) speech recognition model."
32
 
33
  delay_slider = gr.inputs.Slider(minimum=0, maximum=5, default=0.2, label="Rate of transcription (1 sec + this value)")
34
- lang_dropdown = gr.inputs.Dropdown(choices=["auto", "english", "afrikaans",
35
- "albanian", "amharic", "arabic",
36
- "armenian", "assamese", "azerbaijani",
37
- "bashkir", "basque", "belarusian",
38
- "bengali", "bosnian", "breton",
39
- "bulgarian", "catalan", "chinese",
40
- "croatian", "czech", "danish",
41
- "dutch", "estonian", "faroese",
42
- "finnish", "french", "galician",
43
- "georgian", "german", "greek",
44
- "gujarati", "haitian creole", "hausa",
45
- "hawaiian", "hebrew", "hindi",
46
- "hungarian", "icelandic", "indonesian",
47
- "italian", "japanese", "javanese",
48
- "kannada", "kazakh", "khmer",
49
- "korean", "kyrgyz", "lao",
50
- "latin", "latvian", "lingala",
51
- "lithuanian", "luxembourgish", "macedonian",
52
- "malagasy", "malay", "malayalam",
53
- "maltese", "maori", "marathi",
54
- "mongolian", "myanmar", "nepali",
55
- "norwegian", "nyanja", "nynorsk",
56
- "occitan", "oriya", "pashto",
57
- "persian", "polish", "portuguese",
58
- "punjabi", "romanian", "russian",
59
- "sanskrit", "sardinian", "serbian",
60
- "shona", "sindhi", "sinhala",
61
- "slovak", "slovenian", "somali",
62
- "spanish", "sundanese", "swahili",
63
- "swedish", "tagalog", "tajik",
64
- "tamil", "tatar", "telugu",
65
- "thai", "tigrinya", "tibetan",
66
- "turkish", "turkmen", "ukrainian",
67
- "urdu", "uzbek", "vietnamese",
68
- "welsh", "xhosa", "yiddish",
69
- "yoruba"],
70
- label="Language", default="auto", type="value")
71
 
72
  translate_checkbox = gr.inputs.Checkbox(label="Translate to English", default=False)
73
 
 
1
  import os
2
  import gradio as gr
3
  import whisper
4
+ from whisper import tokenizer
5
  import time
6
 
7
  model = whisper.load_model("base")
8
+ AUTO_DETECT_LANG = "Auto Detect"
9
 
10
  def transcribe(audio, state={}, delay=0.2, lang=None, translate=False):
11
  time.sleep(delay)
 
33
  description = "A simple demo of OpenAI's [**Whisper**](https://github.com/openai/whisper) speech recognition model."
34
 
35
  delay_slider = gr.inputs.Slider(minimum=0, maximum=5, default=0.2, label="Rate of transcription (1 sec + this value)")
36
+
37
+ available_languages = sorted(tokenizer.TO_LANGUAGE_CODE.keys())
38
+ available_languages = [AUTO_DETECT_LANG]+available_languages
39
+ lang_dropdown = gr.inputs.Dropdown(choices=available_languages, label="Language", default=AUTO_DETECT_LANG, type="value")
40
+
41
+ if lang_dropdown==AUTO_DETECT_LANG:
42
+ lang_dropdown=None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  translate_checkbox = gr.inputs.Checkbox(label="Translate to English", default=False)
45