chinmaydan committed
Commit 88413ab · 1 Parent(s): 84718ed

troubleshooting the detect language part

Files changed (1): app.py +10 -2
app.py CHANGED
@@ -1,11 +1,14 @@
+# imports
 import os
 os.system("pip install git+https://github.com/openai/whisper.git")
 import gradio as gr
 import whisper
 
+# the model we are using for ASR, options are small, medium, large and largev2 (large and largev2 don't fit on huggingface cpu)
 model = whisper.load_model("small")
 
 
+# A table to look up all the languages
 language_id_lookup = {
     "English" : "en",
     "German" : "de",
@@ -22,8 +25,13 @@ language_id_lookup = {
     }
 
 
+
+# The predict function. audio, language and mic_audio are all parameters directly passed by gradio
+# which means they are user inputted. They are specified in gr.inputs[] block at the bottom. The
+# gr.outputs[] block will specify the output type.
 def predict(audio, language, mic_audio=None):
-    # audio = tuple (sample_rate, frames) or (sample_rate, (frames, channels))
+
+    # checks if mic_audio is used, otherwise feeds model uploaded audio
     if mic_audio is not None:
         input_audio = mic_audio
     elif audio is not None:
@@ -37,7 +45,7 @@ def predict(audio, language, mic_audio=None):
     mel = whisper.log_mel_spectrogram(audio).to(model.device)
 
     if(language == "Detect Language"):
-        outLanguage, probs = model.detect_language(mel)
+        outLanguage, probs = model._detect_language(mel)
         print("Detected language is: " + outLanguage)
     else:
         outLanguage = language_id_lookup[language.split()[0]]
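
The only functional change in this commit swaps model.detect_language(mel) for model._detect_language(mel). For reference, the public openai/whisper API documents detect_language, which for a single 30-second segment returns a pair (language_token, probs), where probs is a dict mapping language codes such as "en" to probabilities; the detected code is normally recovered with max(probs, key=probs.get) rather than read directly from the first return value. A minimal sketch of that flow, following the usage example in the openai/whisper README (the file name "audio.mp3" is a placeholder):

    import whisper

    model = whisper.load_model("small")

    # Load the audio and pad/trim it to the 30-second window Whisper expects.
    audio = whisper.load_audio("audio.mp3")
    audio = whisper.pad_or_trim(audio)

    # Compute the log-Mel spectrogram and move it to the model's device.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect_language returns (language_token, probs); probs maps codes like "en" to probabilities.
    _, probs = model.detect_language(mel)
    print("Detected language is: " + max(probs, key=probs.get))

If outLanguage holds the raw first return value (a token tensor) rather than a language string, the concatenation in print("Detected language is: " + outLanguage) raises a TypeError, which may be the behavior this commit is troubleshooting.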
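
The new comments above predict refer to the gr.inputs[] and gr.outputs[] blocks at the bottom of app.py, which this diff does not show. As a purely hypothetical illustration of that wiring in the legacy Gradio inputs/outputs API (every choice, label, and type here is an assumption, not the app's actual code):

    import gradio as gr

    # Hypothetical wiring; the app's real gr.inputs[]/gr.outputs[] block is not part of this diff.
    gr.Interface(
        fn=predict,
        inputs=[
            gr.inputs.Audio(source="upload", type="filepath", label="Upload audio", optional=True),
            gr.inputs.Dropdown(["Detect Language", "English (en)", "German (de)"], label="Language"),
            gr.inputs.Audio(source="microphone", type="filepath", label="Record audio", optional=True),
        ],
        outputs=gr.outputs.Textbox(label="Transcription"),
    ).launch()

Each entry in inputs= is passed positionally to predict(audio, language, mic_audio), which is why the function's parameters are described as user input.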