iqbalc committed on
Commit
40ce253
·
1 Parent(s): 0b3cb65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -1,31 +1,32 @@
1
  import os
2
  os.system("pip install git+https://github.com/openai/whisper.git")
 
3
  import gradio as gr
4
  import whisper
5
  model = whisper.load_model("large")
6
 
7
  import time
 
8
  def transcribe(audio):
9
-
10
- #time.sleep(3)
11
- # load audio and pad/trim it to fit 30 seconds
12
  audio = whisper.load_audio(audio)
13
  audio = whisper.pad_or_trim(audio)
14
 
15
- # make log-Mel spectrogram and move to the same device as the model
16
  mel = whisper.log_mel_spectrogram(audio).to(model.device)
17
 
18
  # detect the spoken language
19
  _, probs = model.detect_language(mel)
20
  print(f"Detected language: {max(probs, key=probs.get)}")
21
 
22
- # decode the audio
23
- options = whisper.DecodingOptions()
24
  result = whisper.decode(model, mel, options)
 
25
  return result.text
26
-
27
  gr.Interface(
28
- title = 'Speech to text Demo',
29
  fn=transcribe,
30
  inputs=[
31
  gr.inputs.Audio(source="microphone", type="filepath")
 
1
  import os
2
  os.system("pip install git+https://github.com/openai/whisper.git")
3
+
4
  import gradio as gr
5
  import whisper
6
  model = whisper.load_model("large")
7
 
8
  import time
9
+
10
  def transcribe(audio):
11
+ # load the audio and pad/trim it to fit 30 seconds
 
 
12
  audio = whisper.load_audio(audio)
13
  audio = whisper.pad_or_trim(audio)
14
 
15
+ # make log-Mel spectrogram and move it to the same device as the model
16
  mel = whisper.log_mel_spectrogram(audio).to(model.device)
17
 
18
  # detect the spoken language
19
  _, probs = model.detect_language(mel)
20
  print(f"Detected language: {max(probs, key=probs.get)}")
21
 
22
+ # decode the audio
23
+ options = whisper.DecodingOptions(fp16 = False)
24
  result = whisper.decode(model, mel, options)
25
+ print(result.text)
26
  return result.text
27
+
28
  gr.Interface(
29
+ title = 'Speech to text Demo with OpenAI',
30
  fn=transcribe,
31
  inputs=[
32
  gr.inputs.Audio(source="microphone", type="filepath")