andgrt committed on
Commit
cfcd1f4
·
1 Parent(s): 2b8da86

fix: whisper model params

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -84,7 +84,17 @@ def transcribe(image, audio):
84
 
85
  y = y.astype(np.float32)
86
  y /= np.max(np.abs(y))
87
- return generate_answer(image, transcriber({"sampling_rate": sr, "raw": y})["text"])
 
 
 
 
 
 
 
 
 
 
88
 
89
 
90
  qa_interface = gr.Interface(
 
84
 
85
  y = y.astype(np.float32)
86
  y /= np.max(np.abs(y))
87
+
88
+ input_features = transcriber.feature_extractor(
89
+ y, sampling_rate=sr, return_tensors="pt"
90
+ ).input_features
91
+
92
+ transcription = transcriber.model.generate(input_features)
93
+ transcription_text = transcriber.tokenizer.decode(
94
+ transcription[0], skip_special_tokens=True
95
+ )
96
+
97
+ return generate_answer(image, transcription_text)
98
 
99
 
100
  qa_interface = gr.Interface(