hushell committed on
Commit
a2e5112
·
1 Parent(s): 9f77152
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -26,6 +26,7 @@ def transcribe(aud_inp):
26
  aud = whisper.pad_or_trim(aud)
27
  mel = whisper.log_mel_spectrogram(aud).to(device)
28
  _, probs = WHISPER_MODEL.detect_language(mel)
 
29
 
30
  if device == "cpu":
31
  options = whisper.DecodingOptions(fp16 = False, language=WHISPER_LANG)
@@ -241,8 +242,8 @@ with tap_input:
241
  wav_audio_data = st_audiorec()
242
  if wav_audio_data is not None:
243
  st.audio(wav_audio_data, format='audio/wav')
244
- bytes_to_wav(wav_audio_data, sample_rate=16000)
245
- user_input = transcribe(wav_audio_data)
246
 
247
  if submitted or wav_audio_data is not None:
248
  st.session_state['user_input_content'] = user_input
 
26
  aud = whisper.pad_or_trim(aud)
27
  mel = whisper.log_mel_spectrogram(aud).to(device)
28
  _, probs = WHISPER_MODEL.detect_language(mel)
29
+ print(f"spectrogram.shape = {mel}")
30
 
31
  if device == "cpu":
32
  options = whisper.DecodingOptions(fp16 = False, language=WHISPER_LANG)
 
242
  wav_audio_data = st_audiorec()
243
  if wav_audio_data is not None:
244
  st.audio(wav_audio_data, format='audio/wav')
245
+ bytes_to_wav(wav_audio_data, output_wav_file='output.wav', sample_rate=16000)
246
+ user_input = transcribe('output.wav')
247
 
248
  if submitted or wav_audio_data is not None:
249
  st.session_state['user_input_content'] = user_input