hushell committed on
Commit
4064286
·
1 Parent(s): 38d8edb

refine recording

Browse files
Files changed (1) hide show
  1. app.py +18 -4
app.py CHANGED
@@ -22,10 +22,7 @@ WHISPER_MODEL.to(device)
22
  def transcribe(aud_inp):
23
  if aud_inp is None:
24
  return ""
25
- if isinstance(aud_inp, str):
26
- aud = whisper.load_audio(aud_inp)
27
- elif isinstance(aud_inp, bytes): # if st_audiorec
28
- aud = np.frombuffer(aud_inp, dtype=np.uint8).flatten().astype(np.float32) / 255.0
29
  aud = whisper.pad_or_trim(aud)
30
  mel = whisper.log_mel_spectrogram(aud).to(device)
31
  _, probs = WHISPER_MODEL.detect_language(mel)
@@ -43,6 +40,22 @@ def transcribe(aud_inp):
43
  return result_text
44
 
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  st.set_page_config(page_title='ChatGPT Assistant', layout='wide', page_icon='🤖')
47
  # 自定义元素样式
48
  st.markdown(css_code, unsafe_allow_html=True)
@@ -232,6 +245,7 @@ with tap_input:
232
  wav_audio_data = st_audiorec()
233
  if wav_audio_data is not None:
234
  st.audio(wav_audio_data, format='audio/wav')
 
235
  user_input = transcribe(wav_audio_data)
236
 
237
  if submitted or wav_audio_data is not None:
 
22
  def transcribe(aud_inp):
23
  if aud_inp is None:
24
  return ""
25
+ aud = whisper.load_audio(aud_inp)
 
 
 
26
  aud = whisper.pad_or_trim(aud)
27
  mel = whisper.log_mel_spectrogram(aud).to(device)
28
  _, probs = WHISPER_MODEL.detect_language(mel)
 
40
  return result_text
41
 
42
 
43
def bytes_to_wav(wav_bytes, output_wav_file="output.wav",
                 sample_width=1,  # 1 byte per sample
                 sample_rate=44100,  # Sample rate in Hz
                 num_channels=1,  # Mono audio
                 ):
    """Write audio bytes to ``output_wav_file`` as a WAV file and return its path.

    If ``wav_bytes`` already is a complete RIFF/WAVE container, it is written
    to disk unchanged so the header it carries (sample width, rate, channels)
    stays authoritative; ``sample_width``, ``sample_rate`` and
    ``num_channels`` are ignored in that case. Otherwise the bytes are
    treated as raw, uncompressed PCM frames and wrapped in a new WAV header
    built from those three parameters.

    Args:
        wav_bytes: Bytes-like audio data, either a full WAV file or raw PCM
            frames.
        output_wav_file: Destination path; an existing file is overwritten.
        sample_width: Bytes per sample for raw PCM input (1 means 8-bit).
        sample_rate: Sampling frequency in Hz for raw PCM input.
        num_channels: Channel count for raw PCM input (1 means mono).

    Returns:
        ``output_wav_file``, so the result can be handed straight to code
        that expects a file path.
    """
    # A RIFF/WAVE container starts with b"RIFF", a 4-byte size field, then
    # b"WAVE". Passing such data to writeframes() would bury the original
    # header inside the sample data and mislabel the audio format.
    if wav_bytes[:4] == b"RIFF" and wav_bytes[8:12] == b"WAVE":
        with open(output_wav_file, "wb") as out_file:
            out_file.write(wav_bytes)
        return output_wav_file

    # Raw PCM: let the wave module generate the header around the frames.
    with wave.open(output_wav_file, "wb") as wav_file:
        wav_file.setnchannels(num_channels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(sample_rate)
        wav_file.setcomptype("NONE", "not compressed")
        wav_file.writeframes(wav_bytes)
    return output_wav_file
56
+
57
+
58
+ ######################################################################################
59
  st.set_page_config(page_title='ChatGPT Assistant', layout='wide', page_icon='🤖')
60
  # 自定义元素样式
61
  st.markdown(css_code, unsafe_allow_html=True)
 
245
  wav_audio_data = st_audiorec()
246
  if wav_audio_data is not None:
247
  st.audio(wav_audio_data, format='audio/wav')
248
+ bytes_to_wav(wav_audio_data, sample_rate=16000)
249
  user_input = transcribe(wav_audio_data)
250
 
251
  if submitted or wav_audio_data is not None: