Spaces:
Runtime error
Runtime error
refine recording
Browse files
app.py
CHANGED
@@ -22,10 +22,7 @@ WHISPER_MODEL.to(device)
|
|
22 |
def transcribe(aud_inp):
|
23 |
if aud_inp is None:
|
24 |
return ""
|
25 |
-
|
26 |
-
aud = whisper.load_audio(aud_inp)
|
27 |
-
elif isinstance(aud_inp, bytes): # if st_audiorec
|
28 |
-
aud = np.frombuffer(aud_inp, dtype=np.uint8).flatten().astype(np.float32) / 255.0
|
29 |
aud = whisper.pad_or_trim(aud)
|
30 |
mel = whisper.log_mel_spectrogram(aud).to(device)
|
31 |
_, probs = WHISPER_MODEL.detect_language(mel)
|
@@ -43,6 +40,22 @@ def transcribe(aud_inp):
|
|
43 |
return result_text
|
44 |
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
st.set_page_config(page_title='ChatGPT Assistant', layout='wide', page_icon='🤖')
|
47 |
# Custom element styles
|
48 |
st.markdown(css_code, unsafe_allow_html=True)
|
@@ -232,6 +245,7 @@ with tap_input:
|
|
232 |
wav_audio_data = st_audiorec()
|
233 |
if wav_audio_data is not None:
|
234 |
st.audio(wav_audio_data, format='audio/wav')
|
|
|
235 |
user_input = transcribe(wav_audio_data)
|
236 |
|
237 |
if submitted or wav_audio_data is not None:
|
|
|
22 |
def transcribe(aud_inp):
|
23 |
if aud_inp is None:
|
24 |
return ""
|
25 |
+
aud = whisper.load_audio(aud_inp)
|
|
|
|
|
|
|
26 |
aud = whisper.pad_or_trim(aud)
|
27 |
mel = whisper.log_mel_spectrogram(aud).to(device)
|
28 |
_, probs = WHISPER_MODEL.detect_language(mel)
|
|
|
40 |
return result_text
|
41 |
|
42 |
|
43 |
+
def bytes_to_wav(wav_bytes, output_wav_file="output.wav",
                 sample_width=1,     # bytes per sample (1 = 8-bit PCM)
                 sample_rate=44100,  # sample rate in Hz
                 num_channels=1,     # 1 = mono audio
                 ):
    """Write audio bytes to ``output_wav_file`` as a WAV file and return its path.

    Two kinds of input are accepted:

    * A complete WAV file (the bytes start with a ``RIFF`` chunk whose form
      type is ``WAVE``). These bytes are written to disk unchanged, because
      wrapping them in a second header would turn the original header into
      audio samples and mislabel the format. ``sample_width``,
      ``sample_rate`` and ``num_channels`` are ignored in this case since the
      embedded header already describes the audio.
    * Raw, header-less PCM frames. These are wrapped in a new uncompressed
      WAV container described by ``sample_width``, ``sample_rate`` and
      ``num_channels``.

    NOTE(review): the recorder bytes passed in by this file are also handed to
    ``st.audio(..., format='audio/wav')``, which suggests they are already a
    complete WAV file -- confirm against the recorder component.

    Args:
        wav_bytes: Audio data, either a full WAV file or raw PCM frames.
        output_wav_file: Destination path of the WAV file to create.
        sample_width: Bytes per sample for raw PCM input.
        sample_rate: Sample rate in Hz for raw PCM input.
        num_channels: Channel count for raw PCM input.

    Returns:
        ``output_wav_file``, so the result can be passed straight to code
        that expects a file path.
    """
    # A WAV file starts with b"RIFF", a 4-byte size, then the form type b"WAVE".
    already_wav = wav_bytes[:4] == b"RIFF" and wav_bytes[8:12] == b"WAVE"
    if already_wav:
        with open(output_wav_file, "wb") as out_file:
            out_file.write(wav_bytes)
        return output_wav_file

    # Header-less input: build an uncompressed PCM WAV container around it.
    with wave.open(output_wav_file, 'wb') as wav_file:
        wav_file.setnchannels(num_channels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(sample_rate)
        wav_file.setcomptype('NONE', 'not compressed')
        wav_file.writeframes(wav_bytes)
    return output_wav_file
|
56 |
+
|
57 |
+
|
58 |
+
######################################################################################
|
59 |
st.set_page_config(page_title='ChatGPT Assistant', layout='wide', page_icon='🤖')
|
60 |
# Custom element styles
|
61 |
st.markdown(css_code, unsafe_allow_html=True)
|
|
|
245 |
wav_audio_data = st_audiorec()
|
246 |
if wav_audio_data is not None:
|
247 |
st.audio(wav_audio_data, format='audio/wav')
|
248 |
+
bytes_to_wav(wav_audio_data, sample_rate=16000)
|
249 |
user_input = transcribe(wav_audio_data)
|
250 |
|
251 |
if submitted or wav_audio_data is not None:
|