Spaces:
Running
Running
update
Browse files
api.py
CHANGED
@@ -294,19 +294,24 @@ def transcribe_audio_gradio(audio, language="auto"):
|
|
294 |
return "请上传音频文件"
|
295 |
|
296 |
# 读取音频数据
|
297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
|
299 |
# 转换为单声道
|
300 |
-
if
|
301 |
-
|
302 |
-
|
303 |
-
# 转换为numpy array并归一化
|
304 |
-
input_wav = waveform.numpy().astype(np.float32)
|
305 |
|
306 |
# 重采样到16kHz
|
307 |
-
if
|
308 |
-
resampler = torchaudio.transforms.Resample(
|
309 |
-
|
|
|
310 |
|
311 |
# 模型推理
|
312 |
text = model.generate(
|
|
|
294 |
return "请上传音频文件"
|
295 |
|
296 |
# 读取音频数据
|
297 |
+
fs, input_wav = audio
|
298 |
+
|
299 |
+
print('------------------------------')
|
300 |
+
print(fs, type(fs))
|
301 |
+
print(input_wav, type(input_wav))
|
302 |
+
print('------------------------------')
|
303 |
+
|
304 |
+
input_wav = input_wav.astype(np.float32) / np.iinfo(np.int16).max
|
305 |
|
306 |
# 转换为单声道
|
307 |
+
if len(input_wav.shape) > 1:
|
308 |
+
input_wav = input_wav.mean(-1)
|
|
|
|
|
|
|
309 |
|
310 |
# 重采样到16kHz
|
311 |
+
if fs != 16000:
|
312 |
+
resampler = torchaudio.transforms.Resample(fs, 16000)
|
313 |
+
input_wav_t = torch.from_numpy(input_wav).to(torch.float32)
|
314 |
+
input_wav = resampler(input_wav_t[None, :])[0, :].numpy()
|
315 |
|
316 |
# 模型推理
|
317 |
text = model.generate(
|