megatrump committed on
Commit
3b8b027
·
1 Parent(s): 51e0a26
Files changed (1) hide show
  1. api.py +14 -9
api.py CHANGED
@@ -294,19 +294,24 @@ def transcribe_audio_gradio(audio, language="auto"):
294
  return "请上传音频文件"
295
 
296
  # 读取音频数据
297
- waveform, sample_rate = audio
 
 
 
 
 
 
 
298
 
299
  # 转换为单声道
300
- if waveform.shape[0] > 1:
301
- waveform = waveform.mean(dim=0)
302
-
303
- # 转换为numpy array并归一化
304
- input_wav = waveform.numpy().astype(np.float32)
305
 
306
  # 重采样到16kHz
307
- if sample_rate != 16000:
308
- resampler = torchaudio.transforms.Resample(sample_rate, 16000)
309
- input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
 
310
 
311
  # 模型推理
312
  text = model.generate(
 
294
  return "请上传音频文件"
295
 
296
  # 读取音频数据
297
+ fs, input_wav = audio
298
+
299
+ print('------------------------------')
300
+ print(fs, type(fs))
301
+ print(input_wav, type(input_wav))
302
+ print('------------------------------')
303
+
304
+ input_wav = input_wav.astype(np.float32) / np.iinfo(np.int16).max
305
 
306
  # 转换为单声道
307
+ if len(input_wav.shape) > 1:
308
+ input_wav = input_wav.mean(-1)
 
 
 
309
 
310
  # 重采样到16kHz
311
+ if fs != 16000:
312
+ resampler = torchaudio.transforms.Resample(fs, 16000)
313
+ input_wav_t = torch.from_numpy(input_wav).to(torch.float32)
314
+ input_wav = resampler(input_wav_t[None, :])[0, :].numpy()
315
 
316
  # 模型推理
317
  text = model.generate(