megatrump committed on
Commit
bc343c9
·
1 Parent(s): 08ef38f

对齐了输入长度

Browse files
Files changed (1) hide show
  1. api.py +10 -1
api.py CHANGED
@@ -215,6 +215,15 @@ async def process_audio(audio_data: bytes, language: str = "auto") -> str:
215
  resampler = torchaudio.transforms.Resample(sample_rate, 16000)
216
  input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
217
 
 
 
 
 
 
 
 
 
 
218
  # Model inference
219
  text = model.generate(
220
  input=input_wav,
@@ -222,7 +231,7 @@ async def process_audio(audio_data: bytes, language: str = "auto") -> str:
222
  language=language,
223
  use_itn=True,
224
  batch_size_s=500,
225
- # merge_vad=True
226
  )
227
 
228
  # Format result
 
215
  resampler = torchaudio.transforms.Resample(sample_rate, 16000)
216
  input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
217
 
218
+ target_length = 90 * sample_rate
219
+ current_length = input_wav.shape[0]
220
+ if current_length < target_length:
221
+ padding_length = target_length - current_length
222
+ padding = np.zeros(padding_length, dtype=np.float32)
223
+ input_wav = np.concatenate((input_wav, padding))
224
+ elif current_length > target_length:
225
+ input_wav = input_wav[:target_length]
226
+
227
  # Model inference
228
  text = model.generate(
229
  input=input_wav,
 
231
  language=language,
232
  use_itn=True,
233
  batch_size_s=500,
234
+ merge_vad=True
235
  )
236
 
237
  # Format result