Spaces:
Running
Running
修改了拼接的方法
Browse files
api.py
CHANGED
@@ -215,14 +215,14 @@ async def process_audio(audio_data: bytes, language: str = "auto") -> str:
|
|
215 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
216 |
input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
|
217 |
|
218 |
-
|
219 |
-
|
|
|
|
|
220 |
if current_length < target_length:
|
221 |
padding_length = target_length - current_length
|
222 |
-
padding = np.zeros(padding_length, dtype=np.float32)
|
223 |
input_wav = np.concatenate((input_wav, padding))
|
224 |
-
elif current_length > target_length:
|
225 |
-
input_wav = input_wav[:target_length]
|
226 |
|
227 |
# Model inference
|
228 |
text = model.generate(
|
|
|
215 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
216 |
input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
|
217 |
|
218 |
+
print(input_wav.shape)
|
219 |
+
|
220 |
+
target_length = 90 * 16000
|
221 |
+
current_length = input_wav.shape[1]
|
222 |
if current_length < target_length:
|
223 |
padding_length = target_length - current_length
|
224 |
+
padding = np.zeros((1, padding_length), dtype=np.float32)
|
225 |
input_wav = np.concatenate((input_wav, padding))
|
|
|
|
|
226 |
|
227 |
# Model inference
|
228 |
text = model.generate(
|