megatrump committed
Commit 4a1f483 · 1 Parent(s): f1b8fcd

final version

Files changed (1)
  1. api.py +0 -6
api.py CHANGED
@@ -203,26 +203,20 @@ async def process_audio(audio_data: bytes, language: str = "auto") -> str:
     audio_buffer = BytesIO(audio_data)
     waveform, sample_rate = torchaudio.load(audio_buffer)
 
-    print(1, waveform.shape)
-
     # Convert to mono channel
     if waveform.shape[0] > 1:
         waveform = waveform.mean(dim=0)
     else:
         waveform = np.squeeze(waveform)
-    print(2, waveform.shape)
 
     # Convert to numpy array and normalize
     input_wav = waveform.numpy().astype(np.float32)
-    print(3, input_wav.shape)
 
     # Resample to 16kHz if needed
     if sample_rate != 16000:
         resampler = torchaudio.transforms.Resample(sample_rate, 16000)
         input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
 
-    print(4, input_wav.shape)
-
     # Model inference
     text = model.generate(
         input=input_wav,
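The hunk only strips debug print statements; the preprocessing itself is unchanged. For reference, a minimal, self-contained sketch of that pipeline follows. The preprocess_audio name and the standalone imports are illustrative assumptions, not part of api.py; the real function is async, takes a language argument, and goes on to call model.generate on the returned array.

# Illustrative sketch of the preprocessing in this hunk (hypothetical helper name).
from io import BytesIO

import numpy as np
import torch
import torchaudio


def preprocess_audio(audio_data: bytes) -> np.ndarray:
    """Decode raw audio bytes into a mono float32 waveform at 16 kHz."""
    # torchaudio.load accepts a file-like object and returns (channels, frames).
    waveform, sample_rate = torchaudio.load(BytesIO(audio_data))

    # Convert to mono: average the channels if stereo, otherwise drop the channel dim.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0)
    else:
        waveform = waveform.squeeze(0)

    # Convert to a float32 numpy array, the format passed to model.generate above.
    input_wav = waveform.numpy().astype(np.float32)

    # Resample to 16 kHz if the source uses a different rate.
    if sample_rate != 16000:
        resampler = torchaudio.transforms.Resample(sample_rate, 16000)
        input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()

    return input_wav

Averaging channels keeps the mono conversion simple, and resampling to 16 kHz matches the fixed input rate assumed by the inference step in the diff.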