Spaces:

megatrump
/

test-FunAudioLLM

Sleeping

megatrump commited on Mar 6

Commit

4a1f483

1 Parent(s): f1b8fcd

final version

Files changed (1) hide show

api.py CHANGED Viewed

@@ -203,26 +203,20 @@ async def process_audio(audio_data: bytes, language: str = "auto") -> str:
         audio_buffer = BytesIO(audio_data)
         waveform, sample_rate = torchaudio.load(audio_buffer)
-        print(1, waveform.shape)
         # Convert to mono channel
         if waveform.shape[0] > 1:
             waveform = waveform.mean(dim=0)
         else:
             waveform = np.squeeze(waveform)
-        print(2, waveform.shape)
         # Convert to numpy array and normalize
         input_wav = waveform.numpy().astype(np.float32)
-        print(3, input_wav.shape)
         # Resample to 16kHz if needed
         if sample_rate != 16000:
             resampler = torchaudio.transforms.Resample(sample_rate, 16000)
             input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
-        print(4, input_wav.shape)
         # Model inference
         text = model.generate(
             input=input_wav,

         audio_buffer = BytesIO(audio_data)
         waveform, sample_rate = torchaudio.load(audio_buffer)
         # Convert to mono channel
         if waveform.shape[0] > 1:
             waveform = waveform.mean(dim=0)
         else:
             waveform = np.squeeze(waveform)
         # Convert to numpy array and normalize
         input_wav = waveform.numpy().astype(np.float32)
         # Resample to 16kHz if needed
         if sample_rate != 16000:
             resampler = torchaudio.transforms.Resample(sample_rate, 16000)
             input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
         # Model inference
         text = model.generate(
             input=input_wav,