megatrump committed on
Commit
af35adf
·
1 Parent(s): fcdb6f3
Files changed (1) hide show
  1. api.py +4 -10
api.py CHANGED
@@ -203,29 +203,23 @@ async def process_audio(audio_data: bytes, language: str = "auto") -> str:
203
  audio_buffer = BytesIO(audio_data)
204
  waveform, sample_rate = torchaudio.load(audio_buffer)
205
 
206
- print(waveform.shape)
207
 
208
  # Convert to mono channel
209
  if waveform.shape[0] > 1:
210
  waveform = waveform.mean(dim=0)
 
211
 
212
  # Convert to numpy array and normalize
213
  input_wav = waveform.numpy().astype(np.float32)
 
214
 
215
  # Resample to 16kHz if needed
216
  if sample_rate != 16000:
217
  resampler = torchaudio.transforms.Resample(sample_rate, 16000)
218
  input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
219
 
220
-
221
- target_length = 90 * 16000
222
- current_length = input_wav.shape[1]
223
- if current_length < target_length:
224
- padding_length = target_length - current_length
225
- padding = np.zeros((1, padding_length), dtype=np.float32)
226
- print(input_wav.shape)
227
- print(padding.shape)
228
- input_wav = np.concatenate((input_wav, padding), axis=1)
229
 
230
  # Model inference
231
  text = model.generate(
 
203
  audio_buffer = BytesIO(audio_data)
204
  waveform, sample_rate = torchaudio.load(audio_buffer)
205
 
206
+ print(1, waveform.shape)
207
 
208
  # Convert to mono channel
209
  if waveform.shape[0] > 1:
210
  waveform = waveform.mean(dim=0)
211
+ print(2, waveform.shape)
212
 
213
  # Convert to numpy array and normalize
214
  input_wav = waveform.numpy().astype(np.float32)
215
+ print(3, input_wav.shape)
216
 
217
  # Resample to 16kHz if needed
218
  if sample_rate != 16000:
219
  resampler = torchaudio.transforms.Resample(sample_rate, 16000)
220
  input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
221
 
222
+ print(4, input_wav.shape)
 
 
 
 
 
 
 
 
223
 
224
  # Model inference
225
  text = model.generate(