Spaces:

NuMessiah
/

WhisperTranscript

Running

NuMessiah committed on Feb 15

Commit

36e6932

1 Parent(s): e9565b7

Convert to mono

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,6 +25,10 @@ def transcribe_audio(audio_file):
             resampler = torchaudio.transforms.Resample(sample_rate, 16000)
             audio = resampler(audio)
         # Transcribe the audio
         transcription = whisper_pipeline(audio.squeeze().numpy())["text"] # .squeeze() removes extra dimensions

             resampler = torchaudio.transforms.Resample(sample_rate, 16000)
             audio = resampler(audio)
+        # Convert to Mono
+        if audio.shape[0] > 1:  # Check if multi-channel
+            audio = torch.mean(audio, dim=0, keepdim=True) # Average channels
         # Transcribe the audio
         transcription = whisper_pipeline(audio.squeeze().numpy())["text"] # .squeeze() removes extra dimensions