Update app.py

app.py CHANGED
@@ -92,7 +92,7 @@ def transcribe_audio(audio_file, chunk_length_s=30):
     chunk_size = chunk_length_s * sample_rate
     num_chunks = waveform.shape[1] // chunk_size + int(waveform.shape[1] % chunk_size != 0)
 
-    # Initialize
+    # Initialize empty list to store transcribed text from each chunk
     full_text = []
 
     for i in range(num_chunks):
@@ -100,15 +100,20 @@ def transcribe_audio(audio_file, chunk_length_s=30):
         end = min((i + 1) * chunk_size, waveform.shape[1])
         chunk_waveform = waveform[:, start:end]
 
-        #
+        # Downmix multi-channel audio to mono
         if chunk_waveform.shape[0] > 1:
             chunk_waveform = torch.mean(chunk_waveform, dim=0, keepdim=True)
 
-
+        if processor.tokenizer.pad_token is None or processor.tokenizer.pad_token_id == processor.tokenizer.eos_token_id:
+            processor.tokenizer.add_special_tokens({'pad_token': '<PAD>'})
+            pad_token_id = processor.tokenizer.convert_tokens_to_ids('<PAD>')
+
+            model.config.pad_token_id = pad_token_id  # update model configuration with new pad token ID
+
         # Tokenize the input batch with the processor
         inputs = processor(chunk_waveform.squeeze(0).numpy(), sampling_rate=sample_rate, padding="max_length", return_tensors="pt", task="transcribe", device=device)
 
-        # ASR model inference on
+        # ASR model inference on chunk
        with torch.no_grad():
             generated_ids = model.generate(
                 input_features=inputs.input_features.to(device),
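For context, here is a minimal, self-contained sketch of how the pad-token guard added in this commit and the chunked inference loop fit together. The checkpoint name `openai/whisper-small`, the resampling step, and the `sample.wav` usage line are illustrative assumptions, not the Space's actual code. The guard matters because Whisper ships with `pad_token` equal to `eos_token` (`<|endoftext|>`), so code that needs a distinct pad id can misbehave without it.

```python
import torch
import torchaudio
from transformers import WhisperForConditionalGeneration, WhisperProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "openai/whisper-small"  # assumed checkpoint; the Space may use another
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)

# Mirror the commit's guard: give the tokenizer a dedicated pad token when it
# is missing or collides with EOS (Whisper reuses <|endoftext|> for both).
tok = processor.tokenizer
if tok.pad_token is None or tok.pad_token_id == tok.eos_token_id:
    tok.add_special_tokens({"pad_token": "<PAD>"})
    model.config.pad_token_id = tok.convert_tokens_to_ids("<PAD>")

def transcribe_audio(audio_file, chunk_length_s=30):
    waveform, sample_rate = torchaudio.load(audio_file)

    # Whisper's feature extractor expects 16 kHz input
    if sample_rate != 16000:
        waveform = torchaudio.functional.resample(waveform, sample_rate, 16000)
        sample_rate = 16000

    chunk_size = chunk_length_s * sample_rate
    num_chunks = waveform.shape[1] // chunk_size + int(waveform.shape[1] % chunk_size != 0)

    full_text = []
    for i in range(num_chunks):
        start = i * chunk_size
        end = min((i + 1) * chunk_size, waveform.shape[1])
        chunk_waveform = waveform[:, start:end]

        # Downmix multi-channel audio to mono
        if chunk_waveform.shape[0] > 1:
            chunk_waveform = torch.mean(chunk_waveform, dim=0, keepdim=True)

        # Extract log-mel features; each chunk is padded/truncated to 30 s internally
        inputs = processor(chunk_waveform.squeeze(0).numpy(), sampling_rate=sample_rate, return_tensors="pt")

        # ASR model inference on chunk
        with torch.no_grad():
            generated_ids = model.generate(input_features=inputs.input_features.to(device))
        full_text.append(processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip())

    return " ".join(full_text)

print(transcribe_audio("sample.wav"))
```

Unlike the hunk above, this sketch hoists the pad-token guard out of the per-chunk loop: the check is idempotent either way, but running it once avoids mutating the tokenizer on every iteration.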