camparchimedes committed
Commit 25beb4b · verified · 1 Parent(s): d5948d3

Update app.py

Files changed (1): app.py (+9 -4)
app.py CHANGED
@@ -92,7 +92,7 @@ def transcribe_audio(audio_file, chunk_length_s=30):
     chunk_size = chunk_length_s * sample_rate
     num_chunks = waveform.shape[1] // chunk_size + int(waveform.shape[1] % chunk_size != 0)
 
-    # Initialize an empty list to store the transcribed text from each chunk
+    # Initialize empty list@store transcribed text from ea.chunk
     full_text = []
 
     for i in range(num_chunks):
@@ -100,15 +100,20 @@ def transcribe_audio(audio_file, chunk_length_s=30):
         end = min((i + 1) * chunk_size, waveform.shape[1])
         chunk_waveform = waveform[:, start:end]
 
-        # Ensure the chunk waveform is properly shaped
+        # Check chunk waveform is properly shaped
         if chunk_waveform.shape[0] > 1:
             chunk_waveform = torch.mean(chunk_waveform, dim=0, keepdim=True)
 
-
+        if processor.tokenizer.pad_token is None or processor.tokenizer.pad_token_id == processor.tokenizer.eos_token_id:
+            processor.tokenizer.add_special_tokens({'pad_token': '<PAD>'})
+            pad_token_id = processor.tokenizer.convert_tokens_to_ids('<PAD>')
+
+        model.config.pad_token_id = pad_token_id  # update model configuration with new pad token ID
+
         # Tokenize the input batch with the processor
         inputs = processor(chunk_waveform.squeeze(0).numpy(), sampling_rate=sample_rate, padding="max_length", return_tensors="pt", task="transcribe", device=device)
 
-        # ASR model inference on the chunk
+        # ASR model inference on chunk
         with torch.no_grad():
             generated_ids = model.generate(
                 input_features=inputs.input_features.to(device),
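
A note on the chunking arithmetic in the first hunk: num_chunks is a ceiling division, so a trailing partial chunk is still transcribed, and multi-channel audio is averaged down to mono before tokenization. A minimal, self-contained sketch of those two steps (the 75-second stereo clip is illustrative, not from app.py):

import torch

sample_rate = 16_000                          # Whisper-style ASR models expect 16 kHz input
chunk_length_s = 30
waveform = torch.zeros(2, 75 * sample_rate)   # hypothetical 75 s stereo clip

chunk_size = chunk_length_s * sample_rate
# Ceiling division: 75 s in 30 s windows -> 3 chunks (30 s + 30 s + 15 s)
num_chunks = waveform.shape[1] // chunk_size + int(waveform.shape[1] % chunk_size != 0)
assert num_chunks == 3

for i in range(num_chunks):
    start = i * chunk_size
    end = min((i + 1) * chunk_size, waveform.shape[1])
    chunk_waveform = waveform[:, start:end]
    if chunk_waveform.shape[0] > 1:           # downmix stereo to mono, as in the diff
        chunk_waveform = torch.mean(chunk_waveform, dim=0, keepdim=True)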
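
The substantive change in this commit is the pad-token guard: when the tokenizer has no pad token, or its pad token aliases the EOS token, a dedicated '<PAD>' token is registered and model.config.pad_token_id is pointed at it, so that padded positions produced by padding="max_length" are not mistaken for end-of-sequence. Two caveats worth noting: the guard is loop-invariant yet runs inside the per-chunk loop, and pad_token_id appears to be bound only inside the if-branch, so the unconditional model.config assignment would raise a NameError whenever the guard is false (unless pad_token_id is defined elsewhere in app.py). A minimal hoisted sketch that avoids both issues; the resize_token_embeddings call is an assumption for models that actually embed the pad id:

def ensure_distinct_pad_token(processor, model):
    tok = processor.tokenizer
    if tok.pad_token is None or tok.pad_token_id == tok.eos_token_id:
        tok.add_special_tokens({'pad_token': '<PAD>'})
        # Assumption: if the decoder embeds pad ids, grow the embedding
        # matrix to cover the newly added vocabulary entry.
        model.resize_token_embeddings(len(tok))
    # Read the id back from the tokenizer so it is always bound.
    model.config.pad_token_id = tok.pad_token_id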
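
Finally, the second hunk cuts off inside the model.generate(...) call. A hedged sketch of one chunk's full inference round-trip; the decode step after generate is an assumption about how full_text is filled, since it lies below the cut:

import torch

def transcribe_chunk(chunk_waveform, processor, model, sample_rate, device):
    # Feature-extract the mono chunk; the padding/task options from the diff are omitted here
    inputs = processor(chunk_waveform.squeeze(0).numpy(),
                       sampling_rate=sample_rate, return_tensors="pt")
    with torch.no_grad():
        generated_ids = model.generate(input_features=inputs.input_features.to(device))
    # Assumed continuation: map token ids back to text; skip_special_tokens
    # drops markers such as the '<PAD>' token registered above.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]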