NLPV committed on
Commit
3d23eab
·
verified ·
1 Parent(s): 650c3e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -60,11 +60,11 @@ def transcribe_audio(audio_path, original_text):
60
  waveform, sample_rate = torchaudio.load(audio_path)
61
  if waveform.shape[0] > 1:
62
  waveform = waveform.mean(dim=0, keepdim=True)
63
- if sample_rate != 16000:
64
- transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
65
  waveform = transform(waveform)
66
  waveform = waveform / waveform.abs().max()
67
- input_values = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values
68
  with torch.no_grad():
69
  logits = model(input_values).logits
70
  predicted_ids = torch.argmax(logits, dim=-1)
@@ -74,7 +74,7 @@ def transcribe_audio(audio_path, original_text):
74
  df_errors = pd.DataFrame(errors, columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
75
  # Speaking speed
76
  transcribed_words = transcription.strip().split()
77
- duration = waveform.shape[1] / 16000
78
  speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
79
  result = {
80
  "📝 Transcribed Text": transcription,
 
60
  waveform, sample_rate = torchaudio.load(audio_path)
61
  if waveform.shape[0] > 1:
62
  waveform = waveform.mean(dim=0, keepdim=True)
63
+ if sample_rate != 48000:
64
+ transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=48000)
65
  waveform = transform(waveform)
66
  waveform = waveform / waveform.abs().max()
67
+ input_values = processor(waveform.squeeze().numpy(), sampling_rate=48000, return_tensors="pt").input_values
68
  with torch.no_grad():
69
  logits = model(input_values).logits
70
  predicted_ids = torch.argmax(logits, dim=-1)
 
74
  df_errors = pd.DataFrame(errors, columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
75
  # Speaking speed
76
  transcribed_words = transcription.strip().split()
77
+ duration = waveform.shape[1] / 48000
78
  speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
79
  result = {
80
  "📝 Transcribed Text": transcription,