NLPV committed on
Commit
a81460b
·
verified ·
1 Parent(s): 95dd078

Update app.py: use a 48000 Hz sample rate for resampling, processor input, and duration calculation

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -68,8 +68,8 @@ def transcribe_audio(audio_path, original_text):
68
  waveform, sample_rate = torchaudio.load(audio_path)
69
  if waveform.shape[0] > 1:
70
  waveform = waveform.mean(dim=0, keepdim=True)
71
- if sample_rate != 8000:
72
- transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=8000)
73
  waveform = transform(waveform)
74
 
75
  # Amplify voice intensity
@@ -77,7 +77,7 @@ def transcribe_audio(audio_path, original_text):
77
  waveform = waveform * GAIN
78
  waveform = torch.clamp(waveform, -1.0, 1.0)
79
 
80
- input_values = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values
81
 
82
  # 2. Transcribe with AI4Bharat model
83
  with torch.no_grad():
@@ -91,7 +91,7 @@ def transcribe_audio(audio_path, original_text):
91
 
92
  # Speaking speed
93
  transcribed_words = transcription.strip().split()
94
- duration = waveform.shape[1] / 16000
95
  speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
96
 
97
  result = {
 
68
  waveform, sample_rate = torchaudio.load(audio_path)
69
  if waveform.shape[0] > 1:
70
  waveform = waveform.mean(dim=0, keepdim=True)
71
+ if sample_rate != 48000:
72
+ transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=48000)
73
  waveform = transform(waveform)
74
 
75
  # Amplify voice intensity
 
77
  waveform = waveform * GAIN
78
  waveform = torch.clamp(waveform, -1.0, 1.0)
79
 
80
+ input_values = processor(waveform.squeeze().numpy(), sampling_rate=48000, return_tensors="pt").input_values
81
 
82
  # 2. Transcribe with AI4Bharat model
83
  with torch.no_grad():
 
91
 
92
  # Speaking speed
93
  transcribed_words = transcription.strip().split()
94
+ duration = waveform.shape[1] / 48000
95
  speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
96
 
97
  result = {