Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -68,8 +68,8 @@ def transcribe_audio(audio_path, original_text):
|
|
68 |
waveform, sample_rate = torchaudio.load(audio_path)
|
69 |
if waveform.shape[0] > 1:
|
70 |
waveform = waveform.mean(dim=0, keepdim=True)
|
71 |
-
if sample_rate != 16000:
|
72 |
-
transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
|
73 |
waveform = transform(waveform)
|
74 |
|
75 |
# Amplify voice intensity
|
@@ -77,7 +77,7 @@ def transcribe_audio(audio_path, original_text):
|
|
77 |
waveform = waveform * GAIN
|
78 |
waveform = torch.clamp(waveform, -1.0, 1.0)
|
79 |
|
80 |
-
input_values = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values
|
81 |
|
82 |
# 2. Transcribe with AI4Bharat model
|
83 |
with torch.no_grad():
|
@@ -91,7 +91,7 @@ def transcribe_audio(audio_path, original_text):
|
|
91 |
|
92 |
# Speaking speed
|
93 |
transcribed_words = transcription.strip().split()
|
94 |
-
duration = waveform.shape[1] / 16000
|
95 |
speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
|
96 |
|
97 |
result = {
|
|
|
68 |
waveform, sample_rate = torchaudio.load(audio_path)
|
69 |
if waveform.shape[0] > 1:
|
70 |
waveform = waveform.mean(dim=0, keepdim=True)
|
71 |
+
if sample_rate != 48000:
|
72 |
+
transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=48000)
|
73 |
waveform = transform(waveform)
|
74 |
|
75 |
# Amplify voice intensity
|
|
|
77 |
waveform = waveform * GAIN
|
78 |
waveform = torch.clamp(waveform, -1.0, 1.0)
|
79 |
|
80 |
+
input_values = processor(waveform.squeeze().numpy(), sampling_rate=48000, return_tensors="pt").input_values
|
81 |
|
82 |
# 2. Transcribe with AI4Bharat model
|
83 |
with torch.no_grad():
|
|
|
91 |
|
92 |
# Speaking speed
|
93 |
transcribed_words = transcription.strip().split()
|
94 |
+
duration = waveform.shape[1] / 48000
|
95 |
speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
|
96 |
|
97 |
result = {
|