Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -60,11 +60,11 @@ def transcribe_audio(audio_path, original_text):
|
|
60 |
waveform, sample_rate = torchaudio.load(audio_path)
|
61 |
if waveform.shape[0] > 1:
|
62 |
waveform = waveform.mean(dim=0, keepdim=True)
|
63 |
-
if sample_rate !=
|
64 |
-
transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=
|
65 |
waveform = transform(waveform)
|
66 |
waveform = waveform / waveform.abs().max()
|
67 |
-
input_values = processor(waveform.squeeze().numpy(), sampling_rate=
|
68 |
with torch.no_grad():
|
69 |
logits = model(input_values).logits
|
70 |
predicted_ids = torch.argmax(logits, dim=-1)
|
@@ -74,7 +74,7 @@ def transcribe_audio(audio_path, original_text):
|
|
74 |
df_errors = pd.DataFrame(errors, columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
|
75 |
# Speaking speed
|
76 |
transcribed_words = transcription.strip().split()
|
77 |
-
duration = waveform.shape[1] /
|
78 |
speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
|
79 |
result = {
|
80 |
"📝 Transcribed Text": transcription,
|
|
|
60 |
waveform, sample_rate = torchaudio.load(audio_path)
|
61 |
if waveform.shape[0] > 1:
|
62 |
waveform = waveform.mean(dim=0, keepdim=True)
|
63 |
+
if sample_rate != 48000:
|
64 |
+
transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=48000)
|
65 |
waveform = transform(waveform)
|
66 |
waveform = waveform / waveform.abs().max()
|
67 |
+
input_values = processor(waveform.squeeze().numpy(), sampling_rate=48000, return_tensors="pt").input_values
|
68 |
with torch.no_grad():
|
69 |
logits = model(input_values).logits
|
70 |
predicted_ids = torch.argmax(logits, dim=-1)
|
|
|
74 |
df_errors = pd.DataFrame(errors, columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
|
75 |
# Speaking speed
|
76 |
transcribed_words = transcription.strip().split()
|
77 |
+
duration = waveform.shape[1] / 48000
|
78 |
speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
|
79 |
result = {
|
80 |
"📝 Transcribed Text": transcription,
|