Update app.py
app.py
CHANGED
@@ -2,11 +2,13 @@ import gradio as gr
 from gtts import gTTS
 import tempfile
 import os
-import difflib
 import torch
 import re
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import torchaudio
+import difflib
+import pandas as pd
+from Levenshtein import distance as lev_distance
 
 # Load AI4Bharat Hindi model & processor
 MODEL_NAME = "ai4bharat/indicwav2vec-hindi"
@@ -17,13 +19,52 @@ def play_text(text):
     tts = gTTS(text=text, lang='hi', slow=False)
     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
     tts.save(temp_file.name)
-
-    os.system(f"start {temp_file.name}")
+    os.system(f"start {temp_file.name}")  # Windows only
     return "✅ Text is being read out. Please listen and read it yourself."
 
+def get_error_type(asr_word, correct_word):
+    # An empty slot on either side means a missing or extra word
+    if not asr_word:
+        return "Missing word"
+    if not correct_word:
+        return "Extra word"
+    # Spelling error: small Levenshtein distance
+    if lev_distance(asr_word, correct_word) <= 2:
+        return "Spelling mistake"
+    # Matra/phonetic error: shared characters but wrong form
+    set1, set2 = set(asr_word), set(correct_word)
+    if set1 & set2:
+        return "Phonetic/Matra error"
+    return "Substitution/Distorted"
+
+def compare_hindi_sentences(expected, transcribed):
+    # Split by whitespace for Hindi
+    expected_words = expected.strip().split()
+    transcribed_words = transcribed.strip().split()
+
+    matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)
+    errors = []
+
+    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
+        if opcode == "equal":
+            continue
+        elif opcode == "replace":
+            for k in range(max(i2 - i1, j2 - j1)):
+                asr_word = transcribed_words[i1 + k] if i1 + k < i2 else ""
+                correct_word = expected_words[j1 + k] if j1 + k < j2 else ""
+                error_type = get_error_type(asr_word, correct_word)
+                errors.append((asr_word, correct_word, error_type))
+        elif opcode == "insert":
+            for k in range(j1, j2):
+                errors.append(("", expected_words[k], "Missing word"))
+        elif opcode == "delete":
+            for k in range(i1, i2):
+                errors.append((transcribed_words[k], "", "Extra word"))
+    return errors
+
 def transcribe_audio(audio_path, original_text):
     try:
-        # 1. Load
+        # 1. Load and pre-process audio
         waveform, sample_rate = torchaudio.load(audio_path)
         if waveform.shape[0] > 1:
             waveform = waveform.mean(dim=0, keepdim=True)
@@ -31,39 +72,37 @@ def transcribe_audio(audio_path, original_text):
         transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
         waveform = transform(waveform)
 
-        #
-        GAIN = 1.5
+        # Amplify voice intensity
+        GAIN = 1.5
         waveform = waveform * GAIN
-        waveform = torch.clamp(waveform, -1.0, 1.0)
+        waveform = torch.clamp(waveform, -1.0, 1.0)
 
         input_values = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values
-
+
         # 2. Transcribe with AI4Bharat model
         with torch.no_grad():
             logits = model(input_values).logits
         predicted_ids = torch.argmax(logits, dim=-1)
         transcription = processor.decode(predicted_ids[0])
 
-        # 3.
-        original_words = original_text.strip().split()
-        transcribed_words = transcription.strip().split()
-        matcher = difflib.SequenceMatcher(None, original_words, transcribed_words)
-        accuracy = round(matcher.ratio() * 100, 2)
+        # 3. Error analysis (as table)
+        errors = compare_hindi_sentences(original_text, transcription)
+        df_errors = pd.DataFrame(errors, columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
 
-        # Speaking speed
+        # Speaking speed
+        transcribed_words = transcription.strip().split()
         duration = waveform.shape[1] / 16000
         speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
 
         result = {
             "📝 Transcribed Text": transcription,
-            "🎯 Accuracy (%)": accuracy,
-            "⏱️ Speaking Speed (words/sec)": speed
+            "⏱️ Speaking Speed (words/sec)": speed,
         }
-        return result
-    except Exception as e:
-        return {"error": str(e)}
+        # Return table as a separate output (Gradio Dataframe)
+        return result, df_errors
 
-
+    except Exception as e:
+        return {"error": str(e)}, pd.DataFrame(columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
 
 with gr.Blocks() as app:
     gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App (AI4Bharat Model)")
@@ -79,7 +118,12 @@ with gr.Blocks() as app:
 
     submit_button = gr.Button("✅ Submit Recording for Checking")
     output = gr.JSON(label="Results")
+    error_table = gr.Dataframe(headers=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"], label="गलती तालिका (Error Table)")
 
-    submit_button.click(transcribe_audio, inputs=[audio_input, input_text], outputs=output)
+    submit_button.click(
+        transcribe_audio,
+        inputs=[audio_input, input_text],
+        outputs=[output, error_table]
+    )
 
 app.launch()
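As a sanity check on the new alignment logic, here is a minimal standalone sketch of what compare_hindi_sentences sees for one reading; the two Hindi sentences are made-up examples, not data from the app:

import difflib
from Levenshtein import distance as lev_distance

# Expected sentence vs. a hypothetical ASR transcription of the learner's reading
expected = "मैं रोज़ सुबह स्कूल जाता हूँ"
transcribed = "मैं सुबह स्कुल जाता हूँ"

expected_words = expected.split()
transcribed_words = transcribed.split()

# Same argument order as in the commit: transcription first, expected second
matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)
for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
    print(opcode, transcribed_words[i1:i2], expected_words[j1:j2])
# equal   ['मैं'] ['मैं']
# insert  [] ['रोज़']           -> reported as "Missing word"
# equal   ['सुबह'] ['सुबह']
# replace ['स्कुल'] ['स्कूल']   -> handed to get_error_type
# equal   ['जाता', 'हूँ'] ['जाता', 'हूँ']

print(lev_distance('स्कुल', 'स्कूल'))  # 1 -> within the <= 2 "Spelling mistake" threshold

One consequence of the ordering inside get_error_type: the Levenshtein check runs before the shared-character check, so a single-matra slip such as स्कुल/स्कूल is labelled "Spelling mistake", never "Phonetic/Matra error". If matra confusions should take priority, those two checks would need to swap places.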
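The two-value return from transcribe_audio lines up positionally with outputs=[output, error_table]: Gradio hands the first returned value to the gr.JSON component and the second to the gr.Dataframe. A stripped-down sketch of the same pattern (demo_fn and these components are illustrative, not part of the app):

import gradio as gr
import pandas as pd

def demo_fn(text):
    # First return value -> gr.JSON, second -> gr.Dataframe, matched by position
    return {"echo": text}, pd.DataFrame({"word": text.split()})

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    btn = gr.Button("Run")
    js = gr.JSON(label="Dict output")
    tbl = gr.Dataframe(label="Table output")
    btn.click(demo_fn, inputs=inp, outputs=[js, tbl])

demo.launch()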
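One caveat the new "# Windows only" comment points at: os.system(f"start {temp_file.name}") opens the mp3 on the machine running the script, so on a hosted (Linux) Space the visitor hears nothing. A more portable sketch, assuming the UI were reworked to use a gr.Audio(type="filepath") output in place of the status string, would return the file path and let the browser play it:

import tempfile
from gtts import gTTS

def play_text(text):
    tts = gTTS(text=text, lang='hi', slow=False)
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
    tts.save(temp_file.name)
    # Return the path for a gr.Audio(type="filepath") component to play
    # in the browser, rather than launching a player on the server
    return temp_file.name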