NLPV committed on
Commit
650c3e9
·
verified ·
1 Parent(s): f0b2a66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -26
app.py CHANGED
@@ -3,7 +3,6 @@ from gtts import gTTS
3
  import tempfile
4
  import os
5
  import torch
6
- import re
7
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
8
  import torchaudio
9
  import difflib
@@ -19,32 +18,26 @@ def play_text(text):
19
  tts = gTTS(text=text, lang='hi', slow=False)
20
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
21
  tts.save(temp_file.name)
22
- os.system(f"start {temp_file.name}") # Windows only
23
- return "✅ Text is being read out. Please listen and read it yourself."
24
 
25
  def get_error_type(asr_word, correct_word):
26
- # Both words missing or extra
27
  if not asr_word:
28
  return "Missing word"
29
  if not correct_word:
30
  return "Extra word"
31
- # Spelling error: small Levenshtein
32
  if lev_distance(asr_word, correct_word) <= 2:
33
  return "Spelling mistake"
34
- # Matra/phonetic error: shared chars but wrong form
35
  set1, set2 = set(asr_word), set(correct_word)
36
  if set1 & set2:
37
  return "Phonetic/Matra error"
38
  return "Substitution/Distorted"
39
 
40
  def compare_hindi_sentences(expected, transcribed):
41
- # Split by whitespace for Hindi
42
  expected_words = expected.strip().split()
43
  transcribed_words = transcribed.strip().split()
44
-
45
  matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)
46
  errors = []
47
-
48
  for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
49
  if opcode == "equal":
50
  continue
@@ -65,48 +58,45 @@ def compare_hindi_sentences(expected, transcribed):
65
  def transcribe_audio(audio_path, original_text):
66
  try:
67
  waveform, sample_rate = torchaudio.load(audio_path)
68
- # Convert to mono
69
  if waveform.shape[0] > 1:
70
  waveform = waveform.mean(dim=0, keepdim=True)
71
- # Resample to 16000 Hz for model
72
  if sample_rate != 16000:
73
  transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
74
  waveform = transform(waveform)
75
- # Normalize to [-1, 1]
76
  waveform = waveform / waveform.abs().max()
77
-
78
  input_values = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values
79
-
80
  with torch.no_grad():
81
  logits = model(input_values).logits
82
  predicted_ids = torch.argmax(logits, dim=-1)
83
  transcription = processor.decode(predicted_ids[0])
84
-
85
- # ... rest of your error analysis
86
-
87
- return {
 
 
 
 
88
  "📝 Transcribed Text": transcription,
89
- # etc.
90
- }, df_errors
 
91
  except Exception as e:
92
  return {"error": str(e)}, pd.DataFrame(columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
93
 
94
  with gr.Blocks() as app:
95
  gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App (AI4Bharat Model)")
96
-
97
  with gr.Row():
98
  input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
99
  play_button = gr.Button("🔊 Listen to Text")
100
-
101
- play_button.click(play_text, inputs=[input_text], outputs=[])
102
 
103
  gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
104
  audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
105
-
106
  submit_button = gr.Button("✅ Submit Recording for Checking")
107
  output = gr.JSON(label="Results")
108
- error_table = gr.Dataframe(headers=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"], label="गलती तालिका (Error Table)")
109
-
110
  submit_button.click(
111
  transcribe_audio,
112
  inputs=[audio_input, input_text],
 
3
  import tempfile
4
  import os
5
  import torch
 
6
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
7
  import torchaudio
8
  import difflib
 
18
  tts = gTTS(text=text, lang='hi', slow=False)
19
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
20
  tts.save(temp_file.name)
21
+ # Return file for Gradio audio output
22
+ return temp_file.name
23
 
24
  def get_error_type(asr_word, correct_word):
 
25
  if not asr_word:
26
  return "Missing word"
27
  if not correct_word:
28
  return "Extra word"
 
29
  if lev_distance(asr_word, correct_word) <= 2:
30
  return "Spelling mistake"
 
31
  set1, set2 = set(asr_word), set(correct_word)
32
  if set1 & set2:
33
  return "Phonetic/Matra error"
34
  return "Substitution/Distorted"
35
 
36
  def compare_hindi_sentences(expected, transcribed):
 
37
  expected_words = expected.strip().split()
38
  transcribed_words = transcribed.strip().split()
 
39
  matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)
40
  errors = []
 
41
  for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
42
  if opcode == "equal":
43
  continue
 
58
  def transcribe_audio(audio_path, original_text):
59
  try:
60
  waveform, sample_rate = torchaudio.load(audio_path)
 
61
  if waveform.shape[0] > 1:
62
  waveform = waveform.mean(dim=0, keepdim=True)
 
63
  if sample_rate != 16000:
64
  transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
65
  waveform = transform(waveform)
 
66
  waveform = waveform / waveform.abs().max()
 
67
  input_values = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values
 
68
  with torch.no_grad():
69
  logits = model(input_values).logits
70
  predicted_ids = torch.argmax(logits, dim=-1)
71
  transcription = processor.decode(predicted_ids[0])
72
+ # Error analysis
73
+ errors = compare_hindi_sentences(original_text, transcription)
74
+ df_errors = pd.DataFrame(errors, columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
75
+ # Speaking speed
76
+ transcribed_words = transcription.strip().split()
77
+ duration = waveform.shape[1] / 16000
78
+ speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
79
+ result = {
80
  "📝 Transcribed Text": transcription,
81
+ "⏱️ Speaking Speed (words/sec)": speed,
82
+ }
83
+ return result, df_errors
84
  except Exception as e:
85
  return {"error": str(e)}, pd.DataFrame(columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
86
 
87
  with gr.Blocks() as app:
88
  gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App (AI4Bharat Model)")
 
89
  with gr.Row():
90
  input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
91
  play_button = gr.Button("🔊 Listen to Text")
92
+ audio_output = gr.Audio(label="Text-to-Speech Output", type="filepath")
93
+ play_button.click(play_text, inputs=input_text, outputs=audio_output)
94
 
95
  gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
96
  audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
 
97
  submit_button = gr.Button("✅ Submit Recording for Checking")
98
  output = gr.JSON(label="Results")
99
+ error_table = gr.Dataframe(label="गलती तालिका (Error Table)")
 
100
  submit_button.click(
101
  transcribe_audio,
102
  inputs=[audio_input, input_text],