NLPV committed on
Commit
f5d520a
·
verified ·
1 Parent(s): 2cf982a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -13
app.py CHANGED
@@ -5,6 +5,7 @@ import difflib
5
  import tempfile
6
  import os
7
  import speech_recognition as sr
 
8
 
9
  # Function to play the text (optional)
10
  def play_text(text):
@@ -14,24 +15,26 @@ def play_text(text):
14
  os.system(f"start {temp_file.name}") # Windows
15
  return "✅ Text is being read out. Please listen and read it yourself."
16
 
 
 
 
17
  def transcribe_audio(audio, original_text):
18
- recognizer = sr.Recognizer()
19
  try:
20
- with sr.AudioFile(audio) as source:
21
- audio_data = recognizer.record(source)
22
-
23
- # Try chunking if needed
24
- transcription = recognizer.recognize_google(audio_data, language="hi-IN")
25
 
26
- # Clean and split the text better (remove punctuations etc.)
 
 
27
  import re
28
  original_words = re.findall(r'\w+', original_text.strip())
29
  transcribed_words = re.findall(r'\w+', transcription.strip())
30
 
31
  matcher = difflib.SequenceMatcher(None, original_words, transcribed_words)
32
  accuracy = round(matcher.ratio() * 100, 2)
33
-
34
- speed = round(len(transcribed_words) / (len(audio_data.frame_data) / audio_data.sample_rate), 2)
 
35
 
36
  result = {
37
  "📝 Transcribed Text": transcription,
@@ -39,10 +42,6 @@ def transcribe_audio(audio, original_text):
39
  "⏱️ Speaking Speed (words/sec)": speed
40
  }
41
  return result
42
- except sr.UnknownValueError:
43
- return {"error": "Could not understand audio"}
44
- except sr.RequestError as e:
45
- return {"error": f"Request error: {e}"}
46
  except Exception as e:
47
  return {"error": str(e)}
48
 
 
5
  import tempfile
6
  import os
7
  import speech_recognition as sr
8
+ from faster_whisper import WhisperModel
9
 
10
  # Function to play the text (optional)
11
  def play_text(text):
 
15
  os.system(f"start {temp_file.name}") # Windows
16
  return "✅ Text is being read out. Please listen and read it yourself."
17
 
18
+ # Load model once (outside function for efficiency)
19
+ model = WhisperModel("small", compute_type="float32") # Or "medium" for better accuracy
20
+
21
  def transcribe_audio(audio, original_text):
 
22
  try:
23
+ # Run inference
24
+ segments, info = model.transcribe(audio, language='hi')
 
 
 
25
 
26
+ transcription = " ".join([segment.text for segment in segments])
27
+
28
+ # Clean and split the text better
29
  import re
30
  original_words = re.findall(r'\w+', original_text.strip())
31
  transcribed_words = re.findall(r'\w+', transcription.strip())
32
 
33
  matcher = difflib.SequenceMatcher(None, original_words, transcribed_words)
34
  accuracy = round(matcher.ratio() * 100, 2)
35
+
36
+ # Speaking speed (approximate)
37
+ speed = round(len(transcribed_words) / info.duration, 2)
38
 
39
  result = {
40
  "📝 Transcribed Text": transcription,
 
42
  "⏱️ Speaking Speed (words/sec)": speed
43
  }
44
  return result
 
 
 
 
45
  except Exception as e:
46
  return {"error": str(e)}
47