mr2along committed on
Commit
7515a2b
1 Parent(s): 006f012

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -20
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # Import required libraries
2
  import os
3
  import requests
4
  import speech_recognition as sr
@@ -10,17 +9,11 @@ from pydub import AudioSegment
10
  import time
11
  import eng_to_ipa as ipa
12
 
13
- # Function to create pronunciation audio
14
- def create_pronunciation_audio(word):
15
- try:
16
- tts = gTTS(word)
17
- audio_file_path = f"audio/{word}.mp3"
18
- tts.save(audio_file_path)
19
- return audio_file_path # Return the local path instead of uploading
20
- except Exception as e:
21
- return f"Failed to create pronunciation audio: {e}"
22
 
23
- # Function to upload audio files to the server
24
  def upfilepath(local_filename):
25
  ts = time.time()
26
  upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
@@ -28,18 +21,72 @@ def upfilepath(local_filename):
28
 
29
  try:
30
  response = requests.post(upload_url, files=files, timeout=30) # Set timeout (e.g., 30 seconds)
 
31
  if response.status_code == 200:
32
  result = response.json()
33
  extracted_path = result[0]
34
  return extracted_path
35
  else:
36
  return None
 
37
  except requests.exceptions.Timeout:
38
  return "Request timed out. Please try again."
39
  except Exception as e:
40
  return f"An error occurred: {e}"
41
 
42
- # Update the compare_texts function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def compare_texts(reference_text, transcribed_text):
44
  reference_words = reference_text.split()
45
  transcribed_words = transcribed_text.split()
@@ -48,7 +95,7 @@ def compare_texts(reference_text, transcribed_text):
48
  sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
49
  similarity_score = round(sm.ratio() * 100, 2)
50
 
51
- # Construct HTML output
52
  html_output = f"<strong>Fidelity Class:</strong> "
53
  if similarity_score >= 85:
54
  html_output += f"<strong>GOOD (>=85%)</strong><br>"
@@ -61,10 +108,10 @@ def compare_texts(reference_text, transcribed_text):
61
 
62
  html_output += f"<strong>Quality Score:</strong> {similarity_score}%<br>"
63
  html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
64
- html_output += f"<strong>IPA Transcription:</strong> {ipa_transcription(reference_text)}<br>"
65
  html_output += "<strong>Word Score List:</strong><br>"
66
 
67
- # Generate colored word score list and audio links
68
  for i, word in enumerate(reference_words):
69
  try:
70
  if word.lower() == transcribed_words[i].lower():
@@ -85,14 +132,15 @@ def compare_texts(reference_text, transcribed_text):
85
  if incorrect_words_audios:
86
  html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
87
  for word, audio in incorrect_words_audios:
88
- up_audio = upfilepath(audio) # Upload the audio
89
- audio_src = f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}" # Use the upload URL
 
 
90
  html_output += f'{word}: '
91
- html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio><br>'
92
 
93
  return [html_output]
94
 
95
-
96
  # Step 4: Text-to-Speech Function
97
  def text_to_speech(paragraph):
98
  if not paragraph:
@@ -138,4 +186,4 @@ tts_interface = gr.Interface(
138
  demo = gr.TabbedInterface([interface, tts_interface], ["Speech Recognition", "Text-to-Speech"])
139
 
140
  # Launch Gradio app
141
- demo.launch()
 
 
1
  import os
2
  import requests
3
  import speech_recognition as sr
 
9
  import time
10
  import eng_to_ipa as ipa
11
 
12
# Create the local audio cache directory used for generated pronunciation
# clips. exist_ok=True makes this a single atomic-enough call and avoids
# the check-then-create race of the original os.path.exists() guard.
os.makedirs('audio', exist_ok=True)
 
 
 
 
 
 
15
 
16
+ # Step 2: Create pronunciation audio for incorrect words
17
  def upfilepath(local_filename):
18
  ts = time.time()
19
  upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
 
21
 
22
  try:
23
  response = requests.post(upload_url, files=files, timeout=30) # Set timeout (e.g., 30 seconds)
24
+
25
  if response.status_code == 200:
26
  result = response.json()
27
  extracted_path = result[0]
28
  return extracted_path
29
  else:
30
  return None
31
+
32
  except requests.exceptions.Timeout:
33
  return "Request timed out. Please try again."
34
  except Exception as e:
35
  return f"An error occurred: {e}"
36
 
37
# Step 1: Transcribe the audio file
def transcribe_audio(audio):
    """Transcribe a spoken audio file to text with Google Speech Recognition.

    Parameters:
        audio: path to an audio file; non-wav inputs are converted to wav
            via pydub before recognition.

    Returns:
        The transcribed text on success, or a human-readable error string
        (missing file, conversion failure, or recognition failure).
    """
    if audio is None:
        return "No audio file provided."

    # Validate the path before doing any heavy work.
    if not os.path.isfile(audio):
        return "Audio file not found."

    recognizer = sr.Recognizer()

    base, ext = os.path.splitext(audio)
    audio_format = ext.lstrip('.').lower()

    if audio_format != 'wav':
        try:
            # speech_recognition's AudioFile needs PCM wav input.
            audio_segment = AudioSegment.from_file(audio)
            # BUG FIX: the original did audio.replace(audio_format, 'wav'),
            # which substitutes every occurrence of the extension string in
            # the whole path (e.g. "mp3file.mp3" -> "wavfile.wav"). Swap
            # only the suffix instead.
            wav_path = f"{base}.wav"
            audio_segment.export(wav_path, format='wav')
            audio = wav_path
        except Exception as e:
            return f"Error converting audio: {e}"

    audio_file = sr.AudioFile(audio)
    with audio_file as source:
        audio_data = recognizer.record(source)

    try:
        transcription = recognizer.recognize_google(audio_data)
        return transcription
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand the audio."
    except sr.RequestError as e:
        return f"Error with Google Speech Recognition service: {e}"
70
+
71
# Function to get IPA transcription
def ipa_transcription(sentence):
    """Return the IPA (phonetic) rendering of *sentence*.

    Any failure inside the eng_to_ipa converter is caught and reported
    as a human-readable error string instead of propagating.
    """
    try:
        return ipa.convert(sentence)
    except Exception as e:
        return f"Error during IPA transcription: {e}"
78
+
79
# Step 2: Create pronunciation audio for incorrect words (locally)
def create_pronunciation_audio(word):
    """Generate an mp3 pronunciation clip for *word* under the audio/ dir.

    Returns the local file path on success, or an error string when
    text-to-speech generation fails for any reason.
    """
    audio_file_path = f"audio/{word}.mp3"
    try:
        gTTS(word).save(audio_file_path)
    except Exception as e:
        return f"Failed to create pronunciation audio: {e}"
    return audio_file_path  # Return the local path instead of uploading
88
+
89
+ # Step 3: Compare the transcribed text with the input paragraph
90
  def compare_texts(reference_text, transcribed_text):
91
  reference_words = reference_text.split()
92
  transcribed_words = transcribed_text.split()
 
95
  sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
96
  similarity_score = round(sm.ratio() * 100, 2)
97
 
98
+ # Construct HTML output with detailed fidelity class
99
  html_output = f"<strong>Fidelity Class:</strong> "
100
  if similarity_score >= 85:
101
  html_output += f"<strong>GOOD (>=85%)</strong><br>"
 
108
 
109
  html_output += f"<strong>Quality Score:</strong> {similarity_score}%<br>"
110
  html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
111
+ html_output += f"<strong>IPA Transcription:</strong> {ipa_transcription(reference_text)}<br>" # Display IPA transcription
112
  html_output += "<strong>Word Score List:</strong><br>"
113
 
114
+ # Generate colored word score list
115
  for i, word in enumerate(reference_words):
116
  try:
117
  if word.lower() == transcribed_words[i].lower():
 
132
  if incorrect_words_audios:
133
  html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
134
  for word, audio in incorrect_words_audios:
135
+ suggestion = difflib.get_close_matches(word, reference_words, n=1)
136
+ suggestion_text = f" (Did you mean: <em>{suggestion[0]}</em>?)" if suggestion else ""
137
+ up_audio = upfilepath(audio)
138
+ audio_src = f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
139
  html_output += f'{word}: '
140
+ html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
141
 
142
  return [html_output]
143
 
 
144
  # Step 4: Text-to-Speech Function
145
  def text_to_speech(paragraph):
146
  if not paragraph:
 
186
  demo = gr.TabbedInterface([interface, tts_interface], ["Speech Recognition", "Text-to-Speech"])
187
 
188
  # Launch Gradio app
189
+ demo.launch()