speech_recognize

Runtime error

App Files Files Community

mr2along committed on Oct 23, 2024

Commit

7515a2b

verified ·

1 Parent(s): 006f012

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -20

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# Import required libraries
 import os
 import requests
 import speech_recognition as sr
@@ -10,17 +9,11 @@ from pydub import AudioSegment
 import time
 import eng_to_ipa as ipa
-# Function to create pronunciation audio
-def create_pronunciation_audio(word):
-    try:
-        tts = gTTS(word)
-        audio_file_path = f"audio/{word}.mp3"
-        tts.save(audio_file_path)
-        return audio_file_path  # Return the local path instead of uploading
-    except Exception as e:
-        return f"Failed to create pronunciation audio: {e}"
-# Function to upload audio files to the server
 def upfilepath(local_filename):
     ts = time.time()
     upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
@@ -28,18 +21,72 @@ def upfilepath(local_filename):
     try:
         response = requests.post(upload_url, files=files, timeout=30)  # Set timeout (e.g., 30 seconds)
         if response.status_code == 200:
             result = response.json()
             extracted_path = result[0]
             return extracted_path
         else:
             return None
     except requests.exceptions.Timeout:
         return "Request timed out. Please try again."
     except Exception as e:
         return f"An error occurred: {e}"
-# Update the compare_texts function
 def compare_texts(reference_text, transcribed_text):
     reference_words = reference_text.split()
     transcribed_words = transcribed_text.split()
@@ -48,7 +95,7 @@ def compare_texts(reference_text, transcribed_text):
     sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
     similarity_score = round(sm.ratio() * 100, 2)
-    # Construct HTML output
     html_output = f"<strong>Fidelity Class:</strong> "
     if similarity_score >= 85:
         html_output += f"<strong>GOOD (>=85%)</strong><br>"
@@ -61,10 +108,10 @@ def compare_texts(reference_text, transcribed_text):
     html_output += f"<strong>Quality Score:</strong> {similarity_score}%<br>"
     html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
-    html_output += f"<strong>IPA Transcription:</strong> {ipa_transcription(reference_text)}<br>"
     html_output += "<strong>Word Score List:</strong><br>"
-    # Generate colored word score list and audio links
     for i, word in enumerate(reference_words):
         try:
             if word.lower() == transcribed_words[i].lower():
@@ -85,14 +132,15 @@ def compare_texts(reference_text, transcribed_text):
     if incorrect_words_audios:
         html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
         for word, audio in incorrect_words_audios:
-            up_audio = upfilepath(audio)  # Upload the audio
-            audio_src = f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"  # Use the upload URL
             html_output += f'{word}: '
-            html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio><br>'
     return [html_output]
 # Step 4: Text-to-Speech Function
 def text_to_speech(paragraph):
     if not paragraph:
@@ -138,4 +186,4 @@ tts_interface = gr.Interface(
 demo = gr.TabbedInterface([interface, tts_interface], ["Speech Recognition", "Text-to-Speech"])
 # Launch Gradio app
-demo.launch()

 import os
 import requests
 import speech_recognition as sr
 import time
 import eng_to_ipa as ipa
# Ensure the audio output directory exists. exist_ok=True avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs('audio', exist_ok=True)
+# Upload a local file to the Space's Gradio upload endpoint
 def upfilepath(local_filename):
     ts = time.time()
     upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
     try:
         response = requests.post(upload_url, files=files, timeout=30)  # Set timeout (e.g., 30 seconds)
         if response.status_code == 200:
             result = response.json()
             extracted_path = result[0]
             return extracted_path
         else:
             return None
     except requests.exceptions.Timeout:
         return "Request timed out. Please try again."
     except Exception as e:
         return f"An error occurred: {e}"
+# Step 1: Transcribe the audio file
def transcribe_audio(audio):
    """Transcribe an audio file to text with Google Speech Recognition.

    Input that is not a WAV file is first converted to WAV through
    ``AudioSegment`` and written next to the source file under the same
    base name.

    Args:
        audio: Path to the audio file, or None.

    Returns:
        The transcription on success; otherwise a human-readable message
        describing the failure (errors are reported as strings, not raised).
    """
    if audio is None:
        return "No audio file provided."
    # Check if the file exists
    if not os.path.isfile(audio):
        return "Audio file not found."

    # Derive the WAV path from the real extension only. Calling
    # str.replace() on the whole path would also rewrite matching directory
    # names ("/mp3/a.mp3") and would miss upper-case extensions ("a.MP3"),
    # exporting the WAV data over the source file.
    base, ext = os.path.splitext(audio)
    if ext.lower() != '.wav':
        try:
            audio_segment = AudioSegment.from_file(audio)
            wav_path = f"{base}.wav"
            audio_segment.export(wav_path, format='wav')
            audio = wav_path
        except Exception as e:
            return f"Error converting audio: {e}"

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand the audio."
    except sr.RequestError as e:
        return f"Error with Google Speech Recognition service: {e}"
+# Function to get IPA transcription
def ipa_transcription(sentence):
    """Return the IPA rendering of *sentence*, or an error message string.

    Any exception raised while converting is reported in the returned text
    instead of propagating to the caller.
    """
    try:
        return ipa.convert(sentence)
    except Exception as exc:
        return f"Error during IPA transcription: {exc}"
+# Step 2: Create pronunciation audio for incorrect words (locally)
def create_pronunciation_audio(word):
    """Synthesize *word* with gTTS and save it as an MP3 under ``audio/``.

    Args:
        word: Text to synthesize; it also names the output file.

    Returns:
        The local path of the saved MP3, or a message starting with
        "Failed to create pronunciation audio:" if anything raised.
    """
    try:
        tts = gTTS(word)
        # Replace path separators and characters that are illegal in file
        # names so a token such as "and/or" cannot break out of, or fail
        # inside, the audio/ directory. Ordinary words keep the same name.
        unsafe = {ord(ch): '_' for ch in '\\/:*?"<>|\0'}
        file_name = word.translate(unsafe)
        audio_file_path = f"audio/{file_name}.mp3"
        tts.save(audio_file_path)
        return audio_file_path  # Return the local path instead of uploading
    except Exception as e:
        return f"Failed to create pronunciation audio: {e}"
+# Step 3: Compare the transcribed text with the input paragraph
 def compare_texts(reference_text, transcribed_text):
     reference_words = reference_text.split()
     transcribed_words = transcribed_text.split()
     sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
     similarity_score = round(sm.ratio() * 100, 2)
+    # Construct HTML output with detailed fidelity class
     html_output = f"<strong>Fidelity Class:</strong> "
     if similarity_score >= 85:
         html_output += f"<strong>GOOD (>=85%)</strong><br>"
     html_output += f"<strong>Quality Score:</strong> {similarity_score}%<br>"
     html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
+    html_output += f"<strong>IPA Transcription:</strong> {ipa_transcription(reference_text)}<br>"  # Display IPA transcription
     html_output += "<strong>Word Score List:</strong><br>"
+    # Generate colored word score list
     for i, word in enumerate(reference_words):
         try:
             if word.lower() == transcribed_words[i].lower():
     if incorrect_words_audios:
         html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
         for word, audio in incorrect_words_audios:
+            suggestion = difflib.get_close_matches(word, reference_words, n=1)
+            suggestion_text = f" (Did you mean: <em>{suggestion[0]}</em>?)" if suggestion else ""
+            up_audio = upfilepath(audio)
+            audio_src = f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
             html_output += f'{word}: '
+            html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
     return [html_output]
 # Step 4: Text-to-Speech Function
 def text_to_speech(paragraph):
     if not paragraph:
 demo = gr.TabbedInterface([interface, tts_interface], ["Speech Recognition", "Text-to-Speech"])
 # Launch Gradio app
+demo.launch()