speech_recognize

Runtime error

App Files Files Community

mr2along committed 16 days ago

Commit

33ead9a

•

1 Parent(s): 257e787

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -28

app.py CHANGED Viewed

@@ -7,30 +7,11 @@ from gtts import gTTS
 import io
 from pydub import AudioSegment
 import time
 # Create audio directory if it doesn't exist
 if not os.path.exists('audio'):
     os.makedirs('audio')
-# Step 2: Create pronunciation audio for incorrect words
-def upfilepath(local_filename):
-    ts = time.time()
-    upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
-    files = {'files': open(local_filename, 'rb')}
-    try:
-        response = requests.post(upload_url, files=files, timeout=30)  # Set timeout (e.g., 30 seconds)
-        if response.status_code == 200:
-            result = response.json()
-            extracted_path = result[0]
-            return extracted_path
-        else:
-            return None
-    except requests.exceptions.Timeout:
-        return "Request timed out. Please try again."
-    except Exception as e:
-        return f"An error occurred: {e}"
 # Step 1: Transcribe the audio file
 def transcribe_audio(audio):
@@ -59,7 +40,7 @@ def transcribe_audio(audio):
         audio_data = recognizer.record(source)
     try:
-        transcription = recognizer.recognize_google(audio_data)
         return transcription
     except sr.UnknownValueError:
         return "Google Speech Recognition could not understand the audio"
@@ -76,6 +57,27 @@ def create_pronunciation_audio(word):
     except Exception as e:
         return f"Failed to create pronunciation audio: {e}"
 # Step 3: Compare the transcribed text with the input paragraph
 def compare_texts(reference_text, transcribed_text):
     reference_words = reference_text.split()
@@ -123,12 +125,15 @@ def compare_texts(reference_text, transcribed_text):
         for word, audio in incorrect_words_audios:
             suggestion = difflib.get_close_matches(word, reference_words, n=1)
             suggestion_text = f" (Did you mean: <em>{suggestion[0]}</em>?)" if suggestion else ""
-            up_audio=upfilepath(audio)
-            audio_src=f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
             html_output += f'{word}: '
             html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
-    #return [html_output, [audio for _, audio in incorrect_words_audios]]
     return [html_output]
 # Step 4: Text-to-Speech Function
@@ -150,7 +155,7 @@ def gradio_function(paragraph, audio):
     # Return comparison result
     return comparison_result
 # Gradio Interface using the updated API
 interface = gr.Interface(
     fn=gradio_function,
@@ -158,10 +163,9 @@ interface = gr.Interface(
         gr.Textbox(lines=5, label="Input Paragraph"),
         gr.Audio(type="filepath", label="Record Audio")
     ],
-    #outputs=["html","files"],
     outputs=["html"],
-    title="Speech Recognition Comparison",
-    description="Input a paragraph, record your audio, and compare the transcription to the original text."
 )
 # Gradio Interface for Text-to-Speech

 import io
 from pydub import AudioSegment
 import time
+from underthesea import phonetic
 # Create audio directory if it doesn't exist
 if not os.path.exists('audio'):
     os.makedirs('audio')
 # Step 1: Transcribe the audio file
 def transcribe_audio(audio):
         audio_data = recognizer.record(source)
     try:
+        transcription = recognizer.recognize_google(audio_data, language='vi-VN')  # For Vietnamese
         return transcription
     except sr.UnknownValueError:
         return "Google Speech Recognition could not understand the audio"
     except Exception as e:
         return f"Failed to create pronunciation audio: {e}"
+# Upload function to Hugging Face Space
+def upfilepath(local_filename):
+    ts = time.time()
+    upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
+    files = {'files': open(local_filename, 'rb')}
+    try:
+        response = requests.post(upload_url, files=files, timeout=30)  # Set timeout (e.g., 30 seconds)
+        if response.status_code == 200:
+            result = response.json()
+            extracted_path = result[0]
+            return extracted_path
+        else:
+            return None
+    except requests.exceptions.Timeout:
+        return "Request timed out. Please try again."
+    except Exception as e:
+        return f"An error occurred: {e}"
 # Step 3: Compare the transcribed text with the input paragraph
 def compare_texts(reference_text, transcribed_text):
     reference_words = reference_text.split()
         for word, audio in incorrect_words_audios:
             suggestion = difflib.get_close_matches(word, reference_words, n=1)
             suggestion_text = f" (Did you mean: <em>{suggestion[0]}</em>?)" if suggestion else ""
+            up_audio = upfilepath(audio)
+            audio_src = f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
             html_output += f'{word}: '
             html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
+    # Step 4: Vietnamese Phonetic Transcription
+    phonetic_transcription = phonetic(reference_text)
+    html_output += f"<br><strong>Phonetic Transcription (Vietnamese):</strong> {phonetic_transcription}<br>"
     return [html_output]
 # Step 4: Text-to-Speech Function
     # Return comparison result
     return comparison_result
 # Gradio Interface using the updated API
 interface = gr.Interface(
     fn=gradio_function,
         gr.Textbox(lines=5, label="Input Paragraph"),
         gr.Audio(type="filepath", label="Record Audio")
     ],
     outputs=["html"],
+    title="Speech Recognition Comparison with Phonetic Transcription",
+    description="Input a paragraph, record your audio, and compare the transcription to the original text. Also, see phonetic transcription for Vietnamese."
 )
 # Gradio Interface for Text-to-Speech