speech_recognize

Runtime error

App Files Files Community

mr2along committed on Oct 11, 2024

Commit

9586c71

verified ·

1 Parent(s): 91a2ea1

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -14

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import speech_recognition as sr
 import difflib
 import gradio as gr
 from gtts import gTTS
-import os
 # Step 1: Transcribe the audio file
 def transcribe_audio(audio):
@@ -26,16 +26,17 @@ def transcribe_audio(audio):
 # Step 2: Create pronunciation audio for incorrect words
 def create_pronunciation_audio(word):
     tts = gTTS(word)
-    audio_filename = f"pronunciation_{word}.mp3"
-    tts.save(audio_filename)
-    return audio_filename
 # Step 3: Compare the transcribed text with the input paragraph
 def compare_texts(reference_text, transcribed_text):
     word_scores = []
     reference_words = reference_text.split()
     transcribed_words = transcribed_text.split()
-    incorrect_words_audio_links = []  # Store audio links for incorrect words
     sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
     similarity_score = round(sm.ratio() * 100, 2)
@@ -57,24 +58,28 @@ def compare_texts(reference_text, transcribed_text):
                 # Incorrect words in red
                 html_output += f'<span style="color: red;">{word}</span> '
                 # Create pronunciation audio for the incorrect word
-                audio_link = create_pronunciation_audio(word)
-                incorrect_words_audio_links.append(audio_link)
         except IndexError:
             html_output += f'<span style="color: red;">{word}</span> '  # Words in reference that were not transcribed
-    # Provide audio links for incorrect words
-    if incorrect_words_audio_links:
         html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
-        for audio in incorrect_words_audio_links:
-            html_output += f'<a href="{audio}" target="_blank">Listen</a><br>'
     return html_output
 # Step 4: Text-to-Speech Function
 def text_to_speech(paragraph):
     tts = gTTS(paragraph)
-    tts.save("paragraph.mp3")
-    return "paragraph.mp3"
 # Gradio Interface Function
 def gradio_function(paragraph, audio):
@@ -103,7 +108,7 @@ interface = gr.Interface(
 tts_interface = gr.Interface(
     fn=text_to_speech,
     inputs=gr.Textbox(lines=5, label="Input Paragraph to Read Aloud"),
-    outputs=gr.Audio(label="Text-to-Speech Output", type="filepath"),
     title="Text-to-Speech",
     description="This tool will read your input paragraph aloud."
 )

 import difflib
 import gradio as gr
 from gtts import gTTS
+import io
 # Step 1: Transcribe the audio file
 def transcribe_audio(audio):
 # Step 2: Create pronunciation audio for incorrect words
 def create_pronunciation_audio(word):
     """Synthesize speech for ``word`` and return it as an in-memory buffer.

     Args:
         word: Text to pass to gTTS.

     Returns:
         An ``io.BytesIO`` holding the audio written by gTTS, rewound to
         position 0 so callers can read it from the start.
     """
     tts = gTTS(word)
     audio_buffer = io.BytesIO()
     # gTTS.save() takes a file *path*; handing it a BytesIO leaves the buffer
     # empty. write_to_fp() is the gTTS call that writes into a file-like object.
     tts.write_to_fp(audio_buffer)
     audio_buffer.seek(0)
     return audio_buffer
 # Step 3: Compare the transcribed text with the input paragraph
 def compare_texts(reference_text, transcribed_text):
     word_scores = []
     reference_words = reference_text.split()
     transcribed_words = transcribed_text.split()
+    incorrect_words_audios = []  # Store audio buffers for incorrect words
     sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
     similarity_score = round(sm.ratio() * 100, 2)
                 # Incorrect words in red
                 html_output += f'<span style="color: red;">{word}</span> '
                 # Create pronunciation audio for the incorrect word
+                audio_buffer = create_pronunciation_audio(word)
+                incorrect_words_audios.append((word, audio_buffer))
         except IndexError:
             html_output += f'<span style="color: red;">{word}</span> '  # Words in reference that were not transcribed
+    # Provide audio for incorrect words
+    if incorrect_words_audios:
         html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
+        for word, audio in incorrect_words_audios:
+            html_output += f'{word}: '
+            # Return the audio buffer as part of the HTML output
+            html_output += f'<audio controls><source src="data:audio/mp3;base64,{audio.getvalue().decode()}" type="audio/mpeg">Your browser does not support the audio tag.</audio><br>'
     return html_output
 # Step 4: Text-to-Speech Function
 def text_to_speech(paragraph):
     """Synthesize speech for ``paragraph`` and return it as an in-memory buffer.

     Args:
         paragraph: Text to pass to gTTS.

     Returns:
         An ``io.BytesIO`` holding the audio written by gTTS, rewound to
         position 0.
     """
     tts = gTTS(paragraph)
     audio_buffer = io.BytesIO()
     # gTTS.save() takes a file *path*; handing it a BytesIO leaves the buffer
     # empty. write_to_fp() is the gTTS call that writes into a file-like object.
     tts.write_to_fp(audio_buffer)
     audio_buffer.seek(0)
     # NOTE(review): this function is used as the fn of a gr.Interface whose
     # output is gr.Audio; confirm that component accepts a BytesIO, otherwise
     # return audio_buffer.getvalue() or a temporary file path instead.
     return audio_buffer
 # Gradio Interface Function
 def gradio_function(paragraph, audio):
 # Gradio interface that wraps text_to_speech: one textbox in, one audio out.
 tts_interface = gr.Interface(
     title="Text-to-Speech",
     description="This tool will read your input paragraph aloud.",
     fn=text_to_speech,
     inputs=gr.Textbox(label="Input Paragraph to Read Aloud", lines=5),
     outputs=gr.Audio(label="Text-to-Speech Output"),
 )