Update app.py
app.py CHANGED
@@ -1,11 +1,11 @@
-import os
 import gradio as gr
 import openai
 import speech_recognition as sr
 import logging
+import traceback

 # Set up logging
-logging.basicConfig(level=logging.
+logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)

 # Set OpenAI API key
@@ -27,6 +27,8 @@ def generate_text():

 def get_pronunciation_feedback(original_text, transcription):
     try:
+        logger.debug(f"Original text: {original_text}")
+        logger.debug(f"Transcription: {transcription}")
         response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
             messages=[
@@ -34,73 +36,52 @@ def get_pronunciation_feedback(original_text, transcription):
                 {"role": "user", "content": f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."}
             ]
         )
-
+        feedback = response.choices[0].message['content']
+        logger.debug(f"Generated feedback: {feedback}")
+        return feedback
     except Exception as e:
         logger.error(f"Error in get_pronunciation_feedback: {str(e)}")
+        logger.error(traceback.format_exc())
         return "Error generating feedback. Please try again."

 def transcribe_audio_realtime(audio):
+
+
+
     try:
+        logger.debug(f"Received audio file: {audio}")
         recognizer = sr.Recognizer()
         with sr.AudioFile(audio) as source:
+            logger.debug("Reading audio file")
             audio_data = recognizer.record(source)
-
+        logger.debug("Transcribing audio")
+        transcription = recognizer.recognize_google(audio_data)
+        logger.debug(f"Transcription result: {transcription}")
+        return transcription
     except sr.UnknownValueError:
+        logger.warning("Could not understand audio")
         return "Could not understand audio"
     except sr.RequestError as e:
         logger.error(f"Could not request results from the speech recognition service; {str(e)}")
         return "Error in speech recognition service"
     except Exception as e:
         logger.error(f"Error in transcribe_audio_realtime: {str(e)}")
+        logger.error(traceback.format_exc())
         return "Error transcribing audio. Please try again."

 def practice_pronunciation(audio, text_to_read):
+    logger.info("Starting practice_pronunciation function")
     if not text_to_read:
+        logger.info("Generating new text to read")
         text_to_read = generate_text()
-
-    feedback = get_pronunciation_feedback(text_to_read, transcription)
-    return text_to_read, transcription, feedback
-
-# Custom CSS for improved styling
-custom_css = """
-.container {max-width: 800px; margin: auto; padding: 20px;}
-.title {text-align: center; color: #2c3e50; margin-bottom: 20px;}
-.subtitle {text-align: center; color: #34495e; margin-bottom: 30px;}
-.input-section, .output-section {background-color: #ecf0f1; padding: 20px; border-radius: 10px; margin-bottom: 20px;}
-.input-section h3, .output-section h3 {color: #2980b9; margin-bottom: 10px;}
-.button-primary {background-color: #3498db !important;}
-.button-secondary {background-color: #2ecc71 !important;}
-"""
-
-# Gradio interface with improved UI
-with gr.Blocks(css=custom_css) as demo:
-    gr.HTML("<div class='container'>")
-    gr.HTML("<h1 class='title'>Pronunciation Practice Tool</h1>")
-    gr.HTML("<p class='subtitle'>Improve your English pronunciation with AI-powered feedback</p>")
+    logger.info(f"Text to read: {text_to_read}")

-
-
-
-        text_to_read = gr.Textbox(label="Text to Read", placeholder="Click 'Generate New Text' or type your own text here")
-        generate_button = gr.Button("Generate New Text", variant="primary", className="button-primary")
-
-    with gr.Box(className="input-section"):
-        gr.HTML("<h3>Step 2: Record Your Voice</h3>")
-        audio_input = gr.Audio(type="filepath", label="Record your voice reading the text above")
-
-    with gr.Box(className="output-section"):
-        gr.HTML("<h3>Step 3: Get Feedback</h3>")
-        with gr.Row():
-            transcription_output = gr.Textbox(label="Your Transcription", lines=3)
-            feedback_output = gr.Textbox(label="Pronunciation Feedback", lines=5)
+    logger.info("Starting transcription")
+    transcription = transcribe_audio_realtime(audio)
+    logger.info(f"Transcription result: {transcription}")

-
+    logger.info("Getting pronunciation feedback")
+    feedback = get_pronunciation_feedback(text_to_read, transcription)
+    logger.info(f"Feedback generated: {feedback}")

-
-    submit_button.click(practice_pronunciation, inputs=[audio_input, text_to_read], outputs=[text_to_read, transcription_output, feedback_output])
-
-    gr.HTML("</div>")
-
-# Launch the app
-if __name__ == "__main__":
-    demo.launch()
+    return text_to_read, transcription, feedback
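The final hunk strips the Blocks UI from the shown range, and the new version of this region now ends at return text_to_read, transcription, feedback, so the updated interface wiring is not visible in this commit view. For orientation only, a minimal sketch of how the revised practice_pronunciation and generate_text could be hooked into a Gradio Blocks layout is given below; the component names are carried over from the removed code, while the layout, labels, and the generate_button hookup are assumptions rather than the Space's actual interface code.

# Illustrative wiring only: assumes practice_pronunciation and generate_text
# from app.py are in scope; layout and labels are simplified placeholders.
import gradio as gr

with gr.Blocks() as demo:
    text_to_read = gr.Textbox(label="Text to Read")
    generate_button = gr.Button("Generate New Text")
    audio_input = gr.Audio(type="filepath", label="Record your voice")
    transcription_output = gr.Textbox(label="Your Transcription", lines=3)
    feedback_output = gr.Textbox(label="Pronunciation Feedback", lines=5)
    submit_button = gr.Button("Submit")

    # generate_text takes no inputs and returns the practice sentence.
    generate_button.click(generate_text, inputs=None, outputs=text_to_read)

    # practice_pronunciation returns (text_to_read, transcription, feedback),
    # which maps onto the three output components wired here.
    submit_button.click(
        practice_pronunciation,
        inputs=[audio_input, text_to_read],
        outputs=[text_to_read, transcription_output, feedback_output],
    )

if __name__ == "__main__":
    demo.launch()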
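To exercise the new DEBUG and INFO logging without going through the UI, a quick local check could look like the sketch below. The audio path and the reference sentence are placeholders, not files or strings from the repository, and the helpers are assumed to be imported from app.py.

# Hypothetical smoke test for the instrumented helpers in app.py.
# "sample.wav" is an assumed local recording; any short WAV file works.
transcription = transcribe_audio_realtime("sample.wav")
feedback = get_pronunciation_feedback(
    "The quick brown fox jumps over the lazy dog.", transcription
)
print("Transcription:", transcription)
print("Feedback:", feedback)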