AI-Edify committed on
Commit a83e487 · verified · 1 Parent(s): 9da39f9

Update app.py

Files changed (1)
  1. app.py +62 -22
app.py CHANGED
@@ -2,46 +2,86 @@ import os
 import gradio as gr
 import openai
 import speech_recognition as sr
+import logging
+import traceback
+
+# Set up logging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)

 # Set OpenAI API key
 openai.api_key = os.environ.get("OPENAI_API_KEY")

 def generate_text():
-    response = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": "Generate exactly two simple sentences for English pronunciation practice. Do not include any instructions, comments, or additional text."},
-            {"role": "user", "content": "Create two simple sentences for pronunciation practice."}
-        ]
-    )
-    return response.choices[0].message['content'].strip()
+    try:
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "Generate exactly two simple sentences for English pronunciation practice. Do not include any instructions, comments, or additional text."},
+                {"role": "user", "content": "Create two simple sentences for pronunciation practice."}
+            ]
+        )
+        return response.choices[0].message['content'].strip()
+    except Exception as e:
+        logger.error(f"Error in generate_text: {str(e)}")
+        return "Error generating text. Please try again."

 def get_pronunciation_feedback(original_text, transcription):
-    response = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": "You are a helpful pronunciation assistant. Compare the generated text with the user's transcription and provide feedback on how the user can improve their pronunciation. Single out specific words they pronounced incorrectly and give tips on how to improve, like for example 'schedule' can be pronounced as 'sked-jool'."},
-            {"role": "user", "content": f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."}
-        ]
-    )
-    return response.choices[0].message['content']
+    try:
+        logger.debug(f"Original text: {original_text}")
+        logger.debug(f"Transcription: {transcription}")
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful pronunciation assistant. Compare the generated text with the user's transcription and provide feedback on how the user can improve their pronunciation. Single out specific words they pronounced incorrectly and give tips on how to improve, like for example 'schedule' can be pronounced as 'sked-jool'."},
+                {"role": "user", "content": f"Original text: '{original_text}'\nTranscription: '{transcription}'\nProvide pronunciation feedback."}
+            ]
+        )
+        feedback = response.choices[0].message['content']
+        logger.debug(f"Generated feedback: {feedback}")
+        return feedback
+    except Exception as e:
+        logger.error(f"Error in get_pronunciation_feedback: {str(e)}")
+        logger.error(traceback.format_exc())
+        return "Error generating feedback. Please try again."

 def transcribe_audio_realtime(audio):
-    recognizer = sr.Recognizer()
-    with sr.AudioFile(audio) as source:
-        audio_data = recognizer.record(source)
     try:
-        return recognizer.recognize_google(audio_data)
+        logger.debug(f"Received audio file: {audio}")
+        recognizer = sr.Recognizer()
+        with sr.AudioFile(audio) as source:
+            logger.debug("Reading audio file")
+            audio_data = recognizer.record(source)
+        logger.debug("Transcribing audio")
+        transcription = recognizer.recognize_google(audio_data)
+        logger.debug(f"Transcription result: {transcription}")
+        return transcription
     except sr.UnknownValueError:
+        logger.warning("Could not understand audio")
         return "Could not understand audio"
-    except sr.RequestError:
-        return "Could not request results from the speech recognition service"
+    except sr.RequestError as e:
+        logger.error(f"Could not request results from the speech recognition service; {str(e)}")
+        return "Error in speech recognition service"
+    except Exception as e:
+        logger.error(f"Error in transcribe_audio_realtime: {str(e)}")
+        logger.error(traceback.format_exc())
+        return "Error transcribing audio. Please try again."

 def practice_pronunciation(audio, text_to_read):
+    logger.info("Starting practice_pronunciation function")
     if not text_to_read:
+        logger.info("Generating new text to read")
         text_to_read = generate_text()
+    logger.info(f"Text to read: {text_to_read}")
+
+    logger.info("Starting transcription")
     transcription = transcribe_audio_realtime(audio)
+    logger.info(f"Transcription result: {transcription}")
+
+    logger.info("Getting pronunciation feedback")
     feedback = get_pronunciation_feedback(text_to_read, transcription)
+    logger.info(f"Feedback generated: {feedback}")
+
     return text_to_read, transcription, feedback

 # Gradio interface
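
The hunk ends at the `# Gradio interface` comment, so the interface wiring itself is not part of this commit. As a rough sketch of how `practice_pronunciation` could be hooked up in Gradio (the component choices, labels, and the `demo` name below are assumptions, not code from this repository):

```python
# Hypothetical interface sketch; assumes the imports and functions from app.py above.
demo = gr.Interface(
    fn=practice_pronunciation,
    inputs=[
        # type="filepath" gives transcribe_audio_realtime a path it can open with sr.AudioFile
        gr.Audio(type="filepath", label="Record your reading"),
        gr.Textbox(label="Text to read (leave blank to auto-generate)"),
    ],
    outputs=[
        gr.Textbox(label="Text to read"),
        gr.Textbox(label="Your transcription"),
        gr.Textbox(label="Pronunciation feedback"),
    ],
    title="Pronunciation Practice",
)

if __name__ == "__main__":
    demo.launch()
```

The three output textboxes mirror the three values `practice_pronunciation` returns: the text to read, the transcription, and the feedback.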