Krishnavamshithumma committed on
Commit
1f0a91c
·
verified ·
1 Parent(s): b5d9400

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -35
app.py CHANGED
@@ -15,12 +15,13 @@ Answer questions about your background professionally but engagingly (2-3 senten
15
  r = sr.Recognizer()
16
 
17
  def transcribe_audio_and_chat(audio_filepath, history, api_key):
 
 
 
18
  if not api_key:
19
- # Raise a Gradio error to be displayed in the UI
20
  raise gr.Error("❌ Please enter your OpenAI API key.")
21
 
22
  if audio_filepath is None:
23
- # Raise a Gradio error if no audio is captured
24
  raise gr.Error("No audio received. Please speak into the microphone.")
25
 
26
  try:
@@ -32,62 +33,54 @@ def transcribe_audio_and_chat(audio_filepath, history, api_key):
32
  try:
33
  user_input = r.recognize_google(audio_data) # Using Google Web Speech API
34
  print(f"Transcribed User Input: {user_input}") # For debugging purposes
35
- # If transcription is successful, you might want to show it in the chat
36
- # before the AI responds. For now, we'll just use it directly.
37
 
38
  except sr.UnknownValueError:
39
- # If speech is unintelligible
40
- # Return current history and an error message for the chatbot
41
- return history + [("", "Sorry, I could not understand the audio. Please try again.")], ""
42
  except sr.RequestError as e:
43
- # If API request fails
44
- # Return current history and an error message for the chatbot
45
- return history + [("", f"Could not request results from Google Speech Recognition service; {e}")], ""
46
 
47
  finally:
48
- # Always clean up the temporary audio file, regardless of success or failure
49
  if os.path.exists(audio_filepath):
50
  os.remove(audio_filepath)
51
 
52
  # --- Proceed with OpenAI chat ---
53
  client = OpenAI(api_key=api_key)
54
 
55
- # Build messages from history
56
- messages = [{"role": "system", "content": system_prompt}]
57
- for entry in history:
58
- # Ensure history entries are tuples (user_message, bot_message)
59
- if isinstance(entry, (list, tuple)) and len(entry) == 2:
60
- messages.append({"role": "user", "content": entry[0]})
61
- messages.append({"role": "assistant", "content": entry[1]})
62
- messages.append({"role": "user", "content": user_input}) # Add the current user input
63
 
64
  # Get response from OpenAI
65
  response = client.chat.completions.create(
66
  model="gpt-4o",
67
- messages=messages,
68
  temperature=0.7
69
  )
70
 
71
  bot_reply = response.choices[0].message.content
72
 
73
- # Append the new user input and bot reply to the history
74
- history.append((user_input, bot_reply))
 
 
75
 
76
- # Return the updated history for the chatbot component
77
- # and an empty string for the audio input, effectively clearing it for next input.
78
- return history, None # Use None for the audio input to reset the component
79
 
80
  except Exception as e:
81
- # Catch any other unexpected errors
82
  print(f"An unexpected error occurred: {e}") # Log the error
83
- # Raise a Gradio error for display in the UI
84
- raise gr.Error(f"❌ An unexpected error occurred: {str(e)}")
85
 
86
- # --- Gradio UI setup remains mostly the same ---
87
  with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
88
  gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
89
 
90
- # Add custom CSS
91
  gr.HTML("""
92
  <style>
93
  #chatBox {
@@ -110,7 +103,7 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
110
  .bot {
111
  background: #f5f5f5;
112
  }
113
- #audioInputComponent { /* New ID for the audio component */
114
  margin-top: 20px;
115
  }
116
  .key-status {
@@ -131,8 +124,10 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
131
 
132
  api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password", elem_id="apiKeyInput")
133
  key_status = gr.HTML("<div id='keyStatus'></div>")
 
 
134
  chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
135
- state = gr.State([]) # Stores the chat history
136
 
137
  audio_input = gr.Audio(
138
  sources=["microphone"],
@@ -144,14 +139,12 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
144
 
145
  clear_btn = gr.Button("🗑️ Clear Chat")
146
 
147
- # Event handler: When audio input is recorded and submitted (by stopping recording)
148
  audio_input.change(
149
  transcribe_audio_and_chat,
150
  inputs=[audio_input, state, api_key],
151
- outputs=[chatbot, state] # Ensure chatbot and state are updated
152
  )
153
 
154
- # JavaScript for API key status (still useful for UX)
155
  gr.HTML("""
156
  <script>
157
  document.getElementById("apiKeyInput").addEventListener("input", function() {
@@ -165,11 +158,11 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
165
  }
166
  });
167
 
168
- // Focus on API key input on load
169
  document.querySelector("#apiKeyInput input").focus();
170
  </script>
171
  """)
172
 
 
173
  clear_btn.click(lambda: ([], []), None, [chatbot, state])
174
 
175
  demo.launch()
 
15
  r = sr.Recognizer()
16
 
17
  def transcribe_audio_and_chat(audio_filepath, history, api_key):
18
+ # 'history' now directly contains the list of message dictionaries
19
+ # that we can directly pass to OpenAI, after adding current turn.
20
+
21
  if not api_key:
 
22
  raise gr.Error("❌ Please enter your OpenAI API key.")
23
 
24
  if audio_filepath is None:
 
25
  raise gr.Error("No audio received. Please speak into the microphone.")
26
 
27
  try:
 
33
  try:
34
  user_input = r.recognize_google(audio_data) # Using Google Web Speech API
35
  print(f"Transcribed User Input: {user_input}") # For debugging purposes
 
 
36
 
37
  except sr.UnknownValueError:
38
+ # If speech is unintelligible, add an assistant message to history
39
+ history.append({"role": "assistant", "content": "Sorry, I could not understand the audio. Please try again."})
40
+ return history, None
41
  except sr.RequestError as e:
42
+ # If API request fails, add an assistant message to history
43
+ history.append({"role": "assistant", "content": f"Could not request results from Google Speech Recognition service; {e}"})
44
+ return history, None
45
 
46
  finally:
47
+ # Always clean up the temporary audio file
48
  if os.path.exists(audio_filepath):
49
  os.remove(audio_filepath)
50
 
51
  # --- Proceed with OpenAI chat ---
52
  client = OpenAI(api_key=api_key)
53
 
54
+ # Create the full messages list for OpenAI, starting with the system prompt
55
+ # and then appending the current chat history, followed by the new user input.
56
+ messages_for_openai = [{"role": "system", "content": system_prompt}] + history
57
+ messages_for_openai.append({"role": "user", "content": user_input})
 
 
 
 
58
 
59
  # Get response from OpenAI
60
  response = client.chat.completions.create(
61
  model="gpt-4o",
62
+ messages=messages_for_openai, # Pass the correctly formatted messages
63
  temperature=0.7
64
  )
65
 
66
  bot_reply = response.choices[0].message.content
67
 
68
+ # Append both the user input and bot reply to the *Gradio* history (state)
69
+ # in the 'messages' format that Gradio's chatbot expects.
70
+ history.append({"role": "user", "content": user_input})
71
+ history.append({"role": "assistant", "content": bot_reply})
72
 
73
+ # Return the updated history for the chatbot component, and clear the audio input.
74
+ return history, None
 
75
 
76
  except Exception as e:
 
77
  print(f"An unexpected error occurred: {e}") # Log the error
 
78
  raise gr.Error(f"❌ An unexpected error occurred: {str(e)}")
79
 
80
+ # --- Gradio UI setup ---
81
  with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
82
  gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
83
 
 
84
  gr.HTML("""
85
  <style>
86
  #chatBox {
 
103
  .bot {
104
  background: #f5f5f5;
105
  }
106
+ #audioInputComponent {
107
  margin-top: 20px;
108
  }
109
  .key-status {
 
124
 
125
  api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password", elem_id="apiKeyInput")
126
  key_status = gr.HTML("<div id='keyStatus'></div>")
127
+
128
+ # Crucially, set type="messages" here to match OpenAI's expected format
129
  chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
130
+ state = gr.State([]) # Now `state` will directly hold OpenAI-compatible messages
131
 
132
  audio_input = gr.Audio(
133
  sources=["microphone"],
 
139
 
140
  clear_btn = gr.Button("🗑️ Clear Chat")
141
 
 
142
  audio_input.change(
143
  transcribe_audio_and_chat,
144
  inputs=[audio_input, state, api_key],
145
+ outputs=[chatbot, state]
146
  )
147
 
 
148
  gr.HTML("""
149
  <script>
150
  document.getElementById("apiKeyInput").addEventListener("input", function() {
 
158
  }
159
  });
160
 
 
161
  document.querySelector("#apiKeyInput input").focus();
162
  </script>
163
  """)
164
 
165
+ # When clearing, ensure state is reset to an empty list, matching the 'messages' format
166
  clear_btn.click(lambda: ([], []), None, [chatbot, state])
167
 
168
  demo.launch()