Update app.py
Browse files
app.py
CHANGED
@@ -15,12 +15,13 @@ Answer questions about your background professionally but engagingly (2-3 senten
|
|
15 |
r = sr.Recognizer()
|
16 |
|
17 |
def transcribe_audio_and_chat(audio_filepath, history, api_key):
|
|
|
|
|
|
|
18 |
if not api_key:
|
19 |
-
# Raise a Gradio error to be displayed in the UI
|
20 |
raise gr.Error("❌ Please enter your OpenAI API key.")
|
21 |
|
22 |
if audio_filepath is None:
|
23 |
-
# Raise a Gradio error if no audio is captured
|
24 |
raise gr.Error("No audio received. Please speak into the microphone.")
|
25 |
|
26 |
try:
|
@@ -32,62 +33,54 @@ def transcribe_audio_and_chat(audio_filepath, history, api_key):
|
|
32 |
try:
|
33 |
user_input = r.recognize_google(audio_data) # Using Google Web Speech API
|
34 |
print(f"Transcribed User Input: {user_input}") # For debugging purposes
|
35 |
-
# If transcription is successful, you might want to show it in the chat
|
36 |
-
# before the AI responds. For now, we'll just use it directly.
|
37 |
|
38 |
except sr.UnknownValueError:
|
39 |
-
# If speech is unintelligible
|
40 |
-
|
41 |
-
return history
|
42 |
except sr.RequestError as e:
|
43 |
-
# If API request fails
|
44 |
-
|
45 |
-
return history
|
46 |
|
47 |
finally:
|
48 |
-
# Always clean up the temporary audio file
|
49 |
if os.path.exists(audio_filepath):
|
50 |
os.remove(audio_filepath)
|
51 |
|
52 |
# --- Proceed with OpenAI chat ---
|
53 |
client = OpenAI(api_key=api_key)
|
54 |
|
55 |
-
#
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
if isinstance(entry, (list, tuple)) and len(entry) == 2:
|
60 |
-
messages.append({"role": "user", "content": entry[0]})
|
61 |
-
messages.append({"role": "assistant", "content": entry[1]})
|
62 |
-
messages.append({"role": "user", "content": user_input}) # Add the current user input
|
63 |
|
64 |
# Get response from OpenAI
|
65 |
response = client.chat.completions.create(
|
66 |
model="gpt-4o",
|
67 |
-
messages=messages
|
68 |
temperature=0.7
|
69 |
)
|
70 |
|
71 |
bot_reply = response.choices[0].message.content
|
72 |
|
73 |
-
# Append the
|
74 |
-
|
|
|
|
|
75 |
|
76 |
-
# Return the updated history for the chatbot component
|
77 |
-
|
78 |
-
return history, None # Use None for the audio input to reset the component
|
79 |
|
80 |
except Exception as e:
|
81 |
-
# Catch any other unexpected errors
|
82 |
print(f"An unexpected error occurred: {e}") # Log the error
|
83 |
-
# Raise a Gradio error for display in the UI
|
84 |
raise gr.Error(f"❌ An unexpected error occurred: {str(e)}")
|
85 |
|
86 |
-
# --- Gradio UI setup
|
87 |
with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
|
88 |
gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
|
89 |
|
90 |
-
# Add custom CSS
|
91 |
gr.HTML("""
|
92 |
<style>
|
93 |
#chatBox {
|
@@ -110,7 +103,7 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
|
|
110 |
.bot {
|
111 |
background: #f5f5f5;
|
112 |
}
|
113 |
-
#audioInputComponent {
|
114 |
margin-top: 20px;
|
115 |
}
|
116 |
.key-status {
|
@@ -131,8 +124,10 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
|
|
131 |
|
132 |
api_key = gr.Textbox(label="🔑 OpenAI API Key", type="password", elem_id="apiKeyInput")
|
133 |
key_status = gr.HTML("<div id='keyStatus'></div>")
|
|
|
|
|
134 |
chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
|
135 |
-
state = gr.State([]) #
|
136 |
|
137 |
audio_input = gr.Audio(
|
138 |
sources=["microphone"],
|
@@ -144,14 +139,12 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
|
|
144 |
|
145 |
clear_btn = gr.Button("🗑️ Clear Chat")
|
146 |
|
147 |
-
# Event handler: When audio input is recorded and submitted (by stopping recording)
|
148 |
audio_input.change(
|
149 |
transcribe_audio_and_chat,
|
150 |
inputs=[audio_input, state, api_key],
|
151 |
-
outputs=[chatbot, state]
|
152 |
)
|
153 |
|
154 |
-
# JavaScript for API key status (still useful for UX)
|
155 |
gr.HTML("""
|
156 |
<script>
|
157 |
document.getElementById("apiKeyInput").addEventListener("input", function() {
|
@@ -165,11 +158,11 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
|
|
165 |
}
|
166 |
});
|
167 |
|
168 |
-
// Focus on API key input on load
|
169 |
document.querySelector("#apiKeyInput input").focus();
|
170 |
</script>
|
171 |
""")
|
172 |
|
|
|
173 |
clear_btn.click(lambda: ([], []), None, [chatbot, state])
|
174 |
|
175 |
demo.launch()
|
|
|
15 |
r = sr.Recognizer()
|
16 |
|
17 |
def transcribe_audio_and_chat(audio_filepath, history, api_key):
|
18 |
+
# 'history' now directly contains the list of message dictionaries
|
19 |
+
# that we can directly pass to OpenAI, after adding current turn.
|
20 |
+
|
21 |
if not api_key:
|
|
|
22 |
raise gr.Error("❌ Please enter your OpenAI API key.")
|
23 |
|
24 |
if audio_filepath is None:
|
|
|
25 |
raise gr.Error("No audio received. Please speak into the microphone.")
|
26 |
|
27 |
try:
|
|
|
33 |
try:
|
34 |
user_input = r.recognize_google(audio_data) # Using Google Web Speech API
|
35 |
print(f"Transcribed User Input: {user_input}") # For debugging purposes
|
|
|
|
|
36 |
|
37 |
except sr.UnknownValueError:
|
38 |
+
# If speech is unintelligible, add an assistant message to history
|
39 |
+
history.append({"role": "assistant", "content": "Sorry, I could not understand the audio. Please try again."})
|
40 |
+
return history, None
|
41 |
except sr.RequestError as e:
|
42 |
+
# If API request fails, add an assistant message to history
|
43 |
+
history.append({"role": "assistant", "content": f"Could not request results from Google Speech Recognition service; {e}"})
|
44 |
+
return history, None
|
45 |
|
46 |
finally:
|
47 |
+
# Always clean up the temporary audio file
|
48 |
if os.path.exists(audio_filepath):
|
49 |
os.remove(audio_filepath)
|
50 |
|
51 |
# --- Proceed with OpenAI chat ---
|
52 |
client = OpenAI(api_key=api_key)
|
53 |
|
54 |
+
# Create the full messages list for OpenAI, starting with the system prompt
|
55 |
+
# and then appending the current chat history, followed by the new user input.
|
56 |
+
messages_for_openai = [{"role": "system", "content": system_prompt}] + history
|
57 |
+
messages_for_openai.append({"role": "user", "content": user_input})
|
|
|
|
|
|
|
|
|
58 |
|
59 |
# Get response from OpenAI
|
60 |
response = client.chat.completions.create(
|
61 |
model="gpt-4o",
|
62 |
+
messages=messages_for_openai, # Pass the correctly formatted messages
|
63 |
temperature=0.7
|
64 |
)
|
65 |
|
66 |
bot_reply = response.choices[0].message.content
|
67 |
|
68 |
+
# Append both the user input and bot reply to the *Gradio* history (state)
|
69 |
+
# in the 'messages' format that Gradio's chatbot expects.
|
70 |
+
history.append({"role": "user", "content": user_input})
|
71 |
+
history.append({"role": "assistant", "content": bot_reply})
|
72 |
|
73 |
+
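# Return the updated history for the chatbot component. NOTE(review): the second value is wired to `state` (outputs=[chatbot, state]), not to the audio input, so returning None overwrites the stored history instead of clearing the recorder - confirm and return the history for `state` instead.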
|
74 |
+
return history, None
|
|
|
75 |
|
76 |
except Exception as e:
|
|
|
77 |
print(f"An unexpected error occurred: {e}") # Log the error
|
|
|
78 |
raise gr.Error(f"❌ An unexpected error occurred: {str(e)}")
|
79 |
|
80 |
+
# --- Gradio UI setup ---
|
81 |
with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
|
82 |
gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
|
83 |
|
|
|
84 |
gr.HTML("""
|
85 |
<style>
|
86 |
#chatBox {
|
|
|
103 |
.bot {
|
104 |
background: #f5f5f5;
|
105 |
}
|
106 |
+
#audioInputComponent {
|
107 |
margin-top: 20px;
|
108 |
}
|
109 |
.key-status {
|
|
|
124 |
|
125 |
api_key = gr.Textbox(label="🔑 OpenAI API Key", type="password", elem_id="apiKeyInput")
|
126 |
key_status = gr.HTML("<div id='keyStatus'></div>")
|
127 |
+
|
128 |
+
# Crucially, set type="messages" here to match OpenAI's expected format
|
129 |
chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
|
130 |
+
state = gr.State([]) # Now `state` will directly hold OpenAI-compatible messages
|
131 |
|
132 |
audio_input = gr.Audio(
|
133 |
sources=["microphone"],
|
|
|
139 |
|
140 |
clear_btn = gr.Button("🗑️ Clear Chat")
|
141 |
|
|
|
142 |
audio_input.change(
|
143 |
transcribe_audio_and_chat,
|
144 |
inputs=[audio_input, state, api_key],
|
145 |
+
outputs=[chatbot, state]
|
146 |
)
|
147 |
|
|
|
148 |
gr.HTML("""
|
149 |
<script>
|
150 |
document.getElementById("apiKeyInput").addEventListener("input", function() {
|
|
|
158 |
}
|
159 |
});
|
160 |
|
|
|
161 |
document.querySelector("#apiKeyInput input").focus();
|
162 |
</script>
|
163 |
""")
|
164 |
|
165 |
+
# When clearing, ensure state is reset to an empty list, matching the 'messages' format
|
166 |
clear_btn.click(lambda: ([], []), None, [chatbot, state])
|
167 |
|
168 |
demo.launch()
|