Update app.py
app.py CHANGED
@@ -2,6 +2,10 @@ import gradio as gr
 from openai import OpenAI
 import speech_recognition as sr
 import os
+import io  # For in-memory file handling
+import scipy.io.wavfile as wavfile  # For writing WAV data to in-memory file
+import numpy as np  # To handle the audio array
+import datetime  # For logging timestamps if needed (not directly used in this version, but good practice)
 
 system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
 - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience
@@ -14,23 +18,35 @@ Answer questions about your background professionally but engagingly (2-3 senten
 # Initialize the SpeechRecognition Recognizer
 r = sr.Recognizer()
 
-def transcribe_audio_and_chat(audio_filepath, history, api_key):
+# Modified function to accept audio as a numpy array and samplerate
+def transcribe_audio_and_chat(audio_tuple, history, api_key):
     # 'history' now directly contains the list of message dictionaries
-    # that we can directly pass to OpenAI, after adding current turn.
-
     if not api_key:
         raise gr.Error("❌ Please enter your OpenAI API key.")
 
-
+    if audio_tuple is None:
+        # If no audio is received, add an assistant message to history and reset audio input
+        history.append({"role": "assistant", "content": "No audio received. Please speak into the microphone."})
+        return history, history, None
+
+    # Ensure history is a list, even if it somehow became None (defensive programming)
     if history is None:
         history = []
 
-    if audio_filepath is None:
-        raise gr.Error("No audio received. Please speak into the microphone.")
+    samplerate, audio_np_array = audio_tuple
 
     try:
-        #
-        with sr.AudioFile(audio_filepath) as source:
+        # Convert the NumPy array to a format speech_recognition can handle (in-memory WAV)
+        # Ensure the array is int16 as it's a common format for audio samples and expected by scipy.io.wavfile
+        if audio_np_array.dtype != np.int16:
+            audio_np_array = audio_np_array.astype(np.int16)
+
+        wav_byte_io = io.BytesIO()
+        wavfile.write(wav_byte_io, samplerate, audio_np_array)
+        wav_byte_io.seek(0)  # Rewind to the beginning of the BytesIO object
+
+        # Create an AudioFile object from the in-memory WAV data
+        with sr.AudioFile(wav_byte_io) as source:
            audio_data = r.record(source)  # read the entire audio file
 
         # Perform speech recognition
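A note on the technique in this hunk: `gr.Audio(type="numpy")` hands the handler a `(samplerate, np.ndarray)` tuple, while `speech_recognition` only consumes WAV-like sources, so the new code serializes the array into an in-memory WAV with scipy before recognition, avoiding the temporary file (and the `finally` cleanup) the old version needed. A minimal standalone sketch of the same conversion; the helper name, the 16 kHz rate, and the silent input are illustrative assumptions, not part of the app:

```python
import io

import numpy as np
import scipy.io.wavfile as wavfile
import speech_recognition as sr

def numpy_audio_to_audiodata(samplerate, samples):
    """Convert a (samplerate, samples) pair, as produced by gr.Audio(type="numpy"),
    into the sr.AudioData object that speech_recognition recognizers consume."""
    if samples.dtype != np.int16:
        samples = samples.astype(np.int16)  # scipy writes int16 arrays as 16-bit PCM
    buf = io.BytesIO()
    wavfile.write(buf, samplerate, samples)  # serialize the array as WAV, in memory
    buf.seek(0)  # rewind so AudioFile reads from the start
    with sr.AudioFile(buf) as source:
        return sr.Recognizer().record(source)  # read the whole buffer

# Illustrative input: one second of silence stands in for a real recording.
audio_data = numpy_audio_to_audiodata(16000, np.zeros(16000, dtype=np.int16))
print(audio_data.sample_rate, audio_data.sample_width)  # 16000 2
```

One caveat: if Gradio ever delivers float samples in [-1, 1], a bare `astype(np.int16)` truncates them to zeros, so scaling by 32767 before the cast is the safer variant.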
@@ -39,20 +55,11 @@ def transcribe_audio_and_chat(audio_filepath, history, api_key):
         print(f"Transcribed User Input: {user_input}")  # For debugging purposes
 
     except sr.UnknownValueError:
-        # If speech is unintelligible, add an assistant message to history
         history.append({"role": "assistant", "content": "Sorry, I could not understand the audio. Please try again."})
-
-        return history, None
+        return history, history, None  # Reset audio input after error
     except sr.RequestError as e:
-        # If API request fails, add an assistant message to history
         history.append({"role": "assistant", "content": f"Could not request results from Google Speech Recognition service; {e}"})
-
-        return history, None
-
-    finally:
-        # Always clean up the temporary audio file
-        if os.path.exists(audio_filepath):
-            os.remove(audio_filepath)
+        return history, history, None  # Reset audio input after error
 
     # --- Proceed with OpenAI chat ---
     client = OpenAI(api_key=api_key)
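The hunks above and below skip over the chat-completion call itself, which this commit leaves unchanged and the diff therefore does not show. For orientation only, here is a sketch of what that elided section plausibly looks like, given the surrounding names (`client`, `system_prompt`, `user_input`, `bot_reply`); the model name and the placeholder values are assumptions, not taken from the diff:

```python
from openai import OpenAI

# Hypothetical stand-ins for values defined elsewhere in app.py.
api_key = "sk-..."  # placeholder; never hard-code a real key
system_prompt = "You are a voice bot representing Krishnavamshi Thumma."
history = []  # list of {"role": ..., "content": ...} dicts, as kept in gr.State
user_input = "Tell me about your experience."

client = OpenAI(api_key=api_key)
response = client.chat.completions.create(
    model="gpt-4o-mini",  # assumption: the real model name is not visible in this diff
    messages=[{"role": "system", "content": system_prompt}]
             + history
             + [{"role": "user", "content": user_input}],
)
bot_reply = response.choices[0].message.content
print(bot_reply)
```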
@@ -76,15 +83,18 @@ def transcribe_audio_and_chat(audio_filepath, history, api_key):
         history.append({"role": "user", "content": user_input})
         history.append({"role": "assistant", "content": bot_reply})
 
-        # Return the updated history for the chatbot component
-        #
-        return history, None
+        # Return the updated history for the chatbot component,
+        # history again for the 'state' component,
+        # and None for the audio input to clear it and make it ready for next input.
+        return history, history, None
 
     except Exception as e:
-        print(f"An unexpected error occurred: {e}")  # Log the error
+        print(f"An unexpected error occurred: {e}")  # Log the error for debugging
+        # If an unexpected error occurs, still try to reset the audio input
         raise gr.Error(f"❌ An unexpected error occurred: {str(e)}")
 
-
+
+# --- Gradio UI setup ---
 with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
     gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
 
@@ -132,25 +142,31 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
     api_key = gr.Textbox(label="🔑 OpenAI API Key", type="password", elem_id="apiKeyInput")
     key_status = gr.HTML("<div id='keyStatus'></div>")
 
+    # Chatbot component to display messages
     chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
-    state = gr.State([])
+    # State component to maintain chat history in OpenAI's message format
+    state = gr.State([])
 
+    # Audio input component for microphone recording
     audio_input = gr.Audio(
         sources=["microphone"],
-        type="filepath",
+        type="numpy",  # Receive audio as (samplerate, numpy_array)
         label="Speak your message here",
         elem_id="audioInputComponent",
-        streaming=False
+        streaming=False  # Process audio after full recording
     )
 
     clear_btn = gr.Button("🗑️ Clear Chat")
 
+    # Event handler: When audio input is recorded and submitted (by stopping recording)
     audio_input.change(
-        transcribe_audio_and_chat,
+        fn=transcribe_audio_and_chat,
         inputs=[audio_input, state, api_key],
-        outputs=[chatbot, audio_input]
+        # Outputs: 1. chatbot display, 2. state (updated history), 3. audio_input (to clear it)
+        outputs=[chatbot, state, audio_input]
     )
 
+    # JavaScript for API key input and status display
     gr.HTML("""
     <script>
         document.getElementById("apiKeyInput").addEventListener("input", function() {
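The rewiring above relies on a Gradio convention: a handler must return one value per output component, and returning None for a component resets it, which is how the new `outputs=[chatbot, state, audio_input]` list clears the microphone after each turn. A stripped-down sketch of that pattern; the echo-style handler is illustrative, not the app's transcription logic:

```python
import gradio as gr

def handle_audio(audio_tuple, history):
    # Illustrative handler: report the recording's shape instead of transcribing it.
    history = history or []
    if audio_tuple is not None:
        samplerate, samples = audio_tuple
        history.append({"role": "assistant",
                        "content": f"Received {samples.shape[0]} samples at {samplerate} Hz"})
    # Three outputs: chatbot display, state, and None to clear the gr.Audio widget.
    return history, history, None

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    state = gr.State([])
    mic = gr.Audio(sources=["microphone"], type="numpy")
    mic.change(handle_audio, inputs=[mic, state], outputs=[chatbot, state, mic])

demo.launch()
```

Using `.change` on the audio component means the handler fires when a recording is stopped, and clearing the component keeps it ready for the next utterance without a manual reset.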
@@ -164,11 +180,12 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
             }
         });
 
+        // Focus on API key input on page load for convenience
         document.querySelector("#apiKeyInput input").focus();
     </script>
     """)
 
-    #
+    # Clear button functionality: resets chatbot and state to empty
     clear_btn.click(lambda: ([], []), None, [chatbot, state])
 
 demo.launch()