Krishnavamshithumma committed
Commit 59110a2 · verified · 1 Parent(s): 9cd9066

Update app.py

Files changed (1): app.py (+47 -30)
app.py CHANGED
@@ -2,6 +2,10 @@ import gradio as gr
 from openai import OpenAI
 import speech_recognition as sr
 import os
+import io  # For in-memory file handling
+import scipy.io.wavfile as wavfile  # For writing WAV data to in-memory file
+import numpy as np  # To handle the audio array
+import datetime  # For logging timestamps if needed (not directly used in this version, but good practice)
 
 system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
 - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience
@@ -14,23 +18,35 @@ Answer questions about your background professionally but engagingly (2-3 senten
 # Initialize the SpeechRecognition Recognizer
 r = sr.Recognizer()
 
-def transcribe_audio_and_chat(audio_filepath, history, api_key):
+# Modified function to accept audio as a numpy array and samplerate
+def transcribe_audio_and_chat(audio_tuple, history, api_key):
     # 'history' now directly contains the list of message dictionaries
-    # that we can directly pass to OpenAI, after adding the current turn.
-
     if not api_key:
         raise gr.Error("❌ Please enter your OpenAI API key.")
 
-    # Always ensure history is a list, even if it somehow became None
+    if audio_tuple is None:
+        # If no audio is received, add an assistant message to history and reset the audio input
+        history.append({"role": "assistant", "content": "No audio received. Please speak into the microphone."})
+        return history, history, None
+
+    # Ensure history is a list, even if it somehow became None (defensive programming)
     if history is None:
         history = []
 
-    if audio_filepath is None:
-        raise gr.Error("No audio received. Please speak into the microphone.")
+    samplerate, audio_np_array = audio_tuple
 
     try:
-        # Load the audio file
-        with sr.AudioFile(audio_filepath) as source:
+        # Convert the NumPy array to a format speech_recognition can handle (in-memory WAV)
+        # Ensure the array is int16, a common sample format expected by scipy.io.wavfile
+        if audio_np_array.dtype != np.int16:
+            audio_np_array = audio_np_array.astype(np.int16)
+
+        wav_byte_io = io.BytesIO()
+        wavfile.write(wav_byte_io, samplerate, audio_np_array)
+        wav_byte_io.seek(0)  # Rewind to the beginning of the BytesIO object
+
+        # Create an AudioFile object from the in-memory WAV data
+        with sr.AudioFile(wav_byte_io) as source:
             audio_data = r.record(source)  # read the entire audio file
 
         # Perform speech recognition
@@ -39,20 +55,11 @@ def transcribe_audio_and_chat(audio_filepath, history, api_key):
         print(f"Transcribed User Input: {user_input}")  # For debugging purposes
 
     except sr.UnknownValueError:
-        # If speech is unintelligible, add an assistant message to history
         history.append({"role": "assistant", "content": "Sorry, I could not understand the audio. Please try again."})
-        # Return history for chatbot, and None for audio input
-        return history, None
+        return history, history, None  # Reset audio input after error
     except sr.RequestError as e:
-        # If the API request fails, add an assistant message to history
         history.append({"role": "assistant", "content": f"Could not request results from Google Speech Recognition service; {e}"})
-        # Return history for chatbot, and None for audio input
-        return history, None
-
-    finally:
-        # Always clean up the temporary audio file
-        if os.path.exists(audio_filepath):
-            os.remove(audio_filepath)
+        return history, history, None  # Reset audio input after error
 
     # --- Proceed with OpenAI chat ---
     client = OpenAI(api_key=api_key)
@@ -76,15 +83,18 @@ def transcribe_audio_and_chat(audio_filepath, history, api_key):
         history.append({"role": "user", "content": user_input})
         history.append({"role": "assistant", "content": bot_reply})
 
-        # Return the updated history for the chatbot component (state),
-        # and None for the audio input to clear it.
-        return history, None  # CORRECT: Return history for 'state' output, None for audio input
+        # Return the updated history for the chatbot component,
+        # history again for the 'state' component,
+        # and None for the audio input to clear it and make it ready for the next input.
+        return history, history, None
 
     except Exception as e:
-        print(f"An unexpected error occurred: {e}")  # Log the error
+        print(f"An unexpected error occurred: {e}")  # Log the error for debugging
+        # If an unexpected error occurs, still try to reset the audio input
        raise gr.Error(f"❌ An unexpected error occurred: {str(e)}")
 
-# --- Gradio UI setup (no changes needed here) ---
+
+# --- Gradio UI setup ---
 with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
     gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
 
@@ -132,25 +142,31 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
     api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password", elem_id="apiKeyInput")
     key_status = gr.HTML("<div id='keyStatus'></div>")
 
+    # Chatbot component to display messages
     chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
-    state = gr.State([])  # `state` will hold OpenAI-compatible messages
+    # State component to maintain chat history in OpenAI's message format
+    state = gr.State([])
 
+    # Audio input component for microphone recording
     audio_input = gr.Audio(
         sources=["microphone"],
-        type="filepath",
+        type="numpy",  # Receive audio as (samplerate, numpy_array)
         label="Speak your message here",
         elem_id="audioInputComponent",
-        streaming=False
+        streaming=False  # Process audio after full recording
     )
 
     clear_btn = gr.Button("🗑️ Clear Chat")
 
+    # Event handler: fires when audio input is recorded and submitted (by stopping recording)
     audio_input.change(
-        transcribe_audio_and_chat,
+        fn=transcribe_audio_and_chat,
         inputs=[audio_input, state, api_key],
-        outputs=[chatbot, state]  # Ensure chatbot and state are updated
+        # Outputs: 1. chatbot display, 2. state (updated history), 3. audio_input (to clear it)
+        outputs=[chatbot, state, audio_input]
     )
 
+    # JavaScript for API key input and status display
     gr.HTML("""
     <script>
     document.getElementById("apiKeyInput").addEventListener("input", function() {
@@ -164,11 +180,12 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
         }
     });
 
+    // Focus on API key input on page load for convenience
     document.querySelector("#apiKeyInput input").focus();
     </script>
     """)
 
-    # When clearing, ensure state is reset to an empty list
+    # Clear button functionality: resets chatbot and state to empty
     clear_btn.click(lambda: ([], []), None, [chatbot, state])
 
demo.launch()
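
Note: the heart of this commit is swapping the temp-file path (type="filepath" plus the finally/os.remove cleanup) for an in-memory conversion of the (samplerate, numpy array) tuple that Gradio hands over. Below is a minimal, self-contained sketch of that conversion path, runnable outside the app; the helper name numpy_to_audio_data and the sine-wave test input are illustrative, not part of the commit.

# Standalone sketch of the new in-memory path (helper name is hypothetical).
import io

import numpy as np
import scipy.io.wavfile as wavfile
import speech_recognition as sr

def numpy_to_audio_data(samplerate, audio_np_array):
    """Turn Gradio's (samplerate, array) tuple into sr.AudioData without a temp file."""
    if audio_np_array.dtype != np.int16:
        audio_np_array = audio_np_array.astype(np.int16)  # PCM16, as scipy's WAV writer expects
    wav_byte_io = io.BytesIO()
    wavfile.write(wav_byte_io, samplerate, audio_np_array)  # write the WAV into memory
    wav_byte_io.seek(0)  # rewind so sr.AudioFile reads from the start
    with sr.AudioFile(wav_byte_io) as source:  # sr.AudioFile accepts file-like objects
        return sr.Recognizer().record(source)  # capture the whole clip

# Quick check with one second of a 440 Hz tone instead of a real recording:
rate = 16000
tone = (32767 * np.sin(2 * np.pi * 440 * np.arange(rate) / rate)).astype(np.int16)
audio_data = numpy_to_audio_data(rate, tone)
print(type(audio_data))  # an sr.AudioData instance, ready for recognize_google()

Since nothing touches disk, the old finally block that deleted the temporary file becomes unnecessary, which is why it disappears in this diff.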
 
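The handler's contract with the UI changed too: it now returns three values to match outputs=[chatbot, state, audio_input], and the trailing None is what clears the microphone widget for the next recording. A stripped-down sketch of that wiring using the same message format (the on_audio handler here is hypothetical, not the app's code):

# Minimal three-output wiring demo (handler and labels are illustrative).
import gradio as gr

def on_audio(audio_tuple, history):
    history = history or []
    reply = "Got audio." if audio_tuple is not None else "No audio received."
    history.append({"role": "assistant", "content": reply})
    # (chatbot display, state, audio component): the trailing None resets
    # the microphone so it is ready for the next recording.
    return history, history, None

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    state = gr.State([])
    mic = gr.Audio(sources=["microphone"], type="numpy")
    mic.change(on_audio, inputs=[mic, state], outputs=[chatbot, state, mic])

demo.launch()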
 
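The hunks skip the unchanged chat section (new-file lines 65-83), so only the `# --- Proceed with OpenAI chat ---` marker and the history.append calls are visible here. For orientation, a hedged sketch of what a chat call of this shape typically looks like with the OpenAI v1 client; the chat_reply helper, model name, and parameters are assumptions, not taken from the commit:

# Hedged sketch only: the real model and parameters are not shown in this diff.
from openai import OpenAI

def chat_reply(api_key, system_prompt, history, user_input):
    client = OpenAI(api_key=api_key)
    # Prepend the system prompt to the OpenAI-format history the app maintains.
    messages = [{"role": "system", "content": system_prompt}] + history
    messages.append({"role": "user", "content": user_input})
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # assumption; substitute whatever the app actually uses
        messages=messages,
    )
    return response.choices[0].message.content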