Krishnavamshithumma committed
Commit b5d9400 · verified · 1 Parent(s): 3b4c90e

Update app.py

Files changed (1)
  1. app.py +139 -38
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 from openai import OpenAI
 import speech_recognition as sr
-import tempfile
+import os
 
 system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
 - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience
@@ -11,64 +11,165 @@ system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When r
 - A Computer Science graduate from Neil Gogte Institute of Technology
 Answer questions about your background professionally but engagingly (2-3 sentences max)."""
 
-def speech_to_text(audio):
-    recognizer = sr.Recognizer()
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-        tmp_file.write(audio)
-        tmp_file.flush()
-        with sr.AudioFile(tmp_file.name) as source:
-            audio_data = recognizer.record(source)
-    try:
-        text = recognizer.recognize_google(audio_data)
-        return text
-    except sr.UnknownValueError:
-        return "❌ Could not understand the audio"
-    except sr.RequestError as e:
-        return f"❌ Speech recognition error: {e}"
-
-def chat_with_openai(user_input, history, api_key):
+# Initialize the SpeechRecognition Recognizer
+r = sr.Recognizer()
+
+def transcribe_audio_and_chat(audio_filepath, history, api_key):
     if not api_key:
+        # Raise a Gradio error to be displayed in the UI
         raise gr.Error("❌ Please enter your OpenAI API key.")
+
+    if audio_filepath is None:
+        # Raise a Gradio error if no audio is captured
+        raise gr.Error("No audio received. Please speak into the microphone.")
+
     try:
+        # Load the audio file
+        with sr.AudioFile(audio_filepath) as source:
+            audio_data = r.record(source)  # read the entire audio file
+
+        # Perform speech recognition
+        try:
+            user_input = r.recognize_google(audio_data)  # Using Google Web Speech API
+            print(f"Transcribed User Input: {user_input}")  # For debugging purposes
+            # If transcription is successful, you might want to show it in the chat
+            # before the AI responds. For now, we'll just use it directly.
+
+        except sr.UnknownValueError:
+            # If speech is unintelligible,
+            # return current history and an error message for the chatbot
+            return history + [("", "Sorry, I could not understand the audio. Please try again.")], ""
+        except sr.RequestError as e:
+            # If the API request fails,
+            # return current history and an error message for the chatbot
+            return history + [("", f"Could not request results from Google Speech Recognition service; {e}")], ""
+
+        finally:
+            # Always clean up the temporary audio file, regardless of success or failure
+            if os.path.exists(audio_filepath):
+                os.remove(audio_filepath)
+
+        # --- Proceed with OpenAI chat ---
         client = OpenAI(api_key=api_key)
+
+        # Build messages from history
         messages = [{"role": "system", "content": system_prompt}]
         for entry in history:
-            messages.append({"role": "user", "content": entry[0]})
-            messages.append({"role": "assistant", "content": entry[1]})
-        messages.append({"role": "user", "content": user_input})
+            # Ensure history entries are tuples (user_message, bot_message)
+            if isinstance(entry, (list, tuple)) and len(entry) == 2:
+                messages.append({"role": "user", "content": entry[0]})
+                messages.append({"role": "assistant", "content": entry[1]})
+        messages.append({"role": "user", "content": user_input})  # Add the current user input
+
+        # Get response from OpenAI
         response = client.chat.completions.create(
             model="gpt-4o",
             messages=messages,
             temperature=0.7
         )
+
         bot_reply = response.choices[0].message.content
+
+        # Append the new user input and bot reply to the history
         history.append((user_input, bot_reply))
-        return history, ""
+
+        # Return the updated history for the chatbot component
+        # and None for the audio input, clearing it for the next input.
+        return history, None  # Use None for the audio input to reset the component
+
     except Exception as e:
-        raise gr.Error(f"❌ Error: {str(e)}")
+        # Catch any other unexpected errors
+        print(f"An unexpected error occurred: {e}")  # Log the error
+        # Raise a Gradio error for display in the UI
+        raise gr.Error(f"❌ An unexpected error occurred: {str(e)}")
 
+# --- Gradio UI setup remains mostly the same ---
 with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
-    gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant (No JavaScript)")
+    gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
 
-    api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password")
-    chatbot = gr.Chatbot(height=400)
-    state = gr.State([])
+    # Add custom CSS
+    gr.HTML("""
+    <style>
+        #chatBox {
+            height: 60vh;
+            overflow-y: auto;
+            padding: 20px;
+            border-radius: 10px;
+            background: #f9f9f9;
+            margin-bottom: 20px;
+        }
+        .message {
+            margin: 10px 0;
+            padding: 12px;
+            border-radius: 8px;
+        }
+        .user {
+            background: #e3f2fd;
+            text-align: right;
+        }
+        .bot {
+            background: #f5f5f5;
+        }
+        #audioInputComponent { /* New ID for the audio component */
+            margin-top: 20px;
+        }
+        .key-status {
+            padding: 5px;
+            margin-top: 5px;
+            border-radius: 4px;
+        }
+        .success {
+            background: #d4edda;
+            color: #155724;
+        }
+        .error {
+            background: #f8d7da;
+            color: #721c24;
+        }
+    </style>
+    """)
 
-    with gr.Row():
-        voice_input = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak here")
-        transcribed_text = gr.Textbox(label="Transcribed Text")
+    api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password", elem_id="apiKeyInput")
+    key_status = gr.HTML("<div id='keyStatus'></div>")
+    chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
+    state = gr.State([])  # Stores the chat history
 
-    # When audio is submitted, convert to text
-    voice_input.change(speech_to_text, voice_input, transcribed_text)
-
-    # When transcribed text is ready, send to OpenAI
-    transcribed_text.submit(
-        chat_with_openai,
-        [transcribed_text, state, api_key],
-        [chatbot, state, transcribed_text]
+    audio_input = gr.Audio(
+        sources=["microphone"],
+        type="filepath",
+        label="Speak your message here",
+        elem_id="audioInputComponent",
+        streaming=False
     )
 
     clear_btn = gr.Button("🗑️ Clear Chat")
+
+    # Event handler: When audio input is recorded and submitted (by stopping recording)
+    audio_input.change(
+        transcribe_audio_and_chat,
+        inputs=[audio_input, state, api_key],
+        outputs=[chatbot, state]  # Ensure chatbot and state are updated
+    )
+
+    # JavaScript for API key status (still useful for UX)
+    gr.HTML("""
+    <script>
+        document.getElementById("apiKeyInput").addEventListener("input", function() {
+            const apiKey = this.value.trim();
+            const keyStatus = document.getElementById("keyStatus");
+
+            if (apiKey) {
+                keyStatus.innerHTML = '<div class="key-status success">API Key saved successfully!</div>';
+            } else {
+                keyStatus.innerHTML = '<div class="key-status error">Please enter a valid API key</div>';
+            }
+        });
+
+        // Focus on API key input on load
+        document.querySelector("#apiKeyInput input").focus();
+    </script>
+    """)
+
     clear_btn.click(lambda: ([], []), None, [chatbot, state])
 
-demo.launch()
+demo.launch()
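
Two details of the new wiring are worth flagging for anyone adapting this commit. First, `transcribe_audio_and_chat` returns `(history, None)`, with the `None` intended to reset the microphone, but the `change` event declares `outputs=[chatbot, state]`, so the `None` is written into `state` and the next turn iterates over a `None` history. Second, `gr.Chatbot` is created with `type="messages"`, which expects a history of `{"role": ..., "content": ...}` dicts, while the handler appends `(user, bot)` tuples. A minimal sketch of one way to align these, with the hypothetical `handle_audio` standing in for the committed handler (transcription and the OpenAI call are stubbed out):

```python
import gradio as gr

def handle_audio(audio_filepath, history, api_key):
    # Stand-ins for r.recognize_google(...) and the gpt-4o call in app.py.
    user_input = "transcribed text"
    bot_reply = "model reply"
    # type="messages" history: one dict per turn, not (user, bot) tuples.
    history = history + [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": bot_reply},
    ]
    # Three values for three outputs; None clears the microphone component.
    return history, history, None

with gr.Blocks() as demo:
    api_key = gr.Textbox(label="OpenAI API Key", type="password")
    chatbot = gr.Chatbot(type="messages", height=400)
    state = gr.State([])
    audio_input = gr.Audio(sources=["microphone"], type="filepath")
    audio_input.change(
        handle_audio,
        inputs=[audio_input, state, api_key],
        outputs=[chatbot, state, audio_input],  # audio_input listed so None resets it
    )

demo.launch()
```

Returning `None` is the usual way to reset a component from a Gradio event handler, but it only reaches the component if that component appears in `outputs`. Separately, the `<script>` block added via `gr.HTML` may never run: HTML component content is inserted into the DOM dynamically, and browsers generally do not execute scripts injected that way. If the on-load focus behaviour matters, the `js` parameter of `gr.Blocks` is one supported hook; a sketch under that assumption, reusing the commit's `apiKeyInput` elem_id:

```python
import gradio as gr

# Hypothetical alternative to the <script> tag in app.py: pass on-load
# JavaScript to Blocks via its js parameter instead of injecting a script tag.
focus_js = """
() => {
    const input = document.querySelector("#apiKeyInput input");
    if (input) { input.focus(); }
}
"""

with gr.Blocks(js=focus_js) as demo:
    api_key = gr.Textbox(label="OpenAI API Key", type="password", elem_id="apiKeyInput")

demo.launch()
```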