Krishnavamshithumma committed · verified
Commit 630158a · 1 Parent(s): 4860ff8

Update app.py

Files changed (1):
  app.py  +133 -36
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 from openai import OpenAI
 import speech_recognition as sr
-import tempfile
+import os  # Import os for managing temporary audio files
 
 system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
 - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience
@@ -11,64 +11,161 @@ system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When r
 - A Computer Science graduate from Neil Gogte Institute of Technology
 Answer questions about your background professionally but engagingly (2-3 sentences max)."""
 
-def speech_to_text(audio):
-    recognizer = sr.Recognizer()
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-        tmp_file.write(audio)
-        tmp_file.flush()
-        with sr.AudioFile(tmp_file.name) as source:
-            audio_data = recognizer.record(source)
-    try:
-        text = recognizer.recognize_google(audio_data)
-        return text
-    except sr.UnknownValueError:
-        return "❌ Could not understand the audio"
-    except sr.RequestError as e:
-        return f"❌ Speech recognition error: {e}"
-
-def chat_with_openai(user_input, history, api_key):
+def transcribe_audio(audio_file_path):
+    """Transcribes an audio file using the Google Web Speech API."""
+    if audio_file_path is None:
+        return ""  # Return empty string if no audio provided
+
+    r = sr.Recognizer()
+    try:
+        with sr.AudioFile(audio_file_path) as source:
+            audio_data = r.record(source)
+        text = r.recognize_google(audio_data)
+        return text
+    except sr.UnknownValueError:
+        return "Sorry, I could not understand the audio."
+    except sr.RequestError as e:
+        return f"Could not request results from Google Web Speech API service; {e}"
+    except Exception as e:
+        return f"An error occurred during transcription: {e}"
+    finally:
+        # Clean up the temporary audio file
+        if os.path.exists(audio_file_path):
+            os.remove(audio_file_path)
+
+def chat_with_openai(user_input_text, history, api_key):
+    """Handles chat with OpenAI, now accepting text input."""
     if not api_key:
         raise gr.Error("❌ Please enter your OpenAI API key.")
+    if not user_input_text:
+        return history, user_input_text  # Return current history if input is empty
+
     try:
         client = OpenAI(api_key=api_key)
+
         messages = [{"role": "system", "content": system_prompt}]
         for entry in history:
             messages.append({"role": "user", "content": entry[0]})
             messages.append({"role": "assistant", "content": entry[1]})
-        messages.append({"role": "user", "content": user_input})
+        messages.append({"role": "user", "content": user_input_text})
+
         response = client.chat.completions.create(
             model="gpt-4o",
             messages=messages,
             temperature=0.7
         )
+
         bot_reply = response.choices[0].message.content
-        history.append((user_input, bot_reply))
-        return history, ""
+        history.append((user_input_text, bot_reply))
+        return history, ""  # Clear the input text box after processing
     except Exception as e:
-        raise gr.Error(f"❌ Error: {str(e)}")
+        raise gr.Error(f"❌ Error during OpenAI chat: {str(e)}")
 
 with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
-    gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant (No JavaScript)")
+    gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
 
-    api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password")
-    chatbot = gr.Chatbot(height=400)
+    # Add custom CSS - kept for styling, removed JS for mic handling
+    gr.HTML("""
+    <style>
+        #chatBox {
+            height: 60vh;
+            overflow-y: auto;
+            padding: 20px;
+            border-radius: 10px;
+            background: #f9f9f9;
+            margin-bottom: 20px;
+        }
+        .message {
+            margin: 10px 0;
+            padding: 12px;
+            border-radius: 8px;
+        }
+        .user {
+            background: #e3f2fd;
+            text-align: right;
+        }
+        .bot {
+            background: #f5f5f5;
+        }
+        .key-status {
+            padding: 5px;
+            margin-top: 5px;
+            border-radius: 4px;
+        }
+        .success {
+            background: #d4edda;
+            color: #155724;
+        }
+        .error {
+            background: #f8d7da;
+            color: #721c24;
+        }
+        /* Style for the Gradio Audio component, if needed */
+        .gradio-audio-recorder {
+            margin-top: 20px; /* Add some space above the recorder */
+        }
+    </style>
+    """)
+
+    api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password", elem_id="apiKeyInput")
+    key_status = gr.HTML("<div id='keyStatus'></div>")
+    chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
     state = gr.State([])
 
-    with gr.Row():
-        voice_input = gr.Audio(source="microphone", type="filepath", label="🎤 Speak here")
-        transcribed_text = gr.Textbox(label="Transcribed Text")
-
-    # When audio is submitted, convert to text
-    voice_input.change(speech_to_text, voice_input, transcribed_text)
-
-    # When transcribed text is ready, send to OpenAI
-    transcribed_text.submit(
+    # New Gradio Audio component for recording
+    # sources=["microphone"] makes it record directly from the mic
+    # type="filepath" means it will pass a path to a temporary audio file to the function
+    audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record your question", elem_id="audioInput")
+
+    # This Textbox will display the transcribed text and serve as input to chat_with_openai
+    transcribed_text_output = gr.Textbox(label="Transcribed Text", interactive=False)
+    # Adding a hidden submit button if needed, or link directly to the audio_input
+
+    clear_btn = gr.Button("🗑️ Clear Chat")
+
+    # Event flow:
+    # 1. User records audio using audio_input.
+    # 2. When recording stops, audio_input triggers its change event.
+    # 3. transcribe_audio is called with the audio file path.
+    # 4. The transcribed text updates transcribed_text_output.
+    # 5. The submit event of transcribed_text_output (implicitly when its value changes)
+    #    or an explicit submit button would then trigger chat_with_openai.
+    # For simplicity, let's make it so that when audio is transcribed, it directly chats.
+
+    audio_input.change(
+        transcribe_audio,
+        inputs=[audio_input],
+        outputs=[transcribed_text_output]
+    )
+
+    # Now, when transcribed_text_output changes (i.e., a new transcription is available),
+    # we want to send it to chat_with_openai.
+    transcribed_text_output.submit(
         chat_with_openai,
-        [transcribed_text, state, api_key],
-        [chatbot, state, transcribed_text]
+        inputs=[transcribed_text_output, state, api_key],
+        outputs=[chatbot, state, transcribed_text_output]  # Clear transcribed_text_output after chat
     )
 
-    clear_btn = gr.Button("🗑️ Clear Chat")
-    clear_btn.click(lambda: ([], []), None, [chatbot, state])
-
-demo.launch()
+    # JavaScript for API key input and status (still useful for UI)
+    gr.HTML("""
+    <script>
+        document.getElementById("apiKeyInput").addEventListener("input", function() {
+            const apiKey = this.value.trim();
+            const keyStatus = document.getElementById("keyStatus");
+
+            if (apiKey) {
+                keyStatus.innerHTML = '<div class="key-status success">API Key saved successfully!</div>';
+            } else {
+                keyStatus.innerHTML = '<div class="key-status error">Please enter a valid API key</div>';
+            }
+        });
+
+        // Initial setup for key status
+        document.querySelector("#apiKeyInput input").focus();
+    </script>
+    """)
+
+    clear_btn.click(lambda: ([], "", None), None, [chatbot, state, transcribed_text_output])  # Clear audio input too
+
+demo.launch()
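
A note on the event wiring above: transcribed_text_output is created with interactive=False, and Gradio's Textbox.submit event only fires when a user presses Enter inside the box, so the transcription produced by audio_input.change never reaches chat_with_openai on its own. A minimal sketch of the direct chaining the in-diff comments describe, using the .then() chaining that Gradio event listeners support (component names as in the diff):

    # Sketch: chain transcription straight into the chat call with .then(),
    # so the user never has to submit the read-only textbox manually.
    audio_input.change(
        transcribe_audio,
        inputs=[audio_input],
        outputs=[transcribed_text_output],
    ).then(
        chat_with_openai,
        inputs=[transcribed_text_output, state, api_key],
        outputs=[chatbot, state, transcribed_text_output],
    )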
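Separately, the new Chatbot is declared with type="messages", which renders a list of {"role": ..., "content": ...} dicts, while chat_with_openai still appends (user, bot) tuples to history. One way to bridge the two, sketched with a hypothetical helper that is not part of this commit:

    def history_to_messages(history):
        # Convert (user, bot) tuples into the dict format a type="messages" Chatbot renders.
        messages = []
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
        return messages

    # chat_with_openai would then end with:
    #     return history_to_messages(history), ""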
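The clear handler returns ([], "", None) for [chatbot, state, transcribed_text_output], which resets state to a string rather than a list, and despite its trailing comment the audio recorder is not among the outputs. A sketch that clears all four components (same component names as above):

    clear_btn.click(
        lambda: ([], [], "", None),  # chatbot, state, transcribed text, audio recorder
        None,
        [chatbot, state, transcribed_text_output, audio_input],
    )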
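Finally, recent Gradio versions generally do not execute script tags embedded in gr.HTML, so the key-status script may never run. If the focus and status behavior matters, one alternative (a sketch, assuming the Gradio 4.x head parameter on gr.Blocks) is to inject the script into the page head instead:

    head_html = """
    <script>
        // Runs on page load; the selector matches elem_id="apiKeyInput" above.
        window.addEventListener("load", () => {
            const box = document.querySelector("#apiKeyInput input");
            if (box) box.focus();
        });
    </script>
    """

    with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma", head=head_html) as demo:
        ...  # same layout as in the diff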