Krishnavamshithumma committed on
Commit
3b4c90e
·
verified ·
1 Parent(s): 9fc3d39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -133
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  from openai import OpenAI
3
  import speech_recognition as sr
4
- import os # Import os for managing temporary audio files
5
 
6
  system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
7
  - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience
@@ -11,161 +11,64 @@ system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When r
11
  - A Computer Science graduate from Neil Gogte Institute of Technology
12
  Answer questions about your background professionally but engagingly (2-3 sentences max)."""
13
 
14
- def transcribe_audio(audio_file_path):
15
- """Transcribes an audio file using Google Web Speech API."""
16
- if audio_file_path is None:
17
- return "" # Return empty string if no audio provided
18
-
19
- r = sr.Recognizer()
20
- try:
21
- with sr.AudioFile(audio_file_path) as source:
22
- audio_data = r.record(source)
23
- text = r.recognize_google(audio_data)
24
- return text
25
- except sr.UnknownValueError:
26
- return "Sorry, I could not understand the audio."
27
- except sr.RequestError as e:
28
- return f"Could not request results from Google Web Speech API service; {e}"
29
- except Exception as e:
30
- return f"An error occurred during transcription: {e}"
31
- finally:
32
- # Clean up the temporary audio file
33
- if os.path.exists(audio_file_path):
34
- os.remove(audio_file_path)
35
-
36
- def chat_with_openai(user_input_text, history, api_key):
37
- """Handles chat with OpenAI, now accepting text input."""
38
  if not api_key:
39
  raise gr.Error("❌ Please enter your OpenAI API key.")
40
- if not user_input_text:
41
- return history, user_input_text # Return current history if input is empty
42
-
43
  try:
44
  client = OpenAI(api_key=api_key)
45
-
46
  messages = [{"role": "system", "content": system_prompt}]
47
  for entry in history:
48
  messages.append({"role": "user", "content": entry[0]})
49
  messages.append({"role": "assistant", "content": entry[1]})
50
- messages.append({"role": "user", "content": user_input_text})
51
-
52
  response = client.chat.completions.create(
53
  model="gpt-4o",
54
  messages=messages,
55
  temperature=0.7
56
  )
57
-
58
  bot_reply = response.choices[0].message.content
59
- history.append((user_input_text, bot_reply))
60
- return history, "" # Clear the input text box after processing
61
  except Exception as e:
62
- raise gr.Error(f"❌ Error during OpenAI chat: {str(e)}")
63
 
64
  with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
65
- gr.Markdown("## πŸŽ™οΈ Krishnavamshi Thumma - Voice Assistant")
66
 
67
- # Add custom CSS - kept for styling, removed JS for mic handling
68
- gr.HTML("""
69
- <style>
70
- #chatBox {
71
- height: 60vh;
72
- overflow-y: auto;
73
- padding: 20px;
74
- border-radius: 10px;
75
- background: #f9f9f9;
76
- margin-bottom: 20px;
77
- }
78
- .message {
79
- margin: 10px 0;
80
- padding: 12px;
81
- border-radius: 8px;
82
- }
83
- .user {
84
- background: #e3f2fd;
85
- text-align: right;
86
- }
87
- .bot {
88
- background: #f5f5f5;
89
- }
90
- .key-status {
91
- padding: 5px;
92
- margin-top: 5px;
93
- border-radius: 4px;
94
- }
95
- .success {
96
- background: #d4edda;
97
- color: #155724;
98
- }
99
- .error {
100
- background: #f8d7da;
101
- color: #721c24;
102
- }
103
- /* Style for the Gradio Audio component, if needed */
104
- .gradio-audio-recorder {
105
- margin-top: 20px; /* Add some space above the recorder */
106
- }
107
- </style>
108
- """)
109
-
110
- api_key = gr.Textbox(label="πŸ” OpenAI API Key", type="password", elem_id="apiKeyInput")
111
- key_status = gr.HTML("<div id='keyStatus'></div>")
112
- chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
113
  state = gr.State([])
114
 
115
- # New Gradio Audio component for recording
116
- # source="microphone" makes it record directly from mic
117
- # type="filepath" means it will pass a path to a temporary audio file to the function
118
- audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record your question", elem_id="audioInput")
119
 
120
- # This Textbox will display the transcribed text and serve as input to chat_with_openai
121
- transcribed_text_output = gr.Textbox(label="Transcribed Text", interactive=False)
122
- # Adding a hidden submit button if needed, or link directly to the audio_input
123
 
124
- clear_btn = gr.Button("πŸ—‘οΈ Clear Chat")
125
-
126
- # Event flow:
127
- # 1. User records audio using audio_input.
128
- # 2. When recording stops, audio_input triggers its change event.
129
- # 3. transcribe_audio is called with the audio file path.
130
- # 4. The transcribed text updates transcribed_text_output.
131
- # 5. The submit event of transcribed_text_output (implicitly when its value changes)
132
- # or an explicit submit button would then trigger chat_with_openai.
133
- # For simplicity, let's make it so that when audio is transcribed, it directly chats.
134
-
135
- audio_input.change(
136
- transcribe_audio,
137
- inputs=[audio_input],
138
- outputs=[transcribed_text_output]
139
- )
140
-
141
- # Now, when transcribed_text_output changes (i.e., new transcription is available),
142
- # we want to send it to chat_with_openai.
143
- transcribed_text_output.submit(
144
  chat_with_openai,
145
- inputs=[transcribed_text_output, state, api_key],
146
- outputs=[chatbot, state, transcribed_text_output] # Clear transcribed_text_output after chat
147
  )
148
 
 
 
149
 
150
- # JavaScript for API key input and status (still useful for UI)
151
- gr.HTML("""
152
- <script>
153
- document.getElementById("apiKeyInput").addEventListener("input", function() {
154
- const apiKey = this.value.trim();
155
- const keyStatus = document.getElementById("keyStatus");
156
-
157
- if (apiKey) {
158
- keyStatus.innerHTML = '<div class="key-status success">API Key saved successfully!</div>';
159
- } else {
160
- keyStatus.innerHTML = '<div class="key-status error">Please enter a valid API key</div>';
161
- }
162
- });
163
-
164
- // Initial setup for key status
165
- document.querySelector("#apiKeyInput input").focus();
166
- </script>
167
- """)
168
-
169
- clear_btn.click(lambda: ([], "", None), None, [chatbot, state, transcribed_text_output]) # Clear audio input too
170
-
171
- demo.launch()
 
import os
import tempfile

import gradio as gr
import speech_recognition as sr
from openai import OpenAI
5
 
6
  system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
7
  - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience
 
11
  - A Computer Science graduate from Neil Gogte Institute of Technology
12
  Answer questions about your background professionally but engagingly (2-3 sentences max)."""
13
 
14
def speech_to_text(audio):
    """Transcribe a recorded clip to text via the Google Web Speech API.

    Parameters:
        audio: Path to an audio file (str — what gr.Audio with
            type="filepath" actually delivers) or raw audio bytes.
            None (e.g. a cleared recording) yields "".

    Returns:
        The recognized text, or a "❌ ..."-prefixed message on failure.
    """
    if audio is None:
        # Recording was cleared / nothing captured — nothing to transcribe.
        return ""

    recognizer = sr.Recognizer()
    tmp_path = None
    try:
        if isinstance(audio, (bytes, bytearray)):
            # Raw bytes: persist to a temp WAV so sr.AudioFile can open it.
            # (Close the handle before reading it back, for portability.)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
                tmp_file.write(audio)
                tmp_path = tmp_file.name
            audio_path = tmp_path
        else:
            # gr.Audio(type="filepath") passes a path string directly; the
            # original called tmp_file.write(audio) on it, which raises
            # TypeError for str input.
            audio_path = audio

        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        try:
            return recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            return "❌ Could not understand the audio"
        except sr.RequestError as e:
            return f"❌ Speech recognition error: {e}"
    finally:
        # Remove only our own temp copy, never the caller's original file.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)
28
+
29
def chat_with_openai(user_input, history, api_key):
    """Send the transcribed user text to OpenAI and append the reply.

    Parameters:
        user_input: The user's question text (from the transcription box).
        history: List of (user, bot) tuples accumulated so far (gr.State).
        api_key: OpenAI API key entered by the user.

    Returns:
        (history, history, ""): the updated conversation for the Chatbot
        display, the same list for the State, and an empty string to clear
        the transcription textbox. The .submit() wiring declares three
        outputs ([chatbot, state, transcribed_text]); the original returned
        only two values, which makes Gradio fail at runtime.

    Raises:
        gr.Error: When the key is missing or the API call fails.
    """
    if not api_key:
        raise gr.Error("❌ Please enter your OpenAI API key.")
    if not user_input:
        # Nothing to send (empty/failed transcription) — leave the chat as-is.
        return history, history, ""
    try:
        client = OpenAI(api_key=api_key)

        # Rebuild the full conversation for the chat-completions call.
        messages = [{"role": "system", "content": system_prompt}]
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": user_input})

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            temperature=0.7
        )

        bot_reply = response.choices[0].message.content
        history.append((user_input, bot_reply))
        return history, history, ""
    except Exception as e:
        raise gr.Error(f"❌ Error: {str(e)}")
49
 
50
  with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
51
+ gr.Markdown("## πŸŽ™οΈ Krishnavamshi Thumma - Voice Assistant (No JavaScript)")
52
 
53
+ api_key = gr.Textbox(label="πŸ” OpenAI API Key", type="password")
54
+ chatbot = gr.Chatbot(height=400)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  state = gr.State([])
56
 
57
+ with gr.Row():
58
+ voice_input = gr.Audio(sources=["microphone"], type="filepath", label="🎀 Speak here")
59
+ transcribed_text = gr.Textbox(label="Transcribed Text")
 
60
 
61
+ # When audio is submitted, convert to text
62
+ voice_input.change(speech_to_text, voice_input, transcribed_text)
 
63
 
64
+ # When transcribed text is ready, send to OpenAI
65
+ transcribed_text.submit(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  chat_with_openai,
67
+ [transcribed_text, state, api_key],
68
+ [chatbot, state, transcribed_text]
69
  )
70
 
71
+ clear_btn = gr.Button("πŸ—‘οΈ Clear Chat")
72
+ clear_btn.click(lambda: ([], []), None, [chatbot, state])
73
 
74
+ demo.launch()