shukdevdatta123 committed on
Commit
cc3538d
·
verified ·
1 Parent(s): 9f74220

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -24
app.py CHANGED
@@ -44,7 +44,7 @@ def image_url_chat(image_url, text_query, temperature, top_p, max_output_tokens)
44
 
45
  messages = [
46
  {"role": "user", "content": [
47
- {"type": "image_url", "image_url": {"url": image_url}}, # Corrected format
48
  {"type": "text", "text": text_query}
49
  ]},
50
  ]
@@ -71,7 +71,7 @@ def image_chat(image_file, text_query, temperature, top_p, max_output_tokens):
71
 
72
  messages = [
73
  {"role": "user", "content": [
74
- {"type": "image_url", "image_url": {"url": image_data}}, # Fixed format
75
  {"type": "text", "text": text_query}
76
  ]},
77
  ]
@@ -121,9 +121,59 @@ def transcribe_audio(audio_binary, openai_api_key):
121
  except Exception as e:
122
  return f"Error transcribing audio: {str(e)}"
123
 
124
- # Function to clear the chat (Fix: Returns the correct number of outputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  def clear_chat():
126
- return "", "", "", "", "", "", "", None, "", None, "", 1.0, 1.0, 2048
127
 
128
  # Gradio UI Layout
129
  with gr.Blocks() as demo:
@@ -155,10 +205,10 @@ with gr.Blocks() as demo:
155
  #clear_chat_button:hover {
156
  background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
157
  }
158
- #ask_button {
159
  background: linear-gradient(135deg, #fbd38d 0%, #f6e05e 100%); /* Yellow gradient */
160
  }
161
- #ask_button:hover {
162
  background: linear-gradient(135deg, #ecc94b 0%, #fbd38d 100%); /* Slightly darker yellow gradient on hover */
163
  }
164
  </style>
@@ -173,40 +223,54 @@ with gr.Blocks() as demo:
173
  with gr.Row():
174
  temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
175
  top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
176
- max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens") # Changed default to 2048
177
 
178
  with gr.Tabs():
179
  with gr.Tab("Image URL Chat"):
180
  image_url = gr.Textbox(label="Enter Image URL")
181
  image_query = gr.Textbox(label="Ask about the Image")
182
  image_url_output = gr.Textbox(label="Response", interactive=False)
183
- image_url_button = gr.Button("Ask",elem_id="ask_button")
184
 
185
  with gr.Tab("Text Chat"):
186
  text_query = gr.Textbox(label="Enter your query")
187
  text_output = gr.Textbox(label="Response", interactive=False)
188
- text_button = gr.Button("Ask",elem_id="ask_button")
189
 
190
  with gr.Tab("Image Chat"):
191
  image_upload = gr.File(label="Upload an Image", type="filepath")
192
  image_text_query = gr.Textbox(label="Ask about the uploaded image")
193
  image_output = gr.Textbox(label="Response", interactive=False)
194
- image_button = gr.Button("Ask",elem_id="ask_button")
195
 
196
  with gr.Tab("PDF Chat"):
197
  pdf_upload = gr.File(label="Upload a PDF", type="filepath")
198
  pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
199
  pdf_output = gr.Textbox(label="Response", interactive=False)
200
- pdf_button = gr.Button("Ask",elem_id="ask_button")
201
 
202
  with gr.Tab("Voice Chat"):
203
- audio_upload = gr.File(label="Upload an Audio File", type="binary")
204
- audio_query = gr.Textbox(label="Ask about the transcription")
205
- audio_output = gr.Textbox(label="Response", interactive=False)
206
- audio_button = gr.Button("Ask",elem_id="ask_button")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
  # Clear chat button
209
- clear_button = gr.Button("Clear Chat",elem_id="clear_chat_button")
210
 
211
  # Button Click Actions
212
  api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
@@ -215,22 +279,42 @@ with gr.Blocks() as demo:
215
  image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
216
  pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
217
 
218
- # For Voice Chat
219
- audio_button.click(
220
- lambda audio_binary, query, temperature, top_p, max_output_tokens: query_openai(
221
- [{"role": "user", "content": [{"type": "text", "text": transcribe_audio(audio_binary, api_key)}, {"type": "text", "text": query}]}],
222
- temperature, top_p, max_output_tokens
223
- ), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  )
225
 
226
- # Fix: Clear button resets all necessary fields correctly
227
  clear_button.click(
228
  clear_chat,
229
  outputs=[
230
  image_url, image_query, image_url_output,
231
  text_query, text_output,
232
  image_text_query, image_output,
233
- pdf_upload, pdf_text_query, pdf_output,
 
 
234
  temperature, top_p, max_output_tokens
235
  ]
236
  )
 
44
 
45
  messages = [
46
  {"role": "user", "content": [
47
+ {"type": "image_url", "image_url": {"url": image_url}},
48
  {"type": "text", "text": text_query}
49
  ]},
50
  ]
 
71
 
72
  messages = [
73
  {"role": "user", "content": [
74
+ {"type": "image_url", "image_url": {"url": image_data}},
75
  {"type": "text", "text": text_query}
76
  ]},
77
  ]
 
121
  except Exception as e:
122
  return f"Error transcribing audio: {str(e)}"
123
 
124
+ # Function to handle uploaded audio transcription
125
def process_uploaded_audio(audio_binary):
    """Transcribe an uploaded audio file and return the resulting text.

    Args:
        audio_binary: Content of the uploaded audio file. Any falsy value
            (``None``, empty bytes) is treated as "nothing uploaded".

    Returns:
        The value returned by ``transcribe_audio`` on success, otherwise a
        human-readable message describing what is missing or what failed.
    """
    # Guard clauses: both the audio payload and the API key are required.
    if not audio_binary:
        return "Please upload an audio file first."
    if not api_key:  # module-level name set elsewhere in the file
        return "Please enter your OpenAI API key first."

    try:
        return transcribe_audio(audio_binary, api_key)
    except Exception as exc:
        # Surface the failure as text so it can be shown in the output box.
        return f"Error transcribing audio: {str(exc)}"
137
+
138
+ # Function to handle recorded audio transcription
139
def process_recorded_audio(audio_path):
    """Transcribe a recording stored on disk and return the resulting text.

    Args:
        audio_path: Filesystem path of the recorded audio. Any falsy value
            is treated as "nothing recorded".

    Returns:
        The value returned by ``transcribe_audio`` on success, otherwise a
        human-readable message describing what is missing or what failed.
    """
    # Guard clauses: a recording and an API key are both required.
    if not audio_path:
        return "No audio recorded."
    if not api_key:  # module-level name set elsewhere in the file
        return "Please enter your OpenAI API key first."

    try:
        # transcribe_audio is given raw bytes, so load the whole file first.
        with open(audio_path, "rb") as recording:
            raw_audio = recording.read()
        return transcribe_audio(raw_audio, api_key)
    except Exception as exc:
        # Covers both unreadable files and transcription failures.
        return f"Error transcribing recorded audio: {str(exc)}"
154
+
155
+ # Function to process the voice chat queries
156
def process_voice_query(transcription, query, temperature, top_p, max_output_tokens):
    """Send a transcription (and an optional question about it) to the model.

    Args:
        transcription: Text shown in the transcription box. The transcription
            helpers in this file write status messages into the same box, so
            those messages are detected here and rejected.
        query: Optional question about the transcription. When empty, the
            transcription itself is sent as the user message.
        temperature: Forwarded unchanged to ``query_openai``.
        top_p: Forwarded unchanged to ``query_openai``.
        max_output_tokens: Forwarded unchanged to ``query_openai``.

    Returns:
        Whatever ``query_openai`` returns, or a fixed hint string when there
        is no usable transcription.
    """
    # Prefixes of the status/error strings the transcription helpers return
    # instead of real text ("Error transcribing ...", "Please upload ...",
    # "Please enter ...", "No audio recorded.").
    # NOTE(review): prefix matching also rejects a genuine transcription that
    # happens to start with one of these words; a dedicated status channel
    # would be more robust but would change the helpers' interface.
    placeholder_prefixes = ("Error", "Please", "No audio recorded.")

    if (
        not transcription
        or not transcription.strip()
        or transcription.startswith(placeholder_prefixes)
    ):
        return "Please ensure audio is transcribed successfully first."

    if not query:
        # No specific question: use the transcription as the query.
        content = [{"type": "text", "text": transcription}]
    else:
        # Question about the transcription: send both, clearly labelled.
        content = [
            {"type": "text", "text": f"Transcription: {transcription}"},
            {"type": "text", "text": f"Query: {query}"},
        ]

    messages = [{"role": "user", "content": content}]
    return query_openai(messages, temperature, top_p, max_output_tokens)
173
+
174
# Function to clear the chat
def clear_chat():
    """Return reset values for every component wired to the Clear Chat button.

    The tuple must contain exactly one value per component in the ``outputs``
    list of ``clear_button.click`` and in the same order: text boxes get an
    empty string, file/audio inputs get ``None``, and the three sliders get
    their defaults.

    Returns:
        A 21-element tuple of reset values.
    """
    return (
        # Image URL tab: image_url, image_query, image_url_output
        "", "", "",
        # Text tab: text_query, text_output
        "", "",
        # Image tab: image_text_query, image_output
        "", "",
        # PDF tab: pdf_upload, pdf_text_query, pdf_output
        None, "", "",
        # Voice / upload: audio_upload, upload_transcription,
        # upload_audio_query, upload_audio_output
        None, "", "", "",
        # Voice / record: audio_recorder, record_transcription,
        # record_audio_query, record_audio_output
        None, "", "", "",
        # Sliders: temperature, top_p, max_output_tokens
        1.0, 1.0, 2048,
    )
177
 
178
  # Gradio UI Layout
179
  with gr.Blocks() as demo:
 
205
  #clear_chat_button:hover {
206
  background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
207
  }
208
+ #ask_button, #transcribe_button {
209
  background: linear-gradient(135deg, #fbd38d 0%, #f6e05e 100%); /* Yellow gradient */
210
  }
211
+ #ask_button:hover, #transcribe_button:hover {
212
  background: linear-gradient(135deg, #ecc94b 0%, #fbd38d 100%); /* Slightly darker yellow gradient on hover */
213
  }
214
  </style>
 
223
  with gr.Row():
224
  temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
225
  top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
226
+ max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens")
227
 
228
  with gr.Tabs():
229
  with gr.Tab("Image URL Chat"):
230
  image_url = gr.Textbox(label="Enter Image URL")
231
  image_query = gr.Textbox(label="Ask about the Image")
232
  image_url_output = gr.Textbox(label="Response", interactive=False)
233
+ image_url_button = gr.Button("Ask", elem_id="ask_button")
234
 
235
  with gr.Tab("Text Chat"):
236
  text_query = gr.Textbox(label="Enter your query")
237
  text_output = gr.Textbox(label="Response", interactive=False)
238
+ text_button = gr.Button("Ask", elem_id="ask_button")
239
 
240
  with gr.Tab("Image Chat"):
241
  image_upload = gr.File(label="Upload an Image", type="filepath")
242
  image_text_query = gr.Textbox(label="Ask about the uploaded image")
243
  image_output = gr.Textbox(label="Response", interactive=False)
244
+ image_button = gr.Button("Ask", elem_id="ask_button")
245
 
246
  with gr.Tab("PDF Chat"):
247
  pdf_upload = gr.File(label="Upload a PDF", type="filepath")
248
  pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
249
  pdf_output = gr.Textbox(label="Response", interactive=False)
250
+ pdf_button = gr.Button("Ask", elem_id="ask_button")
251
 
252
  with gr.Tab("Voice Chat"):
253
+ with gr.Tabs():
254
+ with gr.Tab("Upload Audio"):
255
+ # Upload audio section
256
+ audio_upload = gr.File(label="Upload an Audio File", type="binary")
257
+ upload_transcribe_button = gr.Button("Transcribe Audio", elem_id="transcribe_button")
258
+ upload_transcription = gr.Textbox(label="Transcription", interactive=False)
259
+ upload_audio_query = gr.Textbox(label="Ask about the transcription (optional)")
260
+ upload_audio_output = gr.Textbox(label="Response", interactive=False)
261
+ upload_audio_button = gr.Button("Ask", elem_id="ask_button")
262
+
263
+ with gr.Tab("Record Audio"):
264
+ # Record audio section
265
+ audio_recorder = gr.Audio(source="microphone", type="filepath", label="Record your voice")
266
+ record_transcribe_button = gr.Button("Transcribe Recording", elem_id="transcribe_button")
267
+ record_transcription = gr.Textbox(label="Transcription", interactive=False)
268
+ record_audio_query = gr.Textbox(label="Ask about the transcription (optional)")
269
+ record_audio_output = gr.Textbox(label="Response", interactive=False)
270
+ record_audio_button = gr.Button("Ask", elem_id="ask_button")
271
 
272
  # Clear chat button
273
+ clear_button = gr.Button("Clear Chat", elem_id="clear_chat_button")
274
 
275
  # Button Click Actions
276
  api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
 
279
  image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
280
  pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
281
 
282
+ # Voice Chat - Upload Audio tab actions
283
+ upload_transcribe_button.click(
284
+ process_uploaded_audio,
285
+ inputs=[audio_upload],
286
+ outputs=[upload_transcription]
287
+ )
288
+
289
+ upload_audio_button.click(
290
+ process_voice_query,
291
+ inputs=[upload_transcription, upload_audio_query, temperature, top_p, max_output_tokens],
292
+ outputs=[upload_audio_output]
293
+ )
294
+
295
+ # Voice Chat - Record Audio tab actions
296
+ record_transcribe_button.click(
297
+ process_recorded_audio,
298
+ inputs=[audio_recorder],
299
+ outputs=[record_transcription]
300
+ )
301
+
302
+ record_audio_button.click(
303
+ process_voice_query,
304
+ inputs=[record_transcription, record_audio_query, temperature, top_p, max_output_tokens],
305
+ outputs=[record_audio_output]
306
  )
307
 
308
+ # Clear button resets all necessary fields
309
  clear_button.click(
310
  clear_chat,
311
  outputs=[
312
  image_url, image_query, image_url_output,
313
  text_query, text_output,
314
  image_text_query, image_output,
315
+ pdf_upload, pdf_text_query, pdf_output,
316
+ audio_upload, upload_transcription, upload_audio_query, upload_audio_output,
317
+ audio_recorder, record_transcription, record_audio_query, record_audio_output,
318
  temperature, top_p, max_output_tokens
319
  ]
320
  )