shukdevdatta123 committed on
Commit
3a0fb13
·
verified ·
1 Parent(s): 4110b67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -38
app.py CHANGED
@@ -152,16 +152,7 @@ def process_recorded_audio(audio_path):
152
  except Exception as e:
153
  return f"Error transcribing recorded audio: {str(e)}"
154
 
155
- # Modified to work without the query parameter
156
- def process_voice_query_direct(transcription, temperature, top_p, max_output_tokens):
157
- if not transcription or transcription.startswith("Error") or transcription.startswith("Please"):
158
- return "Please ensure audio is transcribed successfully first."
159
-
160
- # Use the transcription directly as the query
161
- messages = [{"role": "user", "content": [{"type": "text", "text": transcription}]}]
162
- return query_openai(messages, temperature, top_p, max_output_tokens)
163
-
164
- # Function to process the voice chat queries (kept for compatibility)
165
  def process_voice_query(transcription, query, temperature, top_p, max_output_tokens):
166
  if not transcription or transcription.startswith("Error") or transcription.startswith("Please"):
167
  return "Please ensure audio is transcribed successfully first."
@@ -204,6 +195,7 @@ def clear_chat():
204
  "", # upload_audio_output (textbox)
205
  None, # audio_recorder (audio)
206
  "", # record_transcription (textbox)
 
207
  "", # record_audio_output (textbox)
208
  1.0, # temperature (slider)
209
  1.0, # top_p (slider)
@@ -240,12 +232,19 @@ with gr.Blocks() as demo:
240
  #clear_chat_button:hover {
241
  background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
242
  }
243
- #ask_button, #transcribe_button {
244
  background: linear-gradient(135deg, #fbd38d 0%, #f6e05e 100%); /* Yellow gradient */
245
  }
246
- #ask_button:hover, #transcribe_button:hover {
247
  background: linear-gradient(135deg, #ecc94b 0%, #fbd38d 100%); /* Slightly darker yellow gradient on hover */
248
  }
 
 
 
 
 
 
 
249
  </style>
250
  """)
251
 
@@ -286,22 +285,21 @@ with gr.Blocks() as demo:
286
 
287
  with gr.Tab("Voice Chat"):
288
  with gr.Tabs():
289
- # We completely commented out the "Upload Audio" tab
290
- # with gr.Tab("Upload Audio"):
291
- # # Upload audio section
292
- # audio_upload = gr.File(label="Upload an Audio File", type="binary")
293
- # upload_transcribe_button = gr.Button("Transcribe Audio", elem_id="transcribe_button")
294
- # upload_transcription = gr.Textbox(label="Transcription", interactive=False)
295
- # upload_audio_query = gr.Textbox(label="Ask about the transcription (optional)")
296
- # upload_audio_output = gr.Textbox(label="Response", interactive=False)
297
- # upload_audio_button = gr.Button("Ask", elem_id="ask_button")
298
 
299
  with gr.Tab("Record Audio"):
300
- # Record audio section
301
  audio_recorder = gr.Audio(label="Record your voice", type="filepath")
302
  record_transcribe_button = gr.Button("Transcribe Recording", elem_id="transcribe_button")
303
  record_transcription = gr.Textbox(label="Transcription", interactive=False)
304
- # We removed the optional query field
305
  record_audio_output = gr.Textbox(label="Response", interactive=False)
306
  record_audio_button = gr.Button("Ask", elem_id="ask_button")
307
 
@@ -315,18 +313,19 @@ with gr.Blocks() as demo:
315
  image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
316
  pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
317
 
318
- # Voice Chat - Upload Audio tab actions (commented out)
319
- # upload_transcribe_button.click(
320
- # process_uploaded_audio,
321
- # inputs=[audio_upload],
322
- # outputs=[upload_transcription]
323
- # )
324
 
325
- # upload_audio_button.click(
326
- # process_voice_query,
327
- # inputs=[upload_transcription, upload_audio_query, temperature, top_p, max_output_tokens],
328
- # outputs=[upload_audio_output]
329
- # )
 
330
 
331
  # Voice Chat - Record Audio tab actions
332
  record_transcribe_button.click(
@@ -335,14 +334,14 @@ with gr.Blocks() as demo:
335
  outputs=[record_transcription]
336
  )
337
 
338
- # Modified to use new function that doesn't require the query parameter
339
  record_audio_button.click(
340
- process_voice_query_direct,
 
341
  inputs=[record_transcription, temperature, top_p, max_output_tokens],
342
  outputs=[record_audio_output]
343
  )
344
 
345
- # Clear button - modified to remove the record_audio_query reference
346
  clear_button.click(
347
  clear_chat,
348
  outputs=[
@@ -351,7 +350,7 @@ with gr.Blocks() as demo:
351
  image_text_query, image_output,
352
  pdf_upload, pdf_text_query, pdf_output,
353
  audio_upload, upload_transcription, upload_audio_query, upload_audio_output,
354
- audio_recorder, record_transcription, record_audio_output,
355
  temperature, top_p, max_output_tokens
356
  ]
357
  )
 
152
  except Exception as e:
153
  return f"Error transcribing recorded audio: {str(e)}"
154
 
155
+ # Function to process the voice chat queries
 
 
 
 
 
 
 
 
 
156
  def process_voice_query(transcription, query, temperature, top_p, max_output_tokens):
157
  if not transcription or transcription.startswith("Error") or transcription.startswith("Please"):
158
  return "Please ensure audio is transcribed successfully first."
 
195
  "", # upload_audio_output (textbox)
196
  None, # audio_recorder (audio)
197
  "", # record_transcription (textbox)
198
+ "", # record_audio_query (textbox)
199
  "", # record_audio_output (textbox)
200
  1.0, # temperature (slider)
201
  1.0, # top_p (slider)
 
232
  #clear_chat_button:hover {
233
  background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
234
  }
235
+ #ask_button {
236
  background: linear-gradient(135deg, #fbd38d 0%, #f6e05e 100%); /* Yellow gradient */
237
  }
238
+ #ask_button:hover {
239
  background: linear-gradient(135deg, #ecc94b 0%, #fbd38d 100%); /* Slightly darker yellow gradient on hover */
240
  }
241
+ #transcribe_button {
242
+ background: linear-gradient(135deg, #68d391 0%, #48bb78 100%); /* Green gradient */
243
+ }
244
+
245
+ #transcribe_button:hover {
246
+ background: linear-gradient(135deg, #38a169 0%, #68d391 100%); /* Slightly darker green gradient on hover */
247
+ }
248
  </style>
249
  """)
250
 
 
285
 
286
  with gr.Tab("Voice Chat"):
287
  with gr.Tabs():
288
+ with gr.Tab("Upload Audio"):
289
+ # Upload audio section
290
+ audio_upload = gr.File(label="Upload an Audio File", type="binary")
291
+ upload_transcribe_button = gr.Button("Transcribe Audio", elem_id="transcribe_button")
292
+ upload_transcription = gr.Textbox(label="Transcription", interactive=False)
293
+ # upload_audio_query = gr.Textbox(label="Ask about the transcription (optional)")
294
+ upload_audio_output = gr.Textbox(label="Response", interactive=False)
295
+ upload_audio_button = gr.Button("Ask", elem_id="ask_button")
 
296
 
297
  with gr.Tab("Record Audio"):
298
+ # Record audio section - Fixed to use compatible parameters
299
  audio_recorder = gr.Audio(label="Record your voice", type="filepath")
300
  record_transcribe_button = gr.Button("Transcribe Recording", elem_id="transcribe_button")
301
  record_transcription = gr.Textbox(label="Transcription", interactive=False)
302
+ # record_audio_query = gr.Textbox(label="Ask about the transcription (optional)")
303
  record_audio_output = gr.Textbox(label="Response", interactive=False)
304
  record_audio_button = gr.Button("Ask", elem_id="ask_button")
305
 
 
313
  image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
314
  pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
315
 
316
+ # Voice Chat - Upload Audio tab actions
317
+ upload_transcribe_button.click(
318
+ process_uploaded_audio,
319
+ inputs=[audio_upload],
320
+ outputs=[upload_transcription]
321
+ )
322
 
323
+ upload_audio_button.click(
324
+ process_voice_query,
325
+ # inputs=[upload_transcription, upload_audio_query, temperature, top_p, max_output_tokens],
326
+ inputs=[upload_transcription, temperature, top_p, max_output_tokens],
327
+ outputs=[upload_audio_output]
328
+ )
329
 
330
  # Voice Chat - Record Audio tab actions
331
  record_transcribe_button.click(
 
334
  outputs=[record_transcription]
335
  )
336
 
 
337
  record_audio_button.click(
338
+ process_voice_query,
339
+ # inputs=[record_transcription, record_audio_query, temperature, top_p, max_output_tokens],
340
  inputs=[record_transcription, temperature, top_p, max_output_tokens],
341
  outputs=[record_audio_output]
342
  )
343
 
344
+ # Clear button resets all necessary fields
345
  clear_button.click(
346
  clear_chat,
347
  outputs=[
 
350
  image_text_query, image_output,
351
  pdf_upload, pdf_text_query, pdf_output,
352
  audio_upload, upload_transcription, upload_audio_query, upload_audio_output,
353
+ audio_recorder, record_transcription, record_audio_query, record_audio_output,
354
  temperature, top_p, max_output_tokens
355
  ]
356
  )