Update app.py
Browse files
app.py
CHANGED
@@ -152,7 +152,16 @@ def process_recorded_audio(audio_path):
|
|
152 |
except Exception as e:
|
153 |
return f"Error transcribing recorded audio: {str(e)}"
|
154 |
|
155 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
def process_voice_query(transcription, query, temperature, top_p, max_output_tokens):
|
157 |
if not transcription or transcription.startswith("Error") or transcription.startswith("Please"):
|
158 |
return "Please ensure audio is transcribed successfully first."
|
@@ -195,7 +204,6 @@ def clear_chat():
|
|
195 |
"", # upload_audio_output (textbox)
|
196 |
None, # audio_recorder (audio)
|
197 |
"", # record_transcription (textbox)
|
198 |
-
"", # record_audio_query (textbox)
|
199 |
"", # record_audio_output (textbox)
|
200 |
1.0, # temperature (slider)
|
201 |
1.0, # top_p (slider)
|
@@ -278,6 +286,7 @@ with gr.Blocks() as demo:
|
|
278 |
|
279 |
with gr.Tab("Voice Chat"):
|
280 |
with gr.Tabs():
|
|
|
281 |
# with gr.Tab("Upload Audio"):
|
282 |
# # Upload audio section
|
283 |
# audio_upload = gr.File(label="Upload an Audio File", type="binary")
|
@@ -288,11 +297,11 @@ with gr.Blocks() as demo:
|
|
288 |
# upload_audio_button = gr.Button("Ask", elem_id="ask_button")
|
289 |
|
290 |
with gr.Tab("Record Audio"):
|
291 |
-
# Record audio section
|
292 |
audio_recorder = gr.Audio(label="Record your voice", type="filepath")
|
293 |
record_transcribe_button = gr.Button("Transcribe Recording", elem_id="transcribe_button")
|
294 |
record_transcription = gr.Textbox(label="Transcription", interactive=False)
|
295 |
-
#
|
296 |
record_audio_output = gr.Textbox(label="Response", interactive=False)
|
297 |
record_audio_button = gr.Button("Ask", elem_id="ask_button")
|
298 |
|
@@ -306,7 +315,7 @@ with gr.Blocks() as demo:
|
|
306 |
image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
|
307 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
308 |
|
309 |
-
# Voice Chat - Upload Audio tab actions
|
310 |
# upload_transcribe_button.click(
|
311 |
# process_uploaded_audio,
|
312 |
# inputs=[audio_upload],
|
@@ -326,14 +335,14 @@ with gr.Blocks() as demo:
|
|
326 |
outputs=[record_transcription]
|
327 |
)
|
328 |
|
|
|
329 |
record_audio_button.click(
|
330 |
-
|
331 |
-
|
332 |
-
inputs=[record_transcription, "", temperature, top_p, max_output_tokens],
|
333 |
outputs=[record_audio_output]
|
334 |
)
|
335 |
|
336 |
-
# Clear button
|
337 |
clear_button.click(
|
338 |
clear_chat,
|
339 |
outputs=[
|
@@ -342,7 +351,7 @@ with gr.Blocks() as demo:
|
|
342 |
image_text_query, image_output,
|
343 |
pdf_upload, pdf_text_query, pdf_output,
|
344 |
audio_upload, upload_transcription, upload_audio_query, upload_audio_output,
|
345 |
-
audio_recorder, record_transcription,
|
346 |
temperature, top_p, max_output_tokens
|
347 |
]
|
348 |
)
|
|
|
152 |
except Exception as e:
|
153 |
return f"Error transcribing recorded audio: {str(e)}"
|
154 |
|
155 |
+
# Voice query handler that sends the transcription itself as the user message (no separate query parameter)
|
156 |
+
def process_voice_query_direct(transcription, temperature, top_p, max_output_tokens):
|
157 |
+
if not transcription or transcription.startswith("Error") or transcription.startswith("Please"):
|
158 |
+
return "Please ensure audio is transcribed successfully first."
|
159 |
+
|
160 |
+
# Use the transcription directly as the query
|
161 |
+
messages = [{"role": "user", "content": [{"type": "text", "text": transcription}]}]
|
162 |
+
return query_openai(messages, temperature, top_p, max_output_tokens)
|
163 |
+
|
164 |
+
# Function to process the voice chat queries (kept for compatibility)
|
165 |
def process_voice_query(transcription, query, temperature, top_p, max_output_tokens):
|
166 |
if not transcription or transcription.startswith("Error") or transcription.startswith("Please"):
|
167 |
return "Please ensure audio is transcribed successfully first."
|
|
|
204 |
"", # upload_audio_output (textbox)
|
205 |
None, # audio_recorder (audio)
|
206 |
"", # record_transcription (textbox)
|
|
|
207 |
"", # record_audio_output (textbox)
|
208 |
1.0, # temperature (slider)
|
209 |
1.0, # top_p (slider)
|
|
|
286 |
|
287 |
with gr.Tab("Voice Chat"):
|
288 |
with gr.Tabs():
|
289 |
+
# The "Upload Audio" tab is disabled; its definition is kept below as commented-out code
|
290 |
# with gr.Tab("Upload Audio"):
|
291 |
# # Upload audio section
|
292 |
# audio_upload = gr.File(label="Upload an Audio File", type="binary")
|
|
|
297 |
# upload_audio_button = gr.Button("Ask", elem_id="ask_button")
|
298 |
|
299 |
with gr.Tab("Record Audio"):
|
300 |
+
# Record audio section
|
301 |
audio_recorder = gr.Audio(label="Record your voice", type="filepath")
|
302 |
record_transcribe_button = gr.Button("Transcribe Recording", elem_id="transcribe_button")
|
303 |
record_transcription = gr.Textbox(label="Transcription", interactive=False)
|
304 |
+
# No separate query textbox here: the transcription itself is used as the query
|
305 |
record_audio_output = gr.Textbox(label="Response", interactive=False)
|
306 |
record_audio_button = gr.Button("Ask", elem_id="ask_button")
|
307 |
|
|
|
315 |
image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
|
316 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
317 |
|
318 |
+
# Voice Chat - Upload Audio tab actions (commented out)
|
319 |
# upload_transcribe_button.click(
|
320 |
# process_uploaded_audio,
|
321 |
# inputs=[audio_upload],
|
|
|
335 |
outputs=[record_transcription]
|
336 |
)
|
337 |
|
338 |
+
# "Ask" passes the transcription straight to process_voice_query_direct, which takes no separate query input
|
339 |
record_audio_button.click(
|
340 |
+
process_voice_query_direct,
|
341 |
+
inputs=[record_transcription, temperature, top_p, max_output_tokens],
|
|
|
342 |
outputs=[record_audio_output]
|
343 |
)
|
344 |
|
345 |
+
# Clear button: the outputs list below no longer includes record_audio_query
|
346 |
clear_button.click(
|
347 |
clear_chat,
|
348 |
outputs=[
|
|
|
351 |
image_text_query, image_output,
|
352 |
pdf_upload, pdf_text_query, pdf_output,
|
353 |
audio_upload, upload_transcription, upload_audio_query, upload_audio_output,
|
354 |
+
audio_recorder, record_transcription, record_audio_output,
|
355 |
temperature, top_p, max_output_tokens
|
356 |
]
|
357 |
)
|