Update app.py
Browse files
app.py
CHANGED
@@ -152,16 +152,7 @@ def process_recorded_audio(audio_path):
|
|
152 |
except Exception as e:
|
153 |
return f"Error transcribing recorded audio: {str(e)}"
|
154 |
|
155 |
-
#
|
156 |
-
def process_voice_query_direct(transcription, temperature, top_p, max_output_tokens):
|
157 |
-
if not transcription or transcription.startswith("Error") or transcription.startswith("Please"):
|
158 |
-
return "Please ensure audio is transcribed successfully first."
|
159 |
-
|
160 |
-
# Use the transcription directly as the query
|
161 |
-
messages = [{"role": "user", "content": [{"type": "text", "text": transcription}]}]
|
162 |
-
return query_openai(messages, temperature, top_p, max_output_tokens)
|
163 |
-
|
164 |
-
# Function to process the voice chat queries (kept for compatibility)
|
165 |
def process_voice_query(transcription, query, temperature, top_p, max_output_tokens):
|
166 |
if not transcription or transcription.startswith("Error") or transcription.startswith("Please"):
|
167 |
return "Please ensure audio is transcribed successfully first."
|
@@ -204,6 +195,7 @@ def clear_chat():
|
|
204 |
"", # upload_audio_output (textbox)
|
205 |
None, # audio_recorder (audio)
|
206 |
"", # record_transcription (textbox)
|
|
|
207 |
"", # record_audio_output (textbox)
|
208 |
1.0, # temperature (slider)
|
209 |
1.0, # top_p (slider)
|
@@ -240,12 +232,19 @@ with gr.Blocks() as demo:
|
|
240 |
#clear_chat_button:hover {
|
241 |
background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
|
242 |
}
|
243 |
-
#ask_button
|
244 |
background: linear-gradient(135deg, #fbd38d 0%, #f6e05e 100%); /* Yellow gradient */
|
245 |
}
|
246 |
-
#ask_button:hover
|
247 |
background: linear-gradient(135deg, #ecc94b 0%, #fbd38d 100%); /* Slightly darker yellow gradient on hover */
|
248 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
</style>
|
250 |
""")
|
251 |
|
@@ -286,22 +285,21 @@ with gr.Blocks() as demo:
|
|
286 |
|
287 |
with gr.Tab("Voice Chat"):
|
288 |
with gr.Tabs():
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
# upload_audio_button = gr.Button("Ask", elem_id="ask_button")
|
298 |
|
299 |
with gr.Tab("Record Audio"):
|
300 |
-
# Record audio section
|
301 |
audio_recorder = gr.Audio(label="Record your voice", type="filepath")
|
302 |
record_transcribe_button = gr.Button("Transcribe Recording", elem_id="transcribe_button")
|
303 |
record_transcription = gr.Textbox(label="Transcription", interactive=False)
|
304 |
-
#
|
305 |
record_audio_output = gr.Textbox(label="Response", interactive=False)
|
306 |
record_audio_button = gr.Button("Ask", elem_id="ask_button")
|
307 |
|
@@ -315,18 +313,19 @@ with gr.Blocks() as demo:
|
|
315 |
image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
|
316 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
317 |
|
318 |
-
# Voice Chat - Upload Audio tab actions
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
|
|
330 |
|
331 |
# Voice Chat - Record Audio tab actions
|
332 |
record_transcribe_button.click(
|
@@ -335,14 +334,14 @@ with gr.Blocks() as demo:
|
|
335 |
outputs=[record_transcription]
|
336 |
)
|
337 |
|
338 |
-
# Modified to use new function that doesn't require the query parameter
|
339 |
record_audio_button.click(
|
340 |
-
|
|
|
341 |
inputs=[record_transcription, temperature, top_p, max_output_tokens],
|
342 |
outputs=[record_audio_output]
|
343 |
)
|
344 |
|
345 |
-
# Clear button
|
346 |
clear_button.click(
|
347 |
clear_chat,
|
348 |
outputs=[
|
@@ -351,7 +350,7 @@ with gr.Blocks() as demo:
|
|
351 |
image_text_query, image_output,
|
352 |
pdf_upload, pdf_text_query, pdf_output,
|
353 |
audio_upload, upload_transcription, upload_audio_query, upload_audio_output,
|
354 |
-
audio_recorder, record_transcription, record_audio_output,
|
355 |
temperature, top_p, max_output_tokens
|
356 |
]
|
357 |
)
|
|
|
152 |
except Exception as e:
|
153 |
return f"Error transcribing recorded audio: {str(e)}"
|
154 |
|
155 |
+
# Function to process the voice chat queries
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
def process_voice_query(transcription, query, temperature, top_p, max_output_tokens):
|
157 |
if not transcription or transcription.startswith("Error") or transcription.startswith("Please"):
|
158 |
return "Please ensure audio is transcribed successfully first."
|
|
|
195 |
"", # upload_audio_output (textbox)
|
196 |
None, # audio_recorder (audio)
|
197 |
"", # record_transcription (textbox)
|
198 |
+
"", # record_audio_query (textbox)
|
199 |
"", # record_audio_output (textbox)
|
200 |
1.0, # temperature (slider)
|
201 |
1.0, # top_p (slider)
|
|
|
232 |
#clear_chat_button:hover {
|
233 |
background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
|
234 |
}
|
235 |
+
#ask_button {
|
236 |
background: linear-gradient(135deg, #fbd38d 0%, #f6e05e 100%); /* Yellow gradient */
|
237 |
}
|
238 |
+
#ask_button:hover {
|
239 |
background: linear-gradient(135deg, #ecc94b 0%, #fbd38d 100%); /* Slightly darker yellow gradient on hover */
|
240 |
}
|
241 |
+
#transcribe_button {
|
242 |
+
background: linear-gradient(135deg, #68d391 0%, #48bb78 100%); /* Green gradient */
|
243 |
+
}
|
244 |
+
|
245 |
+
#transcribe_button:hover {
|
246 |
+
background: linear-gradient(135deg, #38a169 0%, #68d391 100%); /* Slightly darker green gradient on hover */
|
247 |
+
}
|
248 |
</style>
|
249 |
""")
|
250 |
|
|
|
285 |
|
286 |
with gr.Tab("Voice Chat"):
|
287 |
with gr.Tabs():
|
288 |
+
with gr.Tab("Upload Audio"):
|
289 |
+
# Upload audio section
|
290 |
+
audio_upload = gr.File(label="Upload an Audio File", type="binary")
|
291 |
+
upload_transcribe_button = gr.Button("Transcribe Audio", elem_id="transcribe_button")
|
292 |
+
upload_transcription = gr.Textbox(label="Transcription", interactive=False)
|
293 |
+
# upload_audio_query = gr.Textbox(label="Ask about the transcription (optional)")
|
294 |
+
upload_audio_output = gr.Textbox(label="Response", interactive=False)
|
295 |
+
upload_audio_button = gr.Button("Ask", elem_id="ask_button")
|
|
|
296 |
|
297 |
with gr.Tab("Record Audio"):
|
298 |
+
# Record audio section - Fixed to use compatible parameters
|
299 |
audio_recorder = gr.Audio(label="Record your voice", type="filepath")
|
300 |
record_transcribe_button = gr.Button("Transcribe Recording", elem_id="transcribe_button")
|
301 |
record_transcription = gr.Textbox(label="Transcription", interactive=False)
|
302 |
+
# record_audio_query = gr.Textbox(label="Ask about the transcription (optional)")
|
303 |
record_audio_output = gr.Textbox(label="Response", interactive=False)
|
304 |
record_audio_button = gr.Button("Ask", elem_id="ask_button")
|
305 |
|
|
|
313 |
image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
|
314 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
315 |
|
316 |
+
# Voice Chat - Upload Audio tab actions
|
317 |
+
upload_transcribe_button.click(
|
318 |
+
process_uploaded_audio,
|
319 |
+
inputs=[audio_upload],
|
320 |
+
outputs=[upload_transcription]
|
321 |
+
)
|
322 |
|
323 |
+
upload_audio_button.click(
|
324 |
+
process_voice_query,
|
325 |
+
# inputs=[upload_transcription, upload_audio_query, temperature, top_p, max_output_tokens],
|
326 |
+
inputs=[upload_transcription, temperature, top_p, max_output_tokens],
|
327 |
+
outputs=[upload_audio_output]
|
328 |
+
)
|
329 |
|
330 |
# Voice Chat - Record Audio tab actions
|
331 |
record_transcribe_button.click(
|
|
|
334 |
outputs=[record_transcription]
|
335 |
)
|
336 |
|
|
|
337 |
record_audio_button.click(
|
338 |
+
process_voice_query,
|
339 |
+
# inputs=[record_transcription, record_audio_query, temperature, top_p, max_output_tokens],
|
340 |
inputs=[record_transcription, temperature, top_p, max_output_tokens],
|
341 |
outputs=[record_audio_output]
|
342 |
)
|
343 |
|
344 |
+
# Clear button resets all necessary fields
|
345 |
clear_button.click(
|
346 |
clear_chat,
|
347 |
outputs=[
|
|
|
350 |
image_text_query, image_output,
|
351 |
pdf_upload, pdf_text_query, pdf_output,
|
352 |
audio_upload, upload_transcription, upload_audio_query, upload_audio_output,
|
353 |
+
audio_recorder, record_transcription, record_audio_query, record_audio_output,
|
354 |
temperature, top_p, max_output_tokens
|
355 |
]
|
356 |
)
|