Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,8 @@ import base64
|
|
8 |
import json
|
9 |
import time
|
10 |
from dotenv import load_dotenv
|
|
|
|
|
11 |
|
12 |
# Load environment variables
|
13 |
load_dotenv()
|
@@ -363,97 +365,85 @@ def process_question(question, history):
|
|
363 |
history.append({"role": "assistant", "content": f"Error: {str(e)}"})
|
364 |
return "", history, "", None
|
365 |
|
366 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
if audio_file is None:
|
368 |
-
return "Please record or upload an audio file.", history, "", None
|
369 |
try:
|
370 |
transcript = rag.transcribe_audio(audio_file)
|
371 |
if not transcript or not str(transcript).strip():
|
372 |
history.append({"role": "user", "content": "π€ [No audio detected or transcription failed]"})
|
373 |
history.append({"role": "assistant", "content": "Sorry, I couldn't understand the audio. Please try again."})
|
374 |
-
return "", history, "", None
|
375 |
if not rag.thread_id:
|
376 |
-
return "Please upload a document first.", history, "", None
|
377 |
response = rag.ask_question(transcript)
|
378 |
history.append({"role": "user", "content": f"π€ {transcript}"})
|
379 |
history.append({"role": "assistant", "content": response})
|
380 |
-
|
|
|
381 |
except Exception as e:
|
382 |
history.append({"role": "user", "content": f"π€ [Error transcribing audio: {str(e)}]"})
|
383 |
history.append({"role": "assistant", "content": "It seems there was an error while transcribing audio due to a technical issue. If there's anything specific from the document or any other questions you have regarding the content, please let me know, and I can assist you with that information."})
|
384 |
-
return "", history, "", None
|
385 |
-
|
386 |
-
def process_audio_base64(audio_base64, history):
    """Transcribe a base64-encoded recording and answer it from the document.

    Args:
        audio_base64: Base64 audio, either as a data URL
            (``data:...;base64,<payload>``) or as the bare payload.
        history: Chat history as a list of ``(user, assistant)`` tuples.

    Returns:
        A ``(status_message, history)`` tuple. On success the status is empty
        and the new exchange has been appended to ``history``. On failure the
        status is empty and a *new* list is returned with an
        ``("Audio input", "Error: ...")`` entry added.
    """
    if not audio_base64:
        return "Please record an audio message first.", history

    tmp_path = None
    try:
        # Keep only what follows the last comma so that both data URLs and
        # bare base64 strings are accepted (base64 itself contains no commas).
        payload = audio_base64.rpartition(',')[2]
        audio_data = base64.b64decode(payload)

        # delete=False because the file is reopened by name for the upload;
        # removal is handled in the ``finally`` clause below.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name
            tmp.write(audio_data)

        # Transcribe audio
        with open(tmp_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                language="en"
            )

        if not rag.thread_id:
            return "Please upload a document first.", history

        # Get response from assistant
        response = rag.ask_question(transcript.text)
        history.append((f"π€ {transcript.text}", response))
        return "", history
    except Exception as e:
        return "", history + [("Audio input", f"Error: {str(e)}")]
    finally:
        # Always clean up the temp file, including when transcription fails.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
|
415 |
|
416 |
# Create Gradio interface with improved layout
|
417 |
with gr.Blocks(css=custom_css, title="Document Q&A System") as demo:
|
418 |
gr.Markdown("""
|
419 |
# <span style='color:#1976D2;'>Document Q&A System</span>
|
420 |
-
<div style='text-align:center; color:#1976D2; margin-bottom:18px;'>Upload a document,
|
421 |
""")
|
422 |
|
423 |
# Define shared components at the top
|
424 |
-
chatbot = gr.Chatbot(height=
|
425 |
file_output = gr.Textbox(label="Upload Status", interactive=False, elem_classes="textbox")
|
426 |
question = gr.Textbox(label="Type your question and press Enter", placeholder="Ask a question about your document...", elem_classes="textbox")
|
427 |
audio_input = gr.Audio(type="filepath", label="Record or Upload Audio", elem_classes="gradio-audio", visible=False)
|
428 |
audio_status = gr.Textbox(label="Audio Status", interactive=False, elem_classes="status-text", visible=False)
|
|
|
429 |
|
430 |
with gr.Row():
|
431 |
-
#
|
432 |
-
with gr.Column(scale=1, min_width=
|
433 |
with gr.Group(elem_classes="compact-box"):
|
434 |
-
gr.Markdown("<div class='section-title'>
|
435 |
file_input = gr.File(label="Upload Document", file_types=[".pdf", ".txt", ".doc", ".docx"], file_count="single", type="binary", elem_classes="upload-btn")
|
|
|
|
|
|
|
436 |
reset_btn = gr.Button("Reset Chat & Upload New Document", elem_classes="reset-btn")
|
|
|
437 |
file_input.change(process_file, file_input, file_output)
|
438 |
def reset_all():
|
439 |
rag.thread_id = None
|
440 |
-
return "", [], "", None
|
441 |
-
reset_btn.click(reset_all, None, [file_output, chatbot, question, audio_input])
|
442 |
-
|
443 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
444 |
with gr.Column(scale=3, min_width=400):
|
445 |
with gr.Group(elem_classes="compact-box"):
|
446 |
chatbot
|
447 |
with gr.Row():
|
448 |
question
|
449 |
send_btn = gr.Button("Send", elem_classes="send-btn")
|
450 |
-
mic_btn = gr.Button("π€", elem_classes="audio-btn")
|
451 |
send_btn.click(process_question, [question, chatbot], [question, chatbot, question, audio_input])
|
452 |
question.submit(process_question, [question, chatbot], [question, chatbot, question, audio_input])
|
453 |
-
audio_input.change(process_audio, [audio_input, chatbot], [audio_input, chatbot, question, audio_input])
|
454 |
-
def show_audio():
|
455 |
-
return {audio_input: gr.update(visible=True), audio_status: gr.update(visible=True)}
|
456 |
-
mic_btn.click(show_audio, None, [audio_input, audio_status])
|
457 |
|
458 |
# Add JavaScript for audio handling
|
459 |
demo.load(
|
|
|
8 |
import json
|
9 |
import time
|
10 |
from dotenv import load_dotenv
|
11 |
+
from gtts import gTTS
|
12 |
+
import io
|
13 |
|
14 |
# Load environment variables
|
15 |
load_dotenv()
|
|
|
365 |
history.append({"role": "assistant", "content": f"Error: {str(e)}"})
|
366 |
return "", history, "", None
|
367 |
|
368 |
+
def synthesize_text(text):
    """Convert ``text`` to speech and return the encoded audio bytes.

    Args:
        text: The text to speak.

    Returns:
        The bytes gTTS wrote to an in-memory buffer, or ``None`` when there is
        nothing to speak or synthesis fails. Failure is best-effort: callers
        get ``None`` rather than an exception.
    """
    # Nothing to speak: skip the TTS call entirely.
    if not text or not str(text).strip():
        return None

    try:
        tts = gTTS(text)
        fp = io.BytesIO()
        tts.write_to_fp(fp)
        return fp.getvalue()
    except Exception:
        # Keep the best-effort contract but leave a trace for debugging
        # instead of swallowing the error silently.
        import logging

        logging.getLogger(__name__).warning(
            "Text-to-speech synthesis failed", exc_info=True
        )
        return None
|
377 |
+
|
378 |
+
def process_voice_note(audio_file, history):
    """Transcribe a voice note, ask the assistant, and synthesize the reply.

    Args:
        audio_file: Path of the recorded/uploaded audio, or ``None`` when
            nothing was provided.
        history: Chat history as a list of ``{"role", "content"}`` dicts;
            it is appended to in place.

    Returns:
        A 5-tuple ``(status, history, question_text, audio_input, tts_audio)``.
        ``tts_audio`` is the synthesized reply, or ``None`` on any early exit
        or failure.
    """
    if audio_file is None:
        return "Please record or upload an audio file.", history, "", None, None

    # Transcription and question answering are guarded separately so that a
    # failure is reported for the step that actually failed.
    try:
        transcript = rag.transcribe_audio(audio_file)
    except Exception as e:
        history.append({"role": "user", "content": f"π€ [Error transcribing audio: {str(e)}]"})
        history.append({"role": "assistant", "content": "It seems there was an error while transcribing audio due to a technical issue. If there's anything specific from the document or any other questions you have regarding the content, please let me know, and I can assist you with that information."})
        return "", history, "", None, None

    if not transcript or not str(transcript).strip():
        history.append({"role": "user", "content": "π€ [No audio detected or transcription failed]"})
        history.append({"role": "assistant", "content": "Sorry, I couldn't understand the audio. Please try again."})
        return "", history, "", None, None

    if not rag.thread_id:
        return "Please upload a document first.", history, "", None, None

    # Record what was heard before asking, so the user still sees their
    # message if the assistant call fails.
    history.append({"role": "user", "content": f"π€ {transcript}"})
    try:
        response = rag.ask_question(transcript)
    except Exception as e:
        # Same error format as the typed-question path.
        history.append({"role": "assistant", "content": f"Error: {str(e)}"})
        return "", history, "", None, None

    history.append({"role": "assistant", "content": response})
    tts_audio = synthesize_text(response)
    return "", history, "", None, tts_audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
398 |
|
399 |
# Create Gradio interface with improved layout
|
400 |
with gr.Blocks(css=custom_css, title="Document Q&A System") as demo:
|
401 |
gr.Markdown("""
|
402 |
# <span style='color:#1976D2;'>Document Q&A System</span>
|
403 |
+
<div style='text-align:center; color:#1976D2; margin-bottom:18px;'>Upload a document, record your voice, and chat!</div>
|
404 |
""")
|
405 |
|
406 |
# Define shared components at the top
|
407 |
+
chatbot = gr.Chatbot(height=400, elem_classes="gradio-chatbot", label=None, type="messages")
|
408 |
file_output = gr.Textbox(label="Upload Status", interactive=False, elem_classes="textbox")
|
409 |
question = gr.Textbox(label="Type your question and press Enter", placeholder="Ask a question about your document...", elem_classes="textbox")
|
410 |
audio_input = gr.Audio(type="filepath", label="Record or Upload Audio", elem_classes="gradio-audio", visible=False)
|
411 |
audio_status = gr.Textbox(label="Audio Status", interactive=False, elem_classes="status-text", visible=False)
|
412 |
+
tts_output = gr.Audio(label="Assistant Voice Reply", interactive=False, visible=False)
|
413 |
|
414 |
with gr.Row():
|
415 |
+
# Controls on the top left
|
416 |
+
with gr.Column(scale=1, min_width=320):
|
417 |
with gr.Group(elem_classes="compact-box"):
|
418 |
+
gr.Markdown("<div class='section-title'>Upload & Voice</div>")
|
419 |
file_input = gr.File(label="Upload Document", file_types=[".pdf", ".txt", ".doc", ".docx"], file_count="single", type="binary", elem_classes="upload-btn")
|
420 |
+
mic_btn = gr.Button("π€ Record Voice", elem_classes="audio-btn")
|
421 |
+
audio_input
|
422 |
+
send_voice_btn = gr.Button("Send Voice Note", elem_classes="send-btn", visible=False)
|
423 |
reset_btn = gr.Button("Reset Chat & Upload New Document", elem_classes="reset-btn")
|
424 |
+
file_output
|
425 |
file_input.change(process_file, file_input, file_output)
|
426 |
def reset_all():
|
427 |
rag.thread_id = None
|
428 |
+
return "", [], "", None, None
|
429 |
+
reset_btn.click(reset_all, None, [file_output, chatbot, question, audio_input, tts_output])
|
430 |
+
def show_audio():
|
431 |
+
return {audio_input: gr.update(visible=True), send_voice_btn: gr.update(visible=True)}
|
432 |
+
mic_btn.click(show_audio, None, [audio_input, send_voice_btn])
|
433 |
+
def hide_audio():
|
434 |
+
return {audio_input: gr.update(visible=False), send_voice_btn: gr.update(visible=False)}
|
435 |
+
send_voice_btn.click(process_voice_note, [audio_input, chatbot], [file_output, chatbot, question, audio_input, tts_output])
|
436 |
+
send_voice_btn.click(hide_audio, None, [audio_input, send_voice_btn])
|
437 |
+
tts_output
|
438 |
+
# Chatbot/chat area to the right
|
439 |
with gr.Column(scale=3, min_width=400):
|
440 |
with gr.Group(elem_classes="compact-box"):
|
441 |
chatbot
|
442 |
with gr.Row():
|
443 |
question
|
444 |
send_btn = gr.Button("Send", elem_classes="send-btn")
|
|
|
445 |
send_btn.click(process_question, [question, chatbot], [question, chatbot, question, audio_input])
|
446 |
question.submit(process_question, [question, chatbot], [question, chatbot, question, audio_input])
|
|
|
|
|
|
|
|
|
447 |
|
448 |
# Add JavaScript for audio handling
|
449 |
demo.load(
|