DreamStream-1 committed on
Commit 4214099 · verified · 1 Parent(s): 4d8736d

Update app.py

Files changed (1): app.py (+40 -50)
app.py CHANGED
@@ -8,6 +8,8 @@ import base64
 import json
 import time
 from dotenv import load_dotenv
+from gtts import gTTS
+import io
 
 # Load environment variables
 load_dotenv()
@@ -363,97 +365,85 @@ def process_question(question, history):
         history.append({"role": "assistant", "content": f"Error: {str(e)}"})
         return "", history, "", None
 
-def process_audio(audio_file, history):
+def synthesize_text(text):
+    try:
+        tts = gTTS(text)
+        fp = io.BytesIO()
+        tts.write_to_fp(fp)
+        fp.seek(0)
+        return fp.read()
+    except Exception as e:
+        return None
+
+def process_voice_note(audio_file, history):
     if audio_file is None:
-        return "Please record or upload an audio file.", history, "", None
+        return "Please record or upload an audio file.", history, "", None, None
     try:
         transcript = rag.transcribe_audio(audio_file)
         if not transcript or not str(transcript).strip():
             history.append({"role": "user", "content": "🎤 [No audio detected or transcription failed]"})
             history.append({"role": "assistant", "content": "Sorry, I couldn't understand the audio. Please try again."})
-            return "", history, "", None
+            return "", history, "", None, None
         if not rag.thread_id:
-            return "Please upload a document first.", history, "", None
+            return "Please upload a document first.", history, "", None, None
         response = rag.ask_question(transcript)
         history.append({"role": "user", "content": f"🎤 {transcript}"})
         history.append({"role": "assistant", "content": response})
-        return "", history, "", None
+        tts_audio = synthesize_text(response)
+        return "", history, "", None, tts_audio
     except Exception as e:
         history.append({"role": "user", "content": f"🎤 [Error transcribing audio: {str(e)}]"})
         history.append({"role": "assistant", "content": "It seems there was an error while transcribing audio due to a technical issue. If there's anything specific from the document or any other questions you have regarding the content, please let me know, and I can assist you with that information."})
-        return "", history, "", None
-
-def process_audio_base64(audio_base64, history):
-    if not audio_base64:
-        return "Please record an audio message first.", history
-    try:
-        # Convert base64 to audio file
-        audio_data = base64.b64decode(audio_base64.split(',')[1])
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
-            tmp.write(audio_data)
-            tmp.flush()
-            tmp_path = tmp.name
-
-        # Transcribe audio
-        with open(tmp_path, "rb") as audio_file:
-            transcript = openai.audio.transcriptions.create(
-                model="whisper-1",
-                file=audio_file,
-                language="en"
-            )
-        os.remove(tmp_path)
-
-        if not rag.thread_id:
-            return "Please upload a document first.", history
-
-        # Get response from assistant
-        response = rag.ask_question(transcript.text)
-        history.append((f"🎤 {transcript.text}", response))
-        return "", history
-    except Exception as e:
-        return "", history + [("Audio input", f"Error: {str(e)}")]
+        return "", history, "", None, None
 
 # Create Gradio interface with improved layout
 with gr.Blocks(css=custom_css, title="Document Q&A System") as demo:
     gr.Markdown("""
     # <span style='color:#1976D2;'>Document Q&A System</span>
-    <div style='text-align:center; color:#1976D2; margin-bottom:18px;'>Upload a document, ask questions, or use your voice!</div>
+    <div style='text-align:center; color:#1976D2; margin-bottom:18px;'>Upload a document, record your voice, and chat!</div>
     """)
 
     # Define shared components at the top
-    chatbot = gr.Chatbot(height=350, elem_classes="gradio-chatbot", label=None, type="messages")
+    chatbot = gr.Chatbot(height=400, elem_classes="gradio-chatbot", label=None, type="messages")
     file_output = gr.Textbox(label="Upload Status", interactive=False, elem_classes="textbox")
     question = gr.Textbox(label="Type your question and press Enter", placeholder="Ask a question about your document...", elem_classes="textbox")
     audio_input = gr.Audio(type="filepath", label="Record or Upload Audio", elem_classes="gradio-audio", visible=False)
     audio_status = gr.Textbox(label="Audio Status", interactive=False, elem_classes="status-text", visible=False)
+    tts_output = gr.Audio(label="Assistant Voice Reply", interactive=False, visible=False)
 
     with gr.Row():
-        # Sidebar on the left
-        with gr.Column(scale=1, min_width=280):
+        # Controls on the top left
+        with gr.Column(scale=1, min_width=320):
             with gr.Group(elem_classes="compact-box"):
-                gr.Markdown("<div class='section-title'>Document Q&A</div>")
+                gr.Markdown("<div class='section-title'>Upload & Voice</div>")
                 file_input = gr.File(label="Upload Document", file_types=[".pdf", ".txt", ".doc", ".docx"], file_count="single", type="binary", elem_classes="upload-btn")
+                mic_btn = gr.Button("🎤 Record Voice", elem_classes="audio-btn")
+                audio_input
+                send_voice_btn = gr.Button("Send Voice Note", elem_classes="send-btn", visible=False)
                 reset_btn = gr.Button("Reset Chat & Upload New Document", elem_classes="reset-btn")
+                file_output
                 file_input.change(process_file, file_input, file_output)
                 def reset_all():
                     rag.thread_id = None
-                    return "", [], "", None
-                reset_btn.click(reset_all, None, [file_output, chatbot, question, audio_input])
-                file_output
-        # Main chat area
+                    return "", [], "", None, None
+                reset_btn.click(reset_all, None, [file_output, chatbot, question, audio_input, tts_output])
+                def show_audio():
+                    return {audio_input: gr.update(visible=True), send_voice_btn: gr.update(visible=True)}
+                mic_btn.click(show_audio, None, [audio_input, send_voice_btn])
+                def hide_audio():
+                    return {audio_input: gr.update(visible=False), send_voice_btn: gr.update(visible=False)}
+                send_voice_btn.click(process_voice_note, [audio_input, chatbot], [file_output, chatbot, question, audio_input, tts_output])
+                send_voice_btn.click(hide_audio, None, [audio_input, send_voice_btn])
+                tts_output
+        # Chatbot/chat area to the right
         with gr.Column(scale=3, min_width=400):
             with gr.Group(elem_classes="compact-box"):
                 chatbot
                 with gr.Row():
                     question
                     send_btn = gr.Button("Send", elem_classes="send-btn")
-                    mic_btn = gr.Button("🎤", elem_classes="audio-btn")
                 send_btn.click(process_question, [question, chatbot], [question, chatbot, question, audio_input])
                 question.submit(process_question, [question, chatbot], [question, chatbot, question, audio_input])
-                audio_input.change(process_audio, [audio_input, chatbot], [audio_input, chatbot, question, audio_input])
-                def show_audio():
-                    return {audio_input: gr.update(visible=True), audio_status: gr.update(visible=True)}
-                mic_btn.click(show_audio, None, [audio_input, audio_status])
 
     # Add JavaScript for audio handling
     demo.load(
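Note on the TTS wiring (a reviewer sketch, not part of the commit): synthesize_text() returns raw MP3 bytes, while gr.Audio values are normally a filepath or a (sample_rate, numpy_array) tuple, so playback through tts_output may need a small shim. A minimal sketch, assuming the synthesize_text helper from this diff; the tts_bytes_to_filepath name is invented here:

import tempfile

def tts_bytes_to_filepath(mp3_bytes):
    # Persist gTTS's MP3 bytes to disk so gr.Audio can load them by path.
    # Returns None when synthesis failed upstream.
    if mp3_bytes is None:
        return None
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(mp3_bytes)
        return tmp.name

With this shim, process_voice_note could return tts_bytes_to_filepath(synthesize_text(response)) in place of the raw bytes.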
 
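Note on the event wiring (a reviewer sketch, not part of the commit): the two send_voice_btn.click() registrations in the diff are independent events, so hide_audio is not guaranteed to wait for process_voice_note to finish. A minimal sketch of the same wiring using .then() chaining, assuming a Gradio release that supports it and the components defined in this diff:

send_voice_btn.click(
    process_voice_note,
    [audio_input, chatbot],
    [file_output, chatbot, question, audio_input, tts_output],
).then(
    hide_audio,
    None,
    [audio_input, send_voice_btn],
)

Chaining keeps the recorder visible until the transcription round-trip completes, rather than hiding a widget that is still an input to a running event.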