Shreyas094 committed
Commit d0129e6 · verified · 1 Parent(s): 95ac269

Update app.py

Files changed (1)
  1. app.py +69 -73
app.py CHANGED
@@ -401,45 +401,50 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
         return f"An error occurred during summarization: {str(e)}"

 # Modify the existing respond function to handle both PDF and web search
-def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
-    logging.info(f"User Query: {message}")
-    logging.info(f"Model Used: {model}")
-    logging.info(f"Selected Documents: {selected_docs}")
-    logging.info(f"Use Web Search: {use_web_search}")
+# Modify your existing respond function to handle both PDF and web search
+async def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs, progress=gr.Progress()):
+    logger.info(f"User Query: {message}")
+    logger.info(f"Model Used: {model}")
+    logger.info(f"Selected Documents: {selected_docs}")
+    logger.info(f"Use Web Search: {use_web_search}")

     response = ""

-    if use_web_search:
-        original_query = message
-        rephrased_query = rephrase_query(message, conversation_manager)
-        logging.info(f"Original query: {original_query}")
-        logging.info(f"Rephrased query: {rephrased_query}")
-
-        final_summary = ""
-        for _ in range(num_calls):
-            search_results = get_web_search_results(rephrased_query)
-            if not search_results:
-                final_summary += f"No search results found for the query: {rephrased_query}\n\n"
-            elif "error" in search_results[0]:
-                final_summary += search_results[0]["error"] + "\n\n"
+    try:
+        if use_web_search:
+            progress(0, desc="Starting web search")
+            original_query = message
+            rephrased_query = rephrase_query(message, conversation_manager)
+            logger.info(f"Original query: {original_query}")
+            logger.info(f"Rephrased query: {rephrased_query}")
+
+            final_summary = ""
+            for i in range(num_calls):
+                progress((i + 1) * 33, desc=f"Performing web search {i+1}/{num_calls}")
+                search_results = get_web_search_results(rephrased_query)
+                if not search_results:
+                    final_summary += f"No search results found for the query: {rephrased_query}\n\n"
+                elif "error" in search_results[0]:
+                    final_summary += search_results[0]["error"] + "\n\n"
+                else:
+                    summary = summarize_web_results(rephrased_query, search_results, conversation_manager)
+                    final_summary += summary + "\n\n"
+
+            if final_summary:
+                conversation_manager.add_interaction(original_query, final_summary)
+                response = final_summary
             else:
-                summary = summarize_web_results(rephrased_query, search_results, conversation_manager)
-                final_summary += summary + "\n\n"
+                response = "Unable to generate a response. Please try a different query."

-        if final_summary:
-            conversation_manager.add_interaction(original_query, final_summary)
-            response = final_summary
         else:
-            response = "Unable to generate a response. Please try a different query."
-
-    else:
-        # Existing PDF search logic
-        try:
+            # Existing PDF search logic
+            progress(0, desc="Starting PDF search")
             embed = get_embeddings()
             if os.path.exists("faiss_database"):
                 database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
                 retriever = database.as_retriever(search_kwargs={"k": 20})

+                progress(33, desc="Retrieving relevant documents")
                 all_relevant_docs = retriever.get_relevant_documents(message)
                 relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]

@@ -447,8 +452,9 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
                     response = "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
                 else:
                     context_str = "\n".join([doc.page_content for doc in relevant_docs])
-                    logging.info(f"Context length: {len(context_str)}")
+                    logger.info(f"Context length: {len(context_str)}")

+                    progress(66, desc="Generating response")
                     if model.startswith("duckduckgo/"):
                         # Use DuckDuckGo chat with context
                         for partial_response in get_response_from_duckduckgo(message, model, context_str, num_calls, temperature):
@@ -464,20 +470,12 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
             else:
                 response = "No documents available. Please upload PDF documents to answer questions."

-        except Exception as e:
-            logging.error(f"Error with {model}: {str(e)}")
-            if "microsoft/Phi-3-mini-4k-instruct" in model:
-                logging.info("Falling back to Mistral model due to Phi-3 error")
-                fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
-                return respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
-            else:
-                response = f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
+    except Exception as e:
+        logger.error(f"Error in respond function: {str(e)}")
+        response = f"An error occurred: {str(e)}. Please try again or select a different model."

-    # Update the conversation history
-    history.append((message, response))
-
-    # Yield the updated history
-    yield history
+    progress(100, desc="Response generation complete")
+    return response

 logging.basicConfig(level=logging.DEBUG)

@@ -617,21 +615,34 @@ Write a detailed and complete response that answers the following user question:

     logging.info("Finished generating response")

-async def transcribe_async(audio_file):
+# Modified transcribe function
+async def transcribe_with_timeout(audio_file, progress=gr.Progress()):
     if audio_file is None:
-        return "", "No audio file provided"
+        logger.warning("No audio file provided")
+        return "No audio file provided"

     try:
-        def process_audio():
-            with open(audio_file, "rb") as f:
-                audio_data = f.read()
-            return whisper_api(audio_data)["text"]
+        logger.info(f"Starting transcription for file: {audio_file}")
+        progress(0, desc="Starting transcription")
+
+        with open(audio_file, "rb") as f:
+            audio_data = f.read()
+
+        logger.info("Audio file read successfully")
+        progress(50, desc="Audio loaded, sending to API")

-        loop = asyncio.get_event_loop()
-        response = await loop.run_in_executor(executor, process_audio)
-        return response, "Transcription completed successfully"
+        response = await asyncio.wait_for(asyncio.to_thread(whisper_api, audio_data), timeout=30)
+
+        logger.info("Transcription complete")
+        progress(100, desc="Transcription complete")
+        return response["text"]
+    except asyncio.TimeoutError:
+        logger.error("Transcription timed out")
+        return "Transcription timed out. Please try again with a shorter audio clip."
     except Exception as e:
-        return "", f"Error during transcription: {str(e)}"
+        logger.exception(f"Error during transcription: {str(e)}")
+        return f"Error during transcription: {str(e)}"
+        )}"

 def vote(data: gr.LikeData):
     if data.liked:
@@ -685,20 +696,19 @@ custom_placeholder = "Ask a question (Note: You can toggle between Web Search an
 def update_textbox(transcription):
     return gr.Textbox.update(value=transcription)

-# Update the Gradio interface
+# Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# AI-powered PDF Chat and Web Search Assistant with Speech Input")

     with gr.Row():
         with gr.Column(scale=1):
-            audio_input = gr.Audio(sources="microphone", type="filepath", label="Speak your query")
+            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak your query")
             transcribe_button = gr.Button("Transcribe")

         with gr.Column(scale=2):
             chatbot = gr.Chatbot(
                 show_copy_button=True,
-                likeable=True,
-                layout="bubble",
+                bubble=True,
                 height=400,
                 value=initial_conversation()
             )
@@ -716,7 +726,6 @@ with gr.Blocks() as demo:
     use_web_search = gr.Checkbox(label="Use Web Search", value=True)
     document_selector = gr.CheckboxGroup(label="Select documents to query")

-    # Add file upload functionality
     gr.Markdown("## Upload and Manage PDF Documents")
     with gr.Row():
         file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
@@ -727,9 +736,9 @@ with gr.Blocks() as demo:
     update_output = gr.Textbox(label="Update Status")
     delete_button = gr.Button("Delete Selected Documents")

-    # Update the Gradio interface to use the async function
+    # Connect components
     transcribe_button.click(
-        transcribe_async,
+        transcribe_with_timeout,
         inputs=[audio_input],
         outputs=[query_textbox]
     )
@@ -758,18 +767,5 @@ with gr.Blocks() as demo:
         outputs=[update_output, document_selector]
     )

-    gr.Markdown(
-    """
-    ## How to use
-    1. Use the microphone to speak your query, then click "Transcribe", or type directly in the text box.
-    2. Click "Submit" to get a response from the AI.
-    3. Upload PDF documents using the file input at the bottom.
-    4. Select the PDF parser (pypdf or llamaparse) and click "Upload Document" to update the vector store.
-    5. Select the documents you want to query using the checkboxes.
-    6. Toggle "Use Web Search" to switch between PDF chat and web search.
-    7. Adjust Temperature and Number of API Calls to fine-tune the response generation.
-    """
-    )
-
 if __name__ == "__main__":
-    demo.launch(share=True)
+    demo.launch(debug=True, show_error=True)
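The rewritten respond handler takes a progress=gr.Progress() keyword argument so Gradio can inject a progress tracker and surface the progress(...) calls in the UI. Below is a minimal, self-contained sketch of that pattern; the slow_count function and the component names are illustrative rather than taken from app.py, and it assumes a recent Gradio 4.x release, where the tracker is normally called with a fraction between 0 and 1 (the integer percentages used in this commit, such as progress(33, ...), would likely need scaling to that range).

```python
import time

import gradio as gr

def slow_count(n, progress=gr.Progress()):
    # Gradio injects a Progress tracker because of the gr.Progress() default.
    n = int(n)
    progress(0, desc="Starting")
    for i in range(n):
        time.sleep(0.1)  # stand-in for real work (search, retrieval, generation)
        progress((i + 1) / n, desc=f"Step {i + 1}/{n}")
    return f"Finished {n} steps"

with gr.Blocks() as demo:
    steps = gr.Slider(1, 20, value=5, step=1, label="Steps")
    result = gr.Textbox(label="Result")
    gr.Button("Run").click(slow_count, inputs=steps, outputs=result)

if __name__ == "__main__":
    demo.launch()
```

Gradio event listeners also accept coroutine functions, which is why the async def signatures of the new respond and transcribe_with_timeout can be wired directly to click events.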
 
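transcribe_with_timeout bounds the blocking Whisper call with asyncio.wait_for(asyncio.to_thread(...), timeout=30), replacing the old run_in_executor approach. Here is a standalone sketch of that pattern, with slow_api_call standing in for whisper_api (requires Python 3.9+ for asyncio.to_thread):

```python
import asyncio
import time

def slow_api_call(payload: bytes) -> dict:
    # Stand-in for a blocking call such as whisper_api(audio_data).
    time.sleep(2)
    return {"text": f"processed {len(payload)} bytes"}

async def call_with_timeout(payload: bytes, timeout: float = 30.0) -> str:
    try:
        # Run the blocking call in a worker thread and cap how long we wait for it.
        response = await asyncio.wait_for(asyncio.to_thread(slow_api_call, payload), timeout=timeout)
        return response["text"]
    except asyncio.TimeoutError:
        return "Timed out. Please try again with a smaller payload."

if __name__ == "__main__":
    print(asyncio.run(call_with_timeout(b"fake audio bytes", timeout=5.0)))
```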
 
 
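In the PDF branch, the handler pulls the top-k chunks from the FAISS retriever and keeps only those whose metadata["source"] is among the user's selected_docs before joining them into context_str. A dependency-free sketch of that filtering step; the Doc dataclass stands in for the LangChain Document objects the retriever actually returns:

```python
from dataclasses import dataclass, field

@dataclass
class Doc:
    # Minimal stand-in for a retrieved document chunk.
    page_content: str
    metadata: dict = field(default_factory=dict)

def filter_by_source(docs, selected_docs):
    # Keep only chunks that came from one of the user-selected PDF files.
    return [doc for doc in docs if doc.metadata.get("source") in selected_docs]

if __name__ == "__main__":
    docs = [
        Doc("Quarterly revenue grew 12%.", {"source": "report.pdf"}),
        Doc("Unrelated excerpt.", {"source": "notes.pdf"}),
    ]
    relevant = filter_by_source(docs, selected_docs=["report.pdf"])
    context_str = "\n".join(doc.page_content for doc in relevant)
    print(context_str)  # -> Quarterly revenue grew 12%.
```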