Update app.py
app.py
CHANGED
@@ -401,45 +401,50 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
         return f"An error occurred during summarization: {str(e)}"
 
 # Modify the existing respond function to handle both PDF and web search
+# Modify your existing respond function to handle both PDF and web search
+async def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs, progress=gr.Progress()):
+    logger.info(f"User Query: {message}")
+    logger.info(f"Model Used: {model}")
+    logger.info(f"Selected Documents: {selected_docs}")
+    logger.info(f"Use Web Search: {use_web_search}")
 
     response = ""
 
+    try:
+        if use_web_search:
+            progress(0, desc="Starting web search")
+            original_query = message
+            rephrased_query = rephrase_query(message, conversation_manager)
+            logger.info(f"Original query: {original_query}")
+            logger.info(f"Rephrased query: {rephrased_query}")
+
+            final_summary = ""
+            for i in range(num_calls):
+                progress((i + 1) * 33, desc=f"Performing web search {i+1}/{num_calls}")
+                search_results = get_web_search_results(rephrased_query)
+                if not search_results:
+                    final_summary += f"No search results found for the query: {rephrased_query}\n\n"
+                elif "error" in search_results[0]:
+                    final_summary += search_results[0]["error"] + "\n\n"
+                else:
+                    summary = summarize_web_results(rephrased_query, search_results, conversation_manager)
+                    final_summary += summary + "\n\n"
+
+            if final_summary:
+                conversation_manager.add_interaction(original_query, final_summary)
+                response = final_summary
             else:
+                response = "Unable to generate a response. Please try a different query."
         else:
+            # Existing PDF search logic
+            progress(0, desc="Starting PDF search")
             embed = get_embeddings()
             if os.path.exists("faiss_database"):
                 database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
                 retriever = database.as_retriever(search_kwargs={"k": 20})
 
+                progress(33, desc="Retrieving relevant documents")
                 all_relevant_docs = retriever.get_relevant_documents(message)
                 relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
 
@@ -447,8 +452,9 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
                     response = "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
                 else:
                     context_str = "\n".join([doc.page_content for doc in relevant_docs])
+                    logger.info(f"Context length: {len(context_str)}")
 
+                    progress(66, desc="Generating response")
                     if model.startswith("duckduckgo/"):
                         # Use DuckDuckGo chat with context
                         for partial_response in get_response_from_duckduckgo(message, model, context_str, num_calls, temperature):
@@ -464,20 +470,12 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
             else:
                 response = "No documents available. Please upload PDF documents to answer questions."
 
-            logging.info("Falling back to Mistral model due to Phi-3 error")
-            fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
-            return respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
-        else:
-            response = f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
+    except Exception as e:
+        logger.error(f"Error in respond function: {str(e)}")
+        response = f"An error occurred: {str(e)}. Please try again or select a different model."
 
-    # Yield the updated history
-    yield history
+    progress(100, desc="Response generation complete")
+    return response
 
 logging.basicConfig(level=logging.DEBUG)
 
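respond is now an async coroutine that returns one finished string instead of yielding partial history, so whatever event wires it to the chat UI has to append the exchange to the chatbot itself. That wiring is not part of these hunks; the sketch below is one plausible shape, with submit_button, model_selector, temperature_slider and num_calls_slider as hypothetical stand-ins for components defined elsewhere in app.py (query_textbox, chatbot, use_web_search and document_selector do appear in later hunks).

# Sketch only: adapter between the async respond() above and the tuple-style chatbot.
async def on_submit(message, history, model, temperature, num_calls, use_web_search, selected_docs, progress=gr.Progress()):
    answer = await respond(message, history, model, temperature, num_calls, use_web_search, selected_docs, progress)
    return history + [(message, answer)], ""  # updated chat history, cleared textbox

submit_button.click(  # hypothetical button; the real event wiring lives elsewhere in app.py
    on_submit,
    inputs=[query_textbox, chatbot, model_selector, temperature_slider, num_calls_slider, use_web_search, document_selector],
    outputs=[chatbot, query_textbox],
)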
@@ -617,21 +615,34 @@ Write a detailed and complete response that answers the following user question:
 
     logging.info("Finished generating response")
 
+# Modified transcribe function
+async def transcribe_with_timeout(audio_file, progress=gr.Progress()):
     if audio_file is None:
+        logger.warning("No audio file provided")
+        return "No audio file provided"
 
     try:
+        logger.info(f"Starting transcription for file: {audio_file}")
+        progress(0, desc="Starting transcription")
+
+        with open(audio_file, "rb") as f:
+            audio_data = f.read()
+
+        logger.info("Audio file read successfully")
+        progress(50, desc="Audio loaded, sending to API")
 
+        response = await asyncio.wait_for(asyncio.to_thread(whisper_api, audio_data), timeout=30)
+
+        logger.info("Transcription complete")
+        progress(100, desc="Transcription complete")
+        return response["text"]
+    except asyncio.TimeoutError:
+        logger.error("Transcription timed out")
+        return "Transcription timed out. Please try again with a shorter audio clip."
     except Exception as e:
+        logger.exception(f"Error during transcription: {str(e)}")
+        return f"Error during transcription: {str(e)}"
 
 def vote(data: gr.LikeData):
     if data.liked:
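transcribe_with_timeout keeps the event loop responsive by pushing the blocking whisper_api call into a worker thread (asyncio.to_thread, Python 3.9+) and bounding it with asyncio.wait_for; this also assumes app.py imports asyncio and that whisper_api is a blocking callable, defined elsewhere, that takes raw audio bytes and returns a dict with a "text" key. A minimal, self-contained illustration of the same pattern:

import asyncio
import time

def blocking_transcribe(audio_data: bytes) -> dict:
    # Stand-in for the assumed whisper_api: a slow, blocking call.
    time.sleep(2)
    return {"text": "hello world"}

async def main():
    try:
        # Run the blocking call off the event loop and give up after 30 seconds.
        result = await asyncio.wait_for(asyncio.to_thread(blocking_transcribe, b"..."), timeout=30)
        print(result["text"])
    except asyncio.TimeoutError:
        print("Transcription timed out")

asyncio.run(main())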
@@ -685,20 +696,19 @@ custom_placeholder = "Ask a question (Note: You can toggle between Web Search an
 def update_textbox(transcription):
     return gr.Textbox.update(value=transcription)
 
-#
+# Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# AI-powered PDF Chat and Web Search Assistant with Speech Input")
 
     with gr.Row():
         with gr.Column(scale=1):
-            audio_input = gr.Audio(sources="microphone", type="filepath", label="Speak your query")
+            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak your query")
             transcribe_button = gr.Button("Transcribe")
 
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(
                 show_copy_button=True,
                 layout="bubble",
                 height=400,
                 value=initial_conversation()
             )
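The chatbot is constructed without a type= argument, so it uses Gradio's default tuple format, and value=initial_conversation() is expected to return a list of (user, assistant) pairs; the helper itself is defined elsewhere in app.py and is not part of this diff. A plausible shape, for illustration only:

def initial_conversation():
    # Illustrative stand-in for the real helper defined elsewhere in app.py.
    return [(None, "Hi! Ask a question, or upload PDFs and untick 'Use Web Search' to chat with your documents.")]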
@@ -716,7 +726,6 @@ with gr.Blocks() as demo:
             use_web_search = gr.Checkbox(label="Use Web Search", value=True)
             document_selector = gr.CheckboxGroup(label="Select documents to query")
 
-    # Add file upload functionality
     gr.Markdown("## Upload and Manage PDF Documents")
     with gr.Row():
         file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
@@ -727,9 +736,9 @@ with gr.Blocks() as demo:
         update_output = gr.Textbox(label="Update Status")
         delete_button = gr.Button("Delete Selected Documents")
 
-    #
+    # Connect components
     transcribe_button.click(
+        transcribe_with_timeout,
         inputs=[audio_input],
         outputs=[query_textbox]
     )
@@ -758,18 +767,5 @@ with gr.Blocks() as demo:
         outputs=[update_output, document_selector]
     )
 
-    gr.Markdown(
-    """
-    ## How to use
-    1. Use the microphone to speak your query, then click "Transcribe", or type directly in the text box.
-    2. Click "Submit" to get a response from the AI.
-    3. Upload PDF documents using the file input at the bottom.
-    4. Select the PDF parser (pypdf or llamaparse) and click "Upload Document" to update the vector store.
-    5. Select the documents you want to query using the checkboxes.
-    6. Toggle "Use Web Search" to switch between PDF chat and web search.
-    7. Adjust Temperature and Number of API Calls to fine-tune the response generation.
-    """
-    )
-
 if __name__ == "__main__":
-    demo.launch(
+    demo.launch(debug=True, show_error=True)
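Both respond and transcribe_with_timeout report progress through gr.Progress, which relies on Gradio's queue. Recent Gradio releases enable the queue by default; if the installed version does not, it can be switched on explicitly before launching:

if __name__ == "__main__":
    # Enable the request queue so gr.Progress updates from the async handlers reach the UI.
    demo.queue().launch(debug=True, show_error=True)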