Update app.py
app.py
CHANGED
@@ -401,45 +401,50 @@ def summarize_web_results(query: str, search_results: List[Dict[str, str]], conv
         return f"An error occurred during summarization: {str(e)}"
 
 # Modify the existing respond function to handle both PDF and web search
+# Modify your existing respond function to handle both PDF and web search
+async def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs, progress=gr.Progress()):
+    logger.info(f"User Query: {message}")
+    logger.info(f"Model Used: {model}")
+    logger.info(f"Selected Documents: {selected_docs}")
+    logger.info(f"Use Web Search: {use_web_search}")
 
     response = ""
 
+    try:
+        if use_web_search:
+            progress(0, desc="Starting web search")
+            original_query = message
+            rephrased_query = rephrase_query(message, conversation_manager)
+            logger.info(f"Original query: {original_query}")
+            logger.info(f"Rephrased query: {rephrased_query}")
+
+            final_summary = ""
+            for i in range(num_calls):
+                progress((i + 1) * 33, desc=f"Performing web search {i+1}/{num_calls}")
+                search_results = get_web_search_results(rephrased_query)
+                if not search_results:
+                    final_summary += f"No search results found for the query: {rephrased_query}\n\n"
+                elif "error" in search_results[0]:
+                    final_summary += search_results[0]["error"] + "\n\n"
+                else:
+                    summary = summarize_web_results(rephrased_query, search_results, conversation_manager)
+                    final_summary += summary + "\n\n"
+
+            if final_summary:
+                conversation_manager.add_interaction(original_query, final_summary)
+                response = final_summary
             else:
+                response = "Unable to generate a response. Please try a different query."
         else:
+            # Existing PDF search logic
+            progress(0, desc="Starting PDF search")
             embed = get_embeddings()
             if os.path.exists("faiss_database"):
                 database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
                 retriever = database.as_retriever(search_kwargs={"k": 20})
 
+                progress(33, desc="Retrieving relevant documents")
                 all_relevant_docs = retriever.get_relevant_documents(message)
                 relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
 
@@ -447,8 +452,9 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
                     response = "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
                 else:
                     context_str = "\n".join([doc.page_content for doc in relevant_docs])
+                    logger.info(f"Context length: {len(context_str)}")
 
+                    progress(66, desc="Generating response")
                     if model.startswith("duckduckgo/"):
                         # Use DuckDuckGo chat with context
                         for partial_response in get_response_from_duckduckgo(message, model, context_str, num_calls, temperature):
@@ -464,20 +470,12 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
             else:
                 response = "No documents available. Please upload PDF documents to answer questions."
 
-            logging.info("Falling back to Mistral model due to Phi-3 error")
-            fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
-            return respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
-        else:
-            response = f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
+    except Exception as e:
+        logger.error(f"Error in respond function: {str(e)}")
+        response = f"An error occurred: {str(e)}. Please try again or select a different model."
 
-    # Yield the updated history
-    yield history
+    progress(100, desc="Response generation complete")
+    return response
 
 logging.basicConfig(level=logging.DEBUG)
 
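respond is now an async coroutine that returns one finished string instead of yielding partial history, so whatever event wires it to the chat UI has to append the exchange to the chatbot itself. That wiring is not part of these hunks; the sketch below is one plausible shape, with submit_button, model_selector, temperature_slider and num_calls_slider as hypothetical stand-ins for components defined elsewhere in app.py (query_textbox, chatbot, use_web_search and document_selector do appear in later hunks).

# Sketch only: adapter between the async respond() above and the tuple-style chatbot.
async def on_submit(message, history, model, temperature, num_calls, use_web_search, selected_docs, progress=gr.Progress()):
    answer = await respond(message, history, model, temperature, num_calls, use_web_search, selected_docs, progress)
    return history + [(message, answer)], ""  # updated chat history, cleared textbox

submit_button.click(  # hypothetical button; the real event wiring lives elsewhere in app.py
    on_submit,
    inputs=[query_textbox, chatbot, model_selector, temperature_slider, num_calls_slider, use_web_search, document_selector],
    outputs=[chatbot, query_textbox],
)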
@@ -617,21 +615,34 @@ Write a detailed and complete response that answers the following user question:
 
     logging.info("Finished generating response")
 
+# Modified transcribe function
+async def transcribe_with_timeout(audio_file, progress=gr.Progress()):
     if audio_file is None:
+        logger.warning("No audio file provided")
+        return "No audio file provided"
 
     try:
+        logger.info(f"Starting transcription for file: {audio_file}")
+        progress(0, desc="Starting transcription")
+
+        with open(audio_file, "rb") as f:
+            audio_data = f.read()
+
+        logger.info("Audio file read successfully")
+        progress(50, desc="Audio loaded, sending to API")
 
+        response = await asyncio.wait_for(asyncio.to_thread(whisper_api, audio_data), timeout=30)
+
+        logger.info("Transcription complete")
+        progress(100, desc="Transcription complete")
+        return response["text"]
+    except asyncio.TimeoutError:
+        logger.error("Transcription timed out")
+        return "Transcription timed out. Please try again with a shorter audio clip."
     except Exception as e:
+        logger.exception(f"Error during transcription: {str(e)}")
+        return f"Error during transcription: {str(e)}"
 
 def vote(data: gr.LikeData):
     if data.liked:
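transcribe_with_timeout keeps the event loop responsive by pushing the blocking whisper_api call into a worker thread (asyncio.to_thread, Python 3.9+) and bounding it with asyncio.wait_for; this also assumes app.py imports asyncio and that whisper_api is a blocking callable, defined elsewhere, that takes raw audio bytes and returns a dict with a "text" key. A minimal, self-contained illustration of the same pattern:

import asyncio
import time

def blocking_transcribe(audio_data: bytes) -> dict:
    # Stand-in for the assumed whisper_api: a slow, blocking call.
    time.sleep(2)
    return {"text": "hello world"}

async def main():
    try:
        # Run the blocking call off the event loop and give up after 30 seconds.
        result = await asyncio.wait_for(asyncio.to_thread(blocking_transcribe, b"..."), timeout=30)
        print(result["text"])
    except asyncio.TimeoutError:
        print("Transcription timed out")

asyncio.run(main())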
@@ -685,20 +696,19 @@ custom_placeholder = "Ask a question (Note: You can toggle between Web Search an
 def update_textbox(transcription):
     return gr.Textbox.update(value=transcription)
 
-#
+# Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# AI-powered PDF Chat and Web Search Assistant with Speech Input")
 
     with gr.Row():
         with gr.Column(scale=1):
-            audio_input = gr.Audio(sources="microphone", type="filepath", label="Speak your query")
+            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak your query")
             transcribe_button = gr.Button("Transcribe")
 
         with gr.Column(scale=2):
             chatbot = gr.Chatbot(
                 show_copy_button=True,
                 layout="bubble",
                 height=400,
                 value=initial_conversation()
             )
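The chatbot is constructed without a type= argument, so it uses Gradio's default tuple format, and value=initial_conversation() is expected to return a list of (user, assistant) pairs; the helper itself is defined elsewhere in app.py and is not part of this diff. A plausible shape, for illustration only:

def initial_conversation():
    # Illustrative stand-in for the real helper defined elsewhere in app.py.
    return [(None, "Hi! Ask a question, or upload PDFs and untick 'Use Web Search' to chat with your documents.")]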
@@ -716,7 +726,6 @@ with gr.Blocks() as demo:
             use_web_search = gr.Checkbox(label="Use Web Search", value=True)
             document_selector = gr.CheckboxGroup(label="Select documents to query")
 
-    # Add file upload functionality
     gr.Markdown("## Upload and Manage PDF Documents")
     with gr.Row():
         file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
@@ -727,9 +736,9 @@ with gr.Blocks() as demo:
         update_output = gr.Textbox(label="Update Status")
         delete_button = gr.Button("Delete Selected Documents")
 
-    #
+    # Connect components
     transcribe_button.click(
+        transcribe_with_timeout,
         inputs=[audio_input],
         outputs=[query_textbox]
     )
@@ -758,18 +767,5 @@ with gr.Blocks() as demo:
         outputs=[update_output, document_selector]
     )
 
-    gr.Markdown(
-    """
-    ## How to use
-    1. Use the microphone to speak your query, then click "Transcribe", or type directly in the text box.
-    2. Click "Submit" to get a response from the AI.
-    3. Upload PDF documents using the file input at the bottom.
-    4. Select the PDF parser (pypdf or llamaparse) and click "Upload Document" to update the vector store.
-    5. Select the documents you want to query using the checkboxes.
-    6. Toggle "Use Web Search" to switch between PDF chat and web search.
-    7. Adjust Temperature and Number of API Calls to fine-tune the response generation.
-    """
-    )
-
 if __name__ == "__main__":
-    demo.launch(
+    demo.launch(debug=True, show_error=True)
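Both respond and transcribe_with_timeout report progress through gr.Progress, which relies on Gradio's queue. Recent Gradio releases enable the queue by default; if the installed version does not, it can be switched on explicitly before launching:

if __name__ == "__main__":
    # Enable the request queue so gr.Progress updates from the async handlers reach the UI.
    demo.queue().launch(debug=True, show_error=True)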