SearchGPT

Running

App Files Files Community

Shreyas094 committed on Jul 6, 2024

Commit

8b01918

verified ·

1 Parent(s): 0f075d7

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -80

app.py CHANGED Viewed

@@ -210,69 +210,103 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
         print(f"Result {i}:")
         print(f"  Link: {result['link']}")
         if result['text']:
-            print(f"  Text: {result['text'][:100]}...")  # Display the first 100 characters of the text for brevity
         else:
-            print("  No text extracted")
     return all_results
-def process_question(question, documents, history, temperature, top_p, repetition_penalty, enable_web_search):
     global conversation_history
-    embeddings = get_embeddings()
-    # Check the memory database for similar questions
-    for prev_question, prev_answer in memory_database.items():
-        similarity = get_similarity(question, prev_question)
-        if similarity > 0.8:
-            return prev_answer
-    # Retrieve relevant documents from the vector store
-    if os.path.exists("faiss_database"):
-        db = FAISS.load_local("faiss_database", embeddings, allow_dangerous_deserialization=True)
-        relevant_docs = db.similarity_search(question, k=3)
     else:
-        relevant_docs = []
-    # Perform web search if enabled and no relevant documents found
-    if enable_web_search and len(relevant_docs) == 0:
-        web_search_results = google_search(question, num_results=5)
-        web_docs = [Document(page_content=res["text"] or "", metadata={"source": res["link"]}) for res in web_search_results if res["text"]]
-        if web_docs:
-            # Update the FAISS vector store with new documents
-            create_or_update_database(web_docs, embeddings)
-            db = FAISS.load_local("faiss_database", embeddings, allow_dangerous_deserialization=True)
-            relevant_docs = db.similarity_search(question, k=3)
-    context = "\n\n".join([doc.page_content for doc in relevant_docs])
-    if is_related_to_history(question, history):
-        context = "None"
-    else:
-        history_text = "\n".join([f"Q: {h['question']}\nA: {h['answer']}" for h in history]) if history else "None"
-        context = context if context else "None"
-    prompt_text = ChatPromptTemplate(
-        input_variables=["history", "context", "question"],
-        template=prompt
-    ).format(history=history_text, context=context, question=question)
-    model = get_model(temperature, top_p, repetition_penalty)
-    answer = generate_chunked_response(model, prompt_text)
-    conversation_history = manage_conversation_history(question, answer, history)
-    memory_database[question] = answer
     return answer
-def process_uploaded_file(file, is_recursive):
-    if is_recursive:
-        data = load_and_split_document_recursive(file)
-    else:
-        data = load_and_split_document_basic(file)
-    embeddings = get_embeddings()
-    create_or_update_database(data, embeddings)
-    return "File processed and data added to the vector database."
 def extract_db_to_excel():
     embed = get_embeddings()
@@ -303,43 +337,47 @@ def export_memory_db_to_excel():
     return excel_path
 with gr.Blocks() as demo:
-    with gr.Row():
-        pdf_file = gr.File(label="Upload PDF")
-    with gr.Row():
-        recursive_check = gr.Checkbox(label="Use Recursive Text Splitter")
-        upload_button = gr.Button("Upload and Process")
-    with gr.Row():
-        upload_output = gr.Textbox(label="Upload Output")
     with gr.Row():
-        question = gr.Textbox(label="Your Question")
-    with gr.Row():
-        temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature")
-        top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, label="Top P")
-        repetition_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, label="Repetition Penalty")
-        web_search_check = gr.Checkbox(label="Enable Web Search")
-    with gr.Row():
-        ask_button = gr.Button("Ask")
-    with gr.Row():
-        answer = gr.Textbox(label="Answer")
     with gr.Row():
-        clear_button = gr.Button("Clear Cache")
-    with gr.Row():
-        clear_output = gr.Textbox(label="Clear Output")
-    with gr.Row():
-        export_db_button = gr.Button("Export Database to Excel")
-        export_db_output = gr.Textbox(label="Export Output")
-    with gr.Row():
-        export_memory_button = gr.Button("Export Memory DB to Excel")
-        export_memory_output = gr.Textbox(label="Export Output")
-    upload_button.click(process_uploaded_file, [pdf_file, recursive_check], upload_output)
-    ask_button.click(process_question, [question, pdf_file, conversation_history, temperature, top_p, repetition_penalty, web_search_check], answer)
-    clear_button.click(clear_cache, [], clear_output)
-    export_db_button.click(extract_db_to_excel, [], export_db_output)
-    export_memory_button.click(export_memory_db_to_excel, [], export_memory_output)
-demo.launch()

         print(f"Result {i}:")
         print(f"  Link: {result['link']}")
         if result['text']:
+            print(f"  Text: {result['text'][:100]}...")  # Print first 100 characters
         else:
+            print("  Text: None")
+    print("End of search results")
+    if not all_results:
+        print("No search results found. Returning a default message.")
+        return [{"link": None, "text": "No information found in the web search results."}]
     return all_results
+def ask_question(question, temperature, top_p, repetition_penalty, web_search):
     global conversation_history
+    if not question:
+        return "Please enter a question."
+    if question in memory_database and not web_search:
+        answer = memory_database[question]
     else:
+        model = get_model(temperature, top_p, repetition_penalty)
+        embed = get_embeddings()
+        if web_search:
+            search_results = google_search(question)
+            context_str = "\n".join([result["text"] for result in search_results if result["text"]])
+            # Convert web search results to Document format
+            web_docs = [Document(page_content=result["text"], metadata={"source": result["link"]}) for result in search_results if result["text"]]
+            # Create a temporary FAISS database for web search results
+            temp_database = FAISS.from_documents(web_docs, embed)
+            retriever = temp_database.as_retriever()
+            relevant_docs = retriever.get_relevant_documents(question)
+            context_str = "\n".join([doc.page_content for doc in relevant_docs])
+            prompt_template = """
+            Answer the question based on the following web search results:
+            Web Search Results:
+            {context}
+            Current Question: {question}
+            If the web search results don't contain relevant information, state that the information is not available in the search results.
+            Provide a concise and direct answer to the question without mentioning the web search or these instructions:
+            """
+            prompt_val = ChatPromptTemplate.from_template(prompt_template)
+            formatted_prompt = prompt_val.format(context=context_str, question=question)
+        else:
+            # Check if the FAISS database exists
+            if os.path.exists("faiss_database"):
+                database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+            else:
+                return "No FAISS database found. Please upload documents to create the vector store."
+            history_str = "\n".join([f"Q: {item['question']}\nA: {item['answer']}" for item in conversation_history])
+            if is_related_to_history(question, conversation_history):
+                context_str = "No additional context needed. Please refer to the conversation history."
+            else:
+                retriever = database.as_retriever()
+                relevant_docs = retriever.get_relevant_documents(question)
+                context_str = "\n".join([doc.page_content for doc in relevant_docs])
+            prompt_val = ChatPromptTemplate.from_template(prompt)
+            formatted_prompt = prompt_val.format(history=history_str, context=context_str, question=question)
+        answer = generate_chunked_response(model, formatted_prompt)
+        answer = re.split(r'Question:|Current Question:', answer)[-1].strip()
+        # Remove any remaining prompt instructions from the answer
+        answer_lines = answer.split('\n')
+        answer = '\n'.join(line for line in answer_lines if not line.startswith('If') and not line.startswith('Provide'))
+        if not web_search:
+            memory_database[question] = answer
+    if not web_search:
+        conversation_history = manage_conversation_history(question, answer, conversation_history)
     return answer
+def update_vectors(files, use_recursive_splitter):
+    if not files:
+        return "Please upload at least one PDF file."
+    embed = get_embeddings()
+    total_chunks = 0
+    for file in files:
+        if use_recursive_splitter:
+            data = load_and_split_document_recursive(file)
+        else:
+            data = load_and_split_document_basic(file)
+        create_or_update_database(data, embed)
+        total_chunks += len(data)
+    return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files."
 def extract_db_to_excel():
     embed = get_embeddings()
     return excel_path
+# Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("# Chat with your PDF documents")
     with gr.Row():
+        file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
+        update_button = gr.Button("Update Vector Store")
+        use_recursive_splitter = gr.Checkbox(label="Use Recursive Text Splitter", value=False)
+    update_output = gr.Textbox(label="Update Status")
+    update_button.click(update_vectors, inputs=[file_input, use_recursive_splitter], outputs=update_output)
     with gr.Row():
+        with gr.Column(scale=2):
+            chatbot = gr.Chatbot(label="Conversation")
+            question_input = gr.Textbox(label="Ask a question about your documents")
+            submit_button = gr.Button("Submit")
+        with gr.Column(scale=1):
+            temperature_slider = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.5, step=0.1)
+            top_p_slider = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=0.9, step=0.1)
+            repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
+            web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)
+    def chat(question, history):
+        answer = ask_question(question, temperature_slider.value, top_p_slider.value, repetition_penalty_slider.value, web_search_checkbox.value)
+        history.append((question, answer))
+        return "", history
+    submit_button.click(chat, inputs=[question_input, chatbot], outputs=[question_input, chatbot])
+    extract_button = gr.Button("Extract Database to Excel")
+    excel_output = gr.File(label="Download Excel File")
+    extract_button.click(extract_db_to_excel, inputs=[], outputs=excel_output)
+    export_memory_button = gr.Button("Export Memory Database to Excel")
+    memory_excel_output = gr.File(label="Download Memory Excel File")
+    export_memory_button.click(export_memory_db_to_excel, inputs=[], outputs=memory_excel_output)
+    clear_button = gr.Button("Clear Cache")
+    clear_output = gr.Textbox(label="Cache Status")
+    clear_button.click(clear_cache, inputs=[], outputs=clear_output)
+if __name__ == "__main__":
+    demo.launch()