Update app.py
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 import json
+import re
 import gradio as gr
 import pandas as pd
 from tempfile import NamedTemporaryFile
@@ -13,6 +14,8 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.llms import HuggingFaceHub
 from langchain_core.runnables import RunnableParallel, RunnablePassthrough
 from langchain_core.documents import Document
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
 
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 
@@ -58,15 +61,24 @@ def clear_cache():
     else:
         return "No cache to clear."
 
+def get_similarity(text1, text2):
+    vectorizer = TfidfVectorizer().fit_transform([text1, text2])
+    return cosine_similarity(vectorizer[0:1], vectorizer[1:2])[0][0]
+
 prompt = """
-Answer the question based on the following
+Answer the question based on the following information:
+
 Conversation History:
 {history}
 
 Context from documents:
 {context}
 
-Question: {question}
+Current Question: {question}
+
+If the question is referring to the conversation history, use that information to answer.
+If the question is not related to the conversation history, use the context from documents to answer.
+If you don't have enough information to answer, say so.
 
 Provide a concise and direct answer to the question:
 """
@@ -100,6 +112,13 @@ def manage_conversation_history(question, answer, history, max_history=5):
         history.pop(0)
     return history
 
+def is_related_to_history(question, history, threshold=0.3):
+    if not history:
+        return False
+    history_text = " ".join([f"{h['question']} {h['answer']}" for h in history])
+    similarity = get_similarity(question, history_text)
+    return similarity > threshold
+
 def ask_question(question, temperature, top_p, repetition_penalty):
     global conversation_history
 
@@ -114,14 +133,19 @@ def ask_question(question, temperature, top_p, repetition_penalty):
     model = get_model(temperature, top_p, repetition_penalty)
 
     history_str = "\n".join([f"Q: {item['question']}\nA: {item['answer']}" for item in conversation_history])
+
+    if is_related_to_history(question, conversation_history):
+        context_str = "No additional context needed. Please refer to the conversation history."
+    else:
+        retriever = database.as_retriever()
+        relevant_docs = retriever.get_relevant_documents(question)
+        context_str = "\n".join([doc.page_content for doc in relevant_docs])
+
     prompt_val = ChatPromptTemplate.from_template(prompt)
-    retriever = database.as_retriever()
-    relevant_docs = retriever.get_relevant_documents(question)
-    context_str = "\n".join([doc.page_content for doc in relevant_docs])
     formatted_prompt = prompt_val.format(history=history_str, context=context_str, question=question)
 
     answer = generate_chunked_response(model, formatted_prompt)
-    answer =
+    answer = re.split(r'Question:|Current Question:', answer)[-1].strip()
 
     memory_database[question] = answer
 
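
The new TF-IDF gate can be exercised on its own. Below is a minimal, self-contained sketch of `get_similarity` and `is_related_to_history` as added in this commit, runnable with nothing but scikit-learn; the sample history and questions are invented for the demo, and the 0.3 threshold is the diff's default.

```python
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def get_similarity(text1, text2):
    # Fit a TF-IDF vocabulary on just these two texts; the matrix has one row per text.
    vectorizer = TfidfVectorizer().fit_transform([text1, text2])
    return cosine_similarity(vectorizer[0:1], vectorizer[1:2])[0][0]

def is_related_to_history(question, history, threshold=0.3):
    # Collapse all prior Q/A pairs into one document and measure lexical overlap.
    if not history:
        return False
    history_text = " ".join([f"{h['question']} {h['answer']}" for h in history])
    similarity = get_similarity(question, history_text)
    return similarity > threshold

history = [{"question": "What is LangChain?",
            "answer": "LangChain is a framework for building LLM applications."}]

# Heavy word overlap with the history clears the 0.3 threshold.
print(is_related_to_history("What is a framework for building LLM applications?", history))  # True

# A question sharing no terms with the history scores a cosine similarity of 0.
print(is_related_to_history("How do I bake sourdough bread?", history))  # False
```

One caveat: TF-IDF measures word overlap, not meaning, so a paraphrased follow-up ("Tell me more about that framework") can fall below the threshold and trigger document retrieval anyway.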
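
The retrieval branch in `ask_question` needs a live vector store, but its control flow can be sketched with a stand-in. `FakeRetriever` and `build_context` below are invented for this demo (the real code calls `database.as_retriever()` and inlines the branch); `is_related_to_history` and `history` are reused from the sketch above.

```python
class FakeRetriever:
    """Stand-in for database.as_retriever(); returns canned documents."""
    class _Doc:
        page_content = "LangChain retrievers return Document objects with page_content."

    def get_relevant_documents(self, query):
        return [self._Doc()]

def build_context(question, history, retriever):
    # Mirrors the new branch: skip retrieval entirely when the TF-IDF gate
    # decides the question continues the existing conversation.
    if is_related_to_history(question, history):
        return "No additional context needed. Please refer to the conversation history."
    relevant_docs = retriever.get_relevant_documents(question)
    return "\n".join([doc.page_content for doc in relevant_docs])

# An unrelated question leaves the gate open, so the retriever is consulted.
print(build_context("How do I bake sourdough bread?", history, FakeRetriever()))
```

Skipping retrieval for follow-ups saves one vector-store query per turn, at the cost of occasionally answering from history when fresh context would have helped.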
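
Finally, the `re.split` cleanup looks intended to cope with text-generation models served through `HuggingFaceHub` that echo the prompt: everything up to the last `Question:`/`Current Question:` marker is discarded. A hypothetical raw completion shows what survives; note that anything after the final marker, including an echoed copy of the question itself, is kept.

```python
import re

# Hypothetical raw completion that echoes part of the prompt before answering.
raw = ("Answer the question based on the following information:\n"
       "Current Question: What is RAG?\n"
       "RAG pairs a retriever with a text generator.")

# Same cleanup as the diff: split on either marker and keep only the last piece.
answer = re.split(r'Question:|Current Question:', raw)[-1].strip()
print(answer)
# What is RAG?
# RAG pairs a retriever with a text generator.
```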