SearchGPT

Running

App Files Community

Shreyas094 committed on Jul 22, 2024

Commit

63b644a

verified ·

1 Parent(s): 6fac185

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -20

app.py CHANGED Viewed

@@ -29,33 +29,41 @@ from langchain_core.documents import Document
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
-# Download necessary NLTK data
-nltk.download('punkt')
-nltk.download('averaged_perceptron_tagger')
-class ContextDrivenChatbot:
-    def __init__(self, history_size=5):
         self.history = []
         self.history_size = history_size
-        self.vectorizer = TfidfVectorizer()
-        nltk.download('punkt', quiet=True)
-        nltk.download('averaged_perceptron_tagger', quiet=True)
     def add_to_history(self, text):
         self.history.append(text)
         if len(self.history) > self.history_size:
             self.history.pop(0)
     def get_context(self):
         return " ".join(self.history)
     def is_follow_up_question(self, question):
-        tokens = word_tokenize(question.lower())
         follow_up_indicators = set(['it', 'this', 'that', 'these', 'those', 'he', 'she', 'they', 'them'])
-        return any(token in follow_up_indicators for token in tokens)
     def extract_topics(self, text):
-        tokens = nltk.pos_tag(word_tokenize(text))
-        return [word for word, pos in tokens if pos.startswith('NN')]
     def get_most_relevant_context(self, question):
         if not self.history:
@@ -64,11 +72,12 @@ class ContextDrivenChatbot:
         # Create a combined context from history
         combined_context = self.get_context()
-        # Vectorize the context and the question
-        vectors = self.vectorizer.fit_transform([combined_context, question])
         # Calculate similarity
-        similarity = cosine_similarity(vectors[0], vectors[1])[0][0]
         # If similarity is low, it might be a new topic
         if similarity < 0.3:  # This threshold can be adjusted
@@ -91,7 +100,7 @@ class ContextDrivenChatbot:
         # Add the new question to history
         self.add_to_history(question)
-        return contextualized_question, topics
 def load_document(file: NamedTemporaryFile) -> List[Document]:
     """Loads and splits the document into pages."""
@@ -262,7 +271,7 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
     max_attempts = 3
     context_reduction_factor = 0.7
-    contextualized_question, topics = chatbot.process_question(question)
     if web_search:
         search_results = google_search(contextualized_question)
@@ -282,12 +291,13 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                 context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
                 prompt_template = """
-                Answer the question based on the following web search results and conversation context:
                 Web Search Results:
                 {context}
                 Conversation Context: {conv_context}
                 Current Question: {question}
                 Topics: {topics}
                 If the web search results don't contain relevant information, state that the information is not available in the search results.
                 Provide a summarized and direct answer to the question without mentioning the web search or these instructions.
                 Do not include any source information in your answer.
@@ -298,7 +308,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
                     context=context_str,
                     conv_context=chatbot.get_context(),
                     question=question,
-                    topics=", ".join(topics)
                 )
                 full_response = generate_chunked_response(model, formatted_prompt)
@@ -415,7 +426,7 @@ with gr.Blocks() as demo:
             repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
             web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)
-    context_driven_chatbot = ContextDrivenChatbot()
     def chat(question, history, temperature, top_p, repetition_penalty, web_search):
         answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, context_driven_chatbot)

 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
+# Load spaCy model
+nlp = spacy.load("en_core_web_sm")
+# Load SentenceTransformer model
+sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
+class EnhancedContextDrivenChatbot:
+    def __init__(self, history_size=10):
         self.history = []
         self.history_size = history_size
+        self.entity_tracker = {}
     def add_to_history(self, text):
         self.history.append(text)
         if len(self.history) > self.history_size:
             self.history.pop(0)
+        # Update entity tracker
+        doc = nlp(text)
+        for ent in doc.ents:
+            if ent.label_ not in self.entity_tracker:
+                self.entity_tracker[ent.label_] = set()
+            self.entity_tracker[ent.label_].add(ent.text)
     def get_context(self):
         return " ".join(self.history)
     def is_follow_up_question(self, question):
+        doc = nlp(question.lower())
         follow_up_indicators = set(['it', 'this', 'that', 'these', 'those', 'he', 'she', 'they', 'them'])
+        return any(token.text in follow_up_indicators for token in doc)
     def extract_topics(self, text):
+        doc = nlp(text)
+        return [chunk.text for chunk in doc.noun_chunks]
     def get_most_relevant_context(self, question):
         if not self.history:
         # Create a combined context from history
         combined_context = self.get_context()
+        # Get embeddings
+        context_embedding = sentence_model.encode([combined_context])[0]
+        question_embedding = sentence_model.encode([question])[0]
         # Calculate similarity
+        similarity = cosine_similarity([context_embedding], [question_embedding])[0][0]
         # If similarity is low, it might be a new topic
         if similarity < 0.3:  # This threshold can be adjusted
         # Add the new question to history
         self.add_to_history(question)
+        return contextualized_question, topics, self.entity_tracker
 def load_document(file: NamedTemporaryFile) -> List[Document]:
     """Loads and splits the document into pages."""
     max_attempts = 3
     context_reduction_factor = 0.7
+    contextualized_question, topics, entity_tracker = chatbot.process_question(question)
     if web_search:
         search_results = google_search(contextualized_question)
                 context_str = "\n".join([f"Source: {doc.metadata['source']}\nContent: {doc.page_content}" for doc in web_docs])
                 prompt_template = """
+                Answer the question based on the following web search results, conversation context, and entity information:
                 Web Search Results:
                 {context}
                 Conversation Context: {conv_context}
                 Current Question: {question}
                 Topics: {topics}
+                Entity Information: {entities}
                 If the web search results don't contain relevant information, state that the information is not available in the search results.
                 Provide a summarized and direct answer to the question without mentioning the web search or these instructions.
                 Do not include any source information in your answer.
                     context=context_str,
                     conv_context=chatbot.get_context(),
                     question=question,
+                    topics=", ".join(topics),
+                    entities=json.dumps(entity_tracker)
                 )
                 full_response = generate_chunked_response(model, formatted_prompt)
             repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
             web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)
+    context_driven_chatbot = EnhancedContextDrivenChatbot()
     def chat(question, history, temperature, top_p, repetition_penalty, web_search):
         answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, context_driven_chatbot)