SearchGPT

Running

App Files Community

Shreyas094 committed on Jul 20, 2024

Commit

4892e48

verified ·

1 Parent(s): 4281635

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -32

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import pandas as pd
 import requests
 import random
 import urllib.parse
 from tempfile import NamedTemporaryFile
 from typing import List, Dict
 from bs4 import BeautifulSoup
@@ -22,48 +23,35 @@ from langchain_core.documents import Document
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 class Agent1:
-    def __init__(self, model):
-        self.model = model
     def rephrase_and_split(self, user_input: str) -> List[str]:
-        rephrase_prompt = PromptTemplate(
-            input_variables=["query"],
-            template="""
-            Determine how many questions are asked in the search query provided to you. For each question, split them individually and rephrase each one into a few concise, search-engine-friendly formats. Ensure that the queries are distinct and relevant, capturing the core of each question without unnecessary repetition.
-            Query: {query}
-            Rephrased queries:"""
-        )
-        chain = LLMChain(llm=self.model, prompt=rephrase_prompt)
-        response = chain.run(query=user_input).strip()
-        # Split the response at "Rephrased queries:" and take the second part
-        split_response = response.split("Rephrased queries:", 1)
-        if len(split_response) > 1:
-            response = split_response[1].strip()
-        # Split the response into individual queries
-        rephrased_queries = [q.strip() for q in response.split('\n') if q.strip()]
-        # Filter out any queries that seem to be instructions or explanations
-        rephrased_queries = [q for q in rephrased_queries if not q.lower().startswith(("rephrase", "query", "provide"))]
-        # If no valid rephrased queries, return the original input split into two if it contains 'and'
-        if not rephrased_queries:
-            if ' and ' in user_input.lower():
-                return user_input.split(' and ')
-            else:
-                return [user_input]
-        return rephrased_queries
     def process(self, user_input: str) -> Dict[str, List[Dict[str, str]]]:
         queries = self.rephrase_and_split(user_input)
-        print("Rephrased queries:", queries)
         results = {}
         for query in queries:
             results[query] = google_search(query)

 import requests
 import random
 import urllib.parse
+import spacy
 from tempfile import NamedTemporaryFile
 from typing import List, Dict
 from bs4 import BeautifulSoup
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 class Agent1:
+    def __init__(self):
+        self.nlp = spacy.load("en_core_web_sm")
     def rephrase_and_split(self, user_input: str) -> List[str]:
+        doc = self.nlp(user_input)
+        # Identify question words
+        question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
+        # Split sentences
+        sentences = list(doc.sents)
+        # Identify questions
+        questions = []
+        for sent in sentences:
+            if sent[0].text.lower() in question_words or sent[-1].text == "?":
+                questions.append(sent.text)
+            elif any(token.text.lower() in question_words for token in sent):
+                questions.append(sent.text)
+        # If no questions identified, return the original input
+        if not questions:
+            return [user_input]
+        return questions
     def process(self, user_input: str) -> Dict[str, List[Dict[str, str]]]:
         queries = self.rephrase_and_split(user_input)
+        print("Identified queries:", queries)
         results = {}
         for query in queries:
             results[query] = google_search(query)