SearchGPT

Running

Shreyas094 committed on Jul 20, 2024

Commit

b52d39b

verified ·

1 Parent(s): 111c87a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,6 +7,9 @@ import requests
 import random
 import urllib.parse
 import spacy
 from tempfile import NamedTemporaryFile
 from typing import List, Dict
 from bs4 import BeautifulSoup
@@ -22,26 +25,28 @@ from langchain_core.documents import Document
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 class Agent1:
     def __init__(self):
-        self.nlp = spacy.load("en_core_web_sm")
     def rephrase_and_split(self, user_input: str) -> List[str]:
-        doc = self.nlp(user_input)
         # Identify question words
         question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
         # Split sentences
-        sentences = list(doc.sents)
         # Identify questions
         questions = []
         for sent in sentences:
-            if sent[0].text.lower() in question_words or sent[-1].text == "?":
-                questions.append(sent.text)
-            elif any(token.text.lower() in question_words for token in sent):
-                questions.append(sent.text)
         # If no questions identified, return the original input
         if not questions:

 import random
 import urllib.parse
 import spacy
+import nltk
+from nltk.tokenize import sent_tokenize
+from typing import List, Dict
 from tempfile import NamedTemporaryFile
 from typing import List, Dict
 from bs4 import BeautifulSoup
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
+# Download necessary NLTK data
+nltk.download('punkt')
 class Agent1:
     def __init__(self):
+        pass
     def rephrase_and_split(self, user_input: str) -> List[str]:
         # Identify question words
         question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
         # Split sentences
+        sentences = sent_tokenize(user_input)
         # Identify questions
         questions = []
         for sent in sentences:
+            words = sent.lower().split()
+            if words[0] in question_words or sent.strip().endswith('?'):
+                questions.append(sent)
+            elif any(word in question_words for word in words):
+                questions.append(sent)
         # If no questions identified, return the original input
         if not questions: