Shreyas094 committed on
Commit
b52d39b
1 Parent(s): 111c87a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -7,6 +7,9 @@ import requests
7
  import random
8
  import urllib.parse
9
  import spacy
 
 
 
10
  from tempfile import NamedTemporaryFile
11
  from typing import List, Dict
12
  from bs4 import BeautifulSoup
@@ -22,26 +25,28 @@ from langchain_core.documents import Document
22
 
23
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
24
 
 
 
 
25
  class Agent1:
26
  def __init__(self):
27
- self.nlp = spacy.load("en_core_web_sm")
28
 
29
  def rephrase_and_split(self, user_input: str) -> List[str]:
30
- doc = self.nlp(user_input)
31
-
32
  # Identify question words
33
  question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
34
 
35
  # Split sentences
36
- sentences = list(doc.sents)
37
 
38
  # Identify questions
39
  questions = []
40
  for sent in sentences:
41
- if sent[0].text.lower() in question_words or sent[-1].text == "?":
42
- questions.append(sent.text)
43
- elif any(token.text.lower() in question_words for token in sent):
44
- questions.append(sent.text)
 
45
 
46
  # If no questions identified, return the original input
47
  if not questions:
 
7
  import random
8
  import urllib.parse
9
  import spacy
10
+ import nltk
11
+ from nltk.tokenize import sent_tokenize
12
+ from typing import List, Dict
13
  from tempfile import NamedTemporaryFile
14
  from typing import List, Dict
15
  from bs4 import BeautifulSoup
 
25
 
26
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
27
 
28
+ # Download necessary NLTK data
29
+ nltk.download('punkt')
30
+
31
  class Agent1:
32
  def __init__(self):
33
+ pass
34
 
35
  def rephrase_and_split(self, user_input: str) -> List[str]:
 
 
36
  # Identify question words
37
  question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
38
 
39
  # Split sentences
40
+ sentences = sent_tokenize(user_input)
41
 
42
  # Identify questions
43
  questions = []
44
  for sent in sentences:
45
+ words = sent.lower().split()
46
+ if words[0] in question_words or sent.strip().endswith('?'):
47
+ questions.append(sent)
48
+ elif any(word in question_words for word in words):
49
+ questions.append(sent)
50
 
51
  # If no questions identified, return the original input
52
  if not questions: