Shreyas094 committed on
Commit
4892e48
·
verified ·
1 Parent(s): 4281635

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -32
app.py CHANGED
@@ -6,6 +6,7 @@ import pandas as pd
6
  import requests
7
  import random
8
  import urllib.parse
 
9
  from tempfile import NamedTemporaryFile
10
  from typing import List, Dict
11
  from bs4 import BeautifulSoup
@@ -22,48 +23,35 @@ from langchain_core.documents import Document
22
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
23
 
24
  class Agent1:
25
- def __init__(self, model):
26
- self.model = model
27
 
28
-
29
  def rephrase_and_split(self, user_input: str) -> List[str]:
30
- rephrase_prompt = PromptTemplate(
31
- input_variables=["query"],
32
- template="""
33
- Determine how many questions are asked in the search query provided to you. For each question, split them individually and rephrase each one into a few concise, search-engine-friendly formats. Ensure that the queries are distinct and relevant, capturing the core of each question without unnecessary repetition.
34
-
35
-
36
- Query: {query}
37
-
38
- Rephrased queries:"""
39
- )
40
-
41
- chain = LLMChain(llm=self.model, prompt=rephrase_prompt)
42
- response = chain.run(query=user_input).strip()
43
 
44
- # Split the response at "Rephrased queries:" and take the second part
45
- split_response = response.split("Rephrased queries:", 1)
46
- if len(split_response) > 1:
47
- response = split_response[1].strip()
48
 
49
- # Split the response into individual queries
50
- rephrased_queries = [q.strip() for q in response.split('\n') if q.strip()]
51
 
52
- # Filter out any queries that seem to be instructions or explanations
53
- rephrased_queries = [q for q in rephrased_queries if not q.lower().startswith(("rephrase", "query", "provide"))]
 
 
 
 
 
54
 
55
- # If no valid rephrased queries, return the original input split into two if it contains 'and'
56
- if not rephrased_queries:
57
- if ' and ' in user_input.lower():
58
- return user_input.split(' and ')
59
- else:
60
- return [user_input]
61
 
62
- return rephrased_queries
63
 
64
  def process(self, user_input: str) -> Dict[str, List[Dict[str, str]]]:
65
  queries = self.rephrase_and_split(user_input)
66
- print("Rephrased queries:", queries)
67
  results = {}
68
  for query in queries:
69
  results[query] = google_search(query)
 
6
  import requests
7
  import random
8
  import urllib.parse
9
+ import spacy
10
  from tempfile import NamedTemporaryFile
11
  from typing import List, Dict
12
  from bs4 import BeautifulSoup
 
23
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
24
 
25
  class Agent1:
26
+ def __init__(self):
27
+ self.nlp = spacy.load("en_core_web_sm")
28
 
 
29
  def rephrase_and_split(self, user_input: str) -> List[str]:
30
+ doc = self.nlp(user_input)
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ # Identify question words
33
+ question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
 
 
34
 
35
+ # Split sentences
36
+ sentences = list(doc.sents)
37
 
38
+ # Identify questions
39
+ questions = []
40
+ for sent in sentences:
41
+ if sent[0].text.lower() in question_words or sent[-1].text == "?":
42
+ questions.append(sent.text)
43
+ elif any(token.text.lower() in question_words for token in sent):
44
+ questions.append(sent.text)
45
 
46
+ # If no questions identified, return the original input
47
+ if not questions:
48
+ return [user_input]
 
 
 
49
 
50
+ return questions
51
 
52
  def process(self, user_input: str) -> Dict[str, List[Dict[str, str]]]:
53
  queries = self.rephrase_and_split(user_input)
54
+ print("Identified queries:", queries)
55
  results = {}
56
  for query in queries:
57
  results[query] = google_search(query)