MAsad789565 committed on
Commit
fa9ef68
·
verified ·
1 Parent(s): dd996c1

Update documents/query_results_extractor.py

Browse files
documents/query_results_extractor.py CHANGED
@@ -17,10 +17,14 @@ class QueryResultsExtractor:
17
  self.query = self.soup.find("textarea").text.strip()
18
  query_result_elements = self.soup.find_all("div", class_="g")
19
  for idx, result in enumerate(query_result_elements):
20
- site = result.find("cite").find_previous("span").text.strip()
 
 
 
 
21
  url = result.find("a")["href"]
22
  title = result.find("h3").text.strip()
23
-
24
  abstract_element_conditions = [
25
  {"data-sncf": "1"},
26
  {"class_": "ITZIwc"},
@@ -32,7 +36,7 @@ class QueryResultsExtractor:
32
  break
33
  else:
34
  abstract = ""
35
-
36
  logger.mesg(
37
  f"{title}\n" f" - {site}\n" f" - {url}\n" f" - {abstract}\n" f"\n"
38
  )
@@ -48,6 +52,7 @@ class QueryResultsExtractor:
48
  )
49
  logger.success(f"- {len(query_result_elements)} query results")
50
 
 
51
  def extract_related_questions(self):
52
  related_question_elements = self.soup.find_all(
53
  "div", class_="related-question-pair"
 
17
  self.query = self.soup.find("textarea").text.strip()
18
  query_result_elements = self.soup.find_all("div", class_="g")
19
  for idx, result in enumerate(query_result_elements):
20
+ cite_tag = result.find("cite")
21
+ if cite_tag:
22
+ site = cite_tag.find_previous("span").text.strip()
23
+ else:
24
+ site = "Unknown"
25
  url = result.find("a")["href"]
26
  title = result.find("h3").text.strip()
27
+
28
  abstract_element_conditions = [
29
  {"data-sncf": "1"},
30
  {"class_": "ITZIwc"},
 
36
  break
37
  else:
38
  abstract = ""
39
+
40
  logger.mesg(
41
  f"{title}\n" f" - {site}\n" f" - {url}\n" f" - {abstract}\n" f"\n"
42
  )
 
52
  )
53
  logger.success(f"- {len(query_result_elements)} query results")
54
 
55
+
56
  def extract_related_questions(self):
57
  related_question_elements = self.soup.find_all(
58
  "div", class_="related-question-pair"