dball committed on
Commit
3ec3001
·
verified ·
1 Parent(s): 9caf3d5

Deduplicate extracted content parts

Browse files

Deduplicate extracted content parts because every part showed up twice.

Add `extract_website_content_parts` to the `tools` list.

Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -34,13 +34,13 @@ def extract_website_content_parts(url: str, extraction_pattern: str) -> List[str
34
  url: The URL of the website from which content parts should be extracted
35
  extraction_pattern: The regular expression string of the content parts to extract from the website
36
  Returns:
37
- List[str]: The content parts matching extraction_pattern of the website `url`
38
  """
39
  try:
40
  response = requests.get(url)
41
  response.raise_for_status()
42
  matches: List[str] = re.findall(extraction_pattern, response.text)
43
- return matches
44
  except requests.RequestException as e:
45
  return [f"Error fetching website content: {str(e)}"]
46
 
@@ -92,7 +92,7 @@ with open("prompts.yaml", 'r') as stream:
92
 
93
  agent = CodeAgent(
94
  model=model,
95
- tools=[final_answer, search_tool, get_website_content, get_papers_url_for_date, get_current_time_in_timezone],
96
  max_steps=30,
97
  verbosity_level=1,
98
  grammar=None,
 
34
  url: The URL of the website from which content parts should be extracted
35
  extraction_pattern: The regular expression string of the content parts to extract from the website
36
  Returns:
37
+ List[str]: The deduplicated content parts matching extraction_pattern of the website `url`
38
  """
39
  try:
40
  response = requests.get(url)
41
  response.raise_for_status()
42
  matches: List[str] = re.findall(extraction_pattern, response.text)
43
+ return list(set(matches))
44
  except requests.RequestException as e:
45
  return [f"Error fetching website content: {str(e)}"]
46
 
 
92
 
93
  agent = CodeAgent(
94
  model=model,
95
+ tools=[final_answer, search_tool, extract_website_content_parts, get_website_content, get_papers_url_for_date, get_current_time_in_timezone],
96
  max_steps=30,
97
  verbosity_level=1,
98
  grammar=None,