Spaces:
Sleeping
Sleeping
Deduplicate extracted content parts
Browse files
Deduplicate extracted content parts because every part showed up twice.
Add `extract_website_content_parts` to the `tools` list.
app.py
CHANGED
@@ -34,13 +34,13 @@ def extract_website_content_parts(url: str, extraction_pattern: str) -> List[str
|
|
34 |
url: The URL of the website from which content parts should be extracted
|
35 |
extraction_pattern: The regular expression string of the content parts to extract from the website
|
36 |
Returns:
|
37 |
-
List[str]: The content parts matching extraction_pattern of the website `url`
|
38 |
"""
|
39 |
try:
|
40 |
response = requests.get(url)
|
41 |
response.raise_for_status()
|
42 |
matches: List[str] = re.findall(extraction_pattern, response.text)
|
43 |
-
return matches
|
44 |
except requests.RequestException as e:
|
45 |
return [f"Error fetching website content: {str(e)}"]
|
46 |
|
@@ -92,7 +92,7 @@ with open("prompts.yaml", 'r') as stream:
|
|
92 |
|
93 |
agent = CodeAgent(
|
94 |
model=model,
|
95 |
-
tools=[final_answer, search_tool, get_website_content, get_papers_url_for_date, get_current_time_in_timezone],
|
96 |
max_steps=30,
|
97 |
verbosity_level=1,
|
98 |
grammar=None,
|
|
|
34 |
url: The URL of the website from which content parts should be extracted
|
35 |
extraction_pattern: The regular expression string of the content parts to extract from the website
|
36 |
Returns:
|
37 |
+
List[str]: The deduplicated content parts matching extraction_pattern of the website `url`
|
38 |
"""
|
39 |
try:
|
40 |
response = requests.get(url)
|
41 |
response.raise_for_status()
|
42 |
matches: List[str] = re.findall(extraction_pattern, response.text)
|
43 |
+
return list(set(matches))
|
44 |
except requests.RequestException as e:
|
45 |
return [f"Error fetching website content: {str(e)}"]
|
46 |
|
|
|
92 |
|
93 |
agent = CodeAgent(
|
94 |
model=model,
|
95 |
+
tools=[final_answer, search_tool, extract_website_content_parts, get_website_content, get_papers_url_for_date, get_current_time_in_timezone],
|
96 |
max_steps=30,
|
97 |
verbosity_level=1,
|
98 |
grammar=None,
|