Spaces:
Sleeping
Sleeping
Fix both system prompts
Browse files
search.py
CHANGED
@@ -44,11 +44,10 @@ class GoogleSearch:
|
|
44 |
else:
|
45 |
return f"Search failed with status {response.status}"
|
46 |
|
47 |
-
async def google_image_search(self, query: str
|
48 |
"""
|
49 |
Args:
|
50 |
query: Search query
|
51 |
-
num_results: Max results to return
|
52 |
Returns:
|
53 |
dict: JSON response from Google API.
|
54 |
"""
|
@@ -71,7 +70,7 @@ class GoogleSearch:
|
|
71 |
results = "Web Search results:\n\n" + "\n\n".join(
|
72 |
[
|
73 |
f"Link:{result['link']}\nTitle:{result['title']}"
|
74 |
-
for result in data["items"][:
|
75 |
]
|
76 |
)
|
77 |
return results
|
|
|
44 |
else:
|
45 |
return f"Search failed with status {response.status}"
|
46 |
|
47 |
+
async def google_image_search(self, query: str) -> str:
|
48 |
"""
|
49 |
Args:
|
50 |
query: Search query
|
|
|
51 |
Returns:
|
52 |
dict: JSON response from Google API.
|
53 |
"""
|
|
|
70 |
results = "Web Search results:\n\n" + "\n\n".join(
|
71 |
[
|
72 |
f"Link:{result['link']}\nTitle:{result['title']}"
|
73 |
+
for result in data["items"][:4]
|
74 |
]
|
75 |
)
|
76 |
return results
|
tools.py
CHANGED
@@ -61,39 +61,48 @@ def truncate_content(content: str, max_length: int = 10000) -> str:
|
|
61 |
return content[:max_length]
|
62 |
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
-
|
72 |
-
|
73 |
|
74 |
-
|
75 |
-
|
76 |
|
77 |
-
|
78 |
-
|
79 |
|
80 |
-
|
81 |
-
|
82 |
|
83 |
-
|
84 |
-
|
85 |
|
86 |
-
|
87 |
-
|
88 |
|
89 |
-
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
|
98 |
|
99 |
def parse_youtube_video(youtube_url: str) -> str:
|
|
|
61 |
return content[:max_length]
|
62 |
|
63 |
|
64 |
+
class WebPageTranscription:
|
65 |
+
def __init__(self):
|
66 |
+
self.counter = 0
|
67 |
+
|
68 |
+
def transcribe_webpage(self, website_url: str) -> str:
|
69 |
+
"""Visits website url and returns markdown of contents
|
70 |
+
Args:
|
71 |
+
website_url:str"""
|
72 |
+
if self.counter > 1:
|
73 |
+
return "No more transcriptions, move on"
|
74 |
+
self.counter += 1
|
75 |
+
try:
|
76 |
+
# Send a GET request to the URL with a 20-second timeout
|
77 |
+
response = requests.get(website_url, timeout=20)
|
78 |
+
response.raise_for_status() # Raise an exception for bad status codes
|
79 |
|
80 |
+
soup = BeautifulSoup(response.text, "html.parser")
|
81 |
+
content_div = soup.find("div", id="mw-content-text")
|
82 |
|
83 |
+
if not content_div:
|
84 |
+
content_div = soup.find("div")
|
85 |
|
86 |
+
# Only extract <p> and <table> tags
|
87 |
+
elements = content_div.find_all(["p", "table"])
|
88 |
|
89 |
+
# Join selected HTML chunks
|
90 |
+
html_subset = "".join(str(el) for el in elements)
|
91 |
|
92 |
+
# Convert the HTML content to Markdown
|
93 |
+
markdown_content = html2text.HTML2Text().handle(str(html_subset))
|
94 |
|
95 |
+
# Remove multiple line breaks
|
96 |
+
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
97 |
|
98 |
+
return truncate_content(markdown_content, 20000)
|
99 |
|
100 |
+
except requests.exceptions.Timeout:
|
101 |
+
return "The request timed out. Please try again later or check the URL."
|
102 |
+
except RequestException as e:
|
103 |
+
return f"Error fetching the webpage: {str(e)}"
|
104 |
+
except Exception as e:
|
105 |
+
return f"An unexpected error occurred: {str(e)}"
|
106 |
|
107 |
|
108 |
def parse_youtube_video(youtube_url: str) -> str:
|
utils.py
CHANGED
@@ -61,12 +61,14 @@ def format_final_answer(question: str, answer: str) -> str:
|
|
61 |
messages=[
|
62 |
{
|
63 |
"role": "system",
|
64 |
-
"content": """ You're tasked with reformatting an answer from an unreliable AI into the expected format as per their instructions.
|
65 |
<instructions>
|
66 |
-
You are a general AI assistant. I will ask you a question.
|
67 |
</instructions>
|
|
|
68 |
<question>"""
|
69 |
+ question
|
|
|
70 |
+ """
|
71 |
Now here is their answer. Only reply with the corrected formatting
|
72 |
""",
|
|
|
61 |
messages=[
|
62 |
{
|
63 |
"role": "system",
|
64 |
+
"content": """ You're tasked with correcting/reformatting an answer from an unreliable AI into the expected format as per their instructions.
|
65 |
<instructions>
|
66 |
+
You are a general AI assistant. I will ask you a question. Your answer should only be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
|
67 |
</instructions>
|
68 |
+
For context, here is the question the AI answered
|
69 |
<question>"""
|
70 |
+ question
|
71 |
+
+ """</question>"""
|
72 |
+ """
|
73 |
Now here is their answer. Only reply with the corrected formatting
|
74 |
""",
|