AlexanderKazakov
committed on
Commit
·
a31ddf7
1
Parent(s):
4ae9d8c
gpt-4.1, adjusted prompt, adjusted visit_webpage output size
Browse files- .gitignore +2 -1
- agent.py +159 -0
- app.py +2 -33
- prompt_templates.yaml +280 -0
- requirements.txt +2 -1
- system_prompt.txt +107 -0
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
/.idea
|
|
|
|
1 |
+
/.idea
|
2 |
+
/data
|
agent.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
import time
|
5 |
+
|
6 |
+
import yaml
|
7 |
+
from duckduckgo_search.exceptions import DuckDuckGoSearchException
|
8 |
+
from smolagents import FinalAnswerTool, Tool, OpenAIServerModel, CodeAgent
|
9 |
+
|
10 |
+
|
11 |
+
# Shared length limit: passed to `truncate_content` for fetched web pages and to
# `CodeAgent` as `max_print_outputs_length`, so both use the same cap.
python_interpreter_max_print_outputs_length = 10**6
|
12 |
+
|
13 |
+
|
14 |
+
class CustomDuckDuckGoSearchTool(Tool):
    """DuckDuckGo web-search tool that retries when the search backend errors.

    Args:
        max_results: Maximum number of results requested per query.
        num_tries: Total number of attempts made before the last
            ``DuckDuckGoSearchException`` is re-raised. Must be at least 1.
        retry_delay: Seconds to sleep between two failed attempts.
        **kwargs: Forwarded unchanged to ``duckduckgo_search.DDGS``.

    Raises:
        ValueError: If ``num_tries`` is smaller than 1.
        ImportError: If the ``duckduckgo_search`` package is not installed.
    """

    name = "web_search"
    description = """Performs a duckduckgo web search based on your query (think a Google search) then returns the top search results."""
    inputs = {"query": {"type": "string", "description": "The search query to perform."}}
    output_type = "string"

    def __init__(self, max_results=10, num_tries=5, retry_delay=1.5, **kwargs):
        super().__init__()
        if num_tries < 1:
            # With zero attempts `forward` could never obtain any results.
            raise ValueError("num_tries must be at least 1")
        self.max_results = max_results
        self.num_tries = num_tries
        self.retry_delay = retry_delay
        try:
            from duckduckgo_search import DDGS
        except ImportError as e:
            raise ImportError(
                "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
            ) from e
        self.ddgs = DDGS(**kwargs)

    def forward(self, query: str) -> str:
        """Run the search and return the results formatted as markdown.

        Args:
            query: The search query to perform.

        Returns:
            A "## Search Results" section with one ``[title](href)`` entry,
            followed by its body text, per result.

        Raises:
            DuckDuckGoSearchException: If every attempt failed.
            Exception: If the search succeeded but returned no results.
        """
        results = None
        for attempt in range(1, self.num_tries + 1):
            try:
                results = self.ddgs.text(query, max_results=self.max_results)
                break
            except DuckDuckGoSearchException as e:
                print(e)
                if attempt == self.num_tries:
                    # Out of attempts: surface the last error to the caller.
                    raise
                time.sleep(self.retry_delay)

        if not results:
            raise Exception("No results found! Try a less restrictive/shorter query.")
        postprocessed_results = [
            f"[{result['title']}]({result['href']})\n{result['body']}" for result in results
        ]
        return "## Search Results\n\n" + "\n\n".join(postprocessed_results)
|
47 |
+
|
48 |
+
|
49 |
+
class CustomVisitWebpageTool(Tool):
    """Tool that downloads a URL and returns its content converted to markdown.

    Network and conversion failures are reported as a returned message string
    rather than raised; only a missing dependency raises ``ImportError``.
    """

    name = "visit_webpage"
    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
    inputs = {"url": {"type": "string", "description": "The url of the webpage to visit."}}
    output_type = "string"

    def forward(self, url: str) -> str:
        """Fetch ``url`` and return its markdown rendering, or an error message.

        Args:
            url: The url of the webpage to visit.

        Returns:
            The page as markdown, passed through ``truncate_content`` with the
            module-level length limit, or a human-readable error message when
            the request or the conversion fails.

        Raises:
            ImportError: If `markdownify`, `requests` or `smolagents.utils`
                cannot be imported.
        """
        # Dependencies are imported lazily so a missing package yields an
        # actionable installation hint.
        try:
            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException

            from smolagents.utils import truncate_content
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e

        try:
            # GET with a 20-second timeout; non-2xx statuses become exceptions.
            page = requests.get(url, timeout=20)
            page.raise_for_status()

            # HTML -> markdown, then squeeze runs of 3+ newlines down to 2.
            as_markdown = markdownify(page.text).strip()
            compacted = re.sub(r"\n{3,}", "\n\n", as_markdown)

            return truncate_content(compacted, python_interpreter_max_print_outputs_length)
        except requests.exceptions.Timeout:
            return "The request timed out. Please try again later or check the URL."
        except RequestException as err:
            return f"Error fetching the webpage: {str(err)}"
        except Exception as err:
            return f"An unexpected error occurred: {str(err)}"
|
92 |
+
|
93 |
+
|
94 |
+
class SmolAgent:
    """Wrapper that builds a smolagents ``CodeAgent`` and runs tasks through it.

    The agent is given a web-search tool, a visit-webpage tool and the final
    answer tool. Prompt templates are loaded from ``prompt_templates.yaml`` and
    the system prompt is then overridden with the content of
    ``system_prompt.txt``; both paths are relative to the current working
    directory.

    Args:
        openai_api_key: API key handed to ``OpenAIServerModel``.
    """

    def __init__(self, openai_api_key=None):
        final_answer = FinalAnswerTool()
        search_tool = CustomDuckDuckGoSearchTool(max_results=3)
        visit_webpage_tool = CustomVisitWebpageTool()
        model = OpenAIServerModel(
            # Smaller alternatives: "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14".
            model_id="gpt-4.1-2025-04-14",
            max_completion_tokens=1024,
            temperature=0.01,
            api_key=openai_api_key,
        )
        # Explicit encoding so the prompts load identically on every platform.
        with open('prompt_templates.yaml', 'r', encoding='utf-8') as f:
            prompt_templates = yaml.safe_load(f)
        with open('system_prompt.txt', 'r', encoding='utf-8') as f:
            prompt_templates['system_prompt'] = f.read()
        self.agent = CodeAgent(
            model=model,
            prompt_templates=prompt_templates,
            tools=[search_tool, visit_webpage_tool, final_answer],
            max_steps=10,
            verbosity_level=100,
            grammar=None,
            planning_interval=None,
            name='Advanced GAIA Agent',
            description=None,
            max_print_outputs_length=python_interpreter_max_print_outputs_length,
        )
        self.agent.visualize()

    def run(self, task: dict[str, str]) -> str:
        """Answer one task, or return ``''`` when the task is not supported.

        Args:
            task: Mapping read for the keys ``'file_name'`` and ``'question'``.
                A missing or ``None`` value is treated like an empty one.

        Returns:
            ``''`` when the task has an attached file, has no question, or the
            question mentions ``www.youtube.com``; otherwise whatever
            ``self.agent.run`` returns for the question.
            NOTE(review): the annotation says ``str`` but the agent's result is
            returned unconverted - confirm callers accept non-string answers.
        """
        if task.get('file_name'):
            return ''  # skip questions where file processing is needed

        question = task.get('question')
        if not question:
            return ''  # nothing to answer

        if 'www.youtube.com' in question:
            return ''  # skip questions that require watching a video

        return self.agent.run(question)
|
134 |
+
|
135 |
+
|
136 |
+
if __name__ == '__main__':
    # Local smoke run: answer every question stored in data/questions.json.
    # The API key comes from the environment, falling back to a local key file.
    openai_key = os.getenv('OPENAI_API_KEY')
    if not openai_key:
        with open("data/openai.key", "r") as key_file:
            openai_key = key_file.read().strip()

    agent = SmolAgent(openai_api_key=openai_key)

    with open('data/questions.json', 'r') as questions_file:
        questions = json.load(questions_file)

    for task in questions:
        # Header, the raw task, then a separator before the agent's own logs.
        print('\n===', task, '\n---', sep='\n')
        answer = agent.run(task)
        print('\n---', answer, sep='\n')
|
154 |
+
|
155 |
+
|
156 |
+
|
157 |
+
|
158 |
+
|
159 |
+
|
app.py
CHANGED
@@ -2,46 +2,15 @@ import json
|
|
2 |
import os
|
3 |
import gradio as gr
|
4 |
import requests
|
5 |
-
import inspect
|
6 |
import pandas as pd
|
7 |
-
|
|
|
8 |
|
9 |
# (Keep Constants as is)
|
10 |
# --- Constants ---
|
11 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
12 |
|
13 |
|
14 |
-
class SmolAgent:
|
15 |
-
def __init__(self):
|
16 |
-
final_answer = FinalAnswerTool()
|
17 |
-
search_tool = DuckDuckGoSearchTool()
|
18 |
-
visit_webpage_tool = VisitWebpageTool()
|
19 |
-
model = HfApiModel(
|
20 |
-
max_tokens=2096,
|
21 |
-
temperature=0.01,
|
22 |
-
model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
|
23 |
-
custom_role_conversions=None,
|
24 |
-
# token=open("data/keys/hf.key").read()
|
25 |
-
)
|
26 |
-
agent = CodeAgent(
|
27 |
-
model=model,
|
28 |
-
tools=[search_tool, visit_webpage_tool, final_answer],
|
29 |
-
max_steps=3,
|
30 |
-
verbosity_level=100,
|
31 |
-
grammar=None,
|
32 |
-
planning_interval=None,
|
33 |
-
name=None,
|
34 |
-
description=None,
|
35 |
-
)
|
36 |
-
print(f'--- agent.visualize(): ---')
|
37 |
-
agent.visualize()
|
38 |
-
|
39 |
-
def __call__(self, question: str) -> str:
|
40 |
-
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
41 |
-
fixed_answer = "This is a default answer."
|
42 |
-
print(f"Agent returning fixed answer: {fixed_answer}")
|
43 |
-
return fixed_answer
|
44 |
-
|
45 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
46 |
"""
|
47 |
Fetches all questions, runs the SmolAgent on them, submits all answers,
|
|
|
2 |
import os
|
3 |
import gradio as gr
|
4 |
import requests
|
|
|
5 |
import pandas as pd
|
6 |
+
|
7 |
+
from agent import SmolAgent
|
8 |
|
9 |
# (Keep Constants as is)
|
10 |
# --- Constants ---
|
11 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
12 |
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
15 |
"""
|
16 |
Fetches all questions, runs the SmolAgent on them, submits all answers,
|
prompt_templates.yaml
ADDED
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
managed_agent:
|
2 |
+
report: 'Here is the final answer from your managed agent ''{{name}}'':
|
3 |
+
|
4 |
+
{{final_answer}}'
|
5 |
+
task: 'You''re a helpful agent named ''{{name}}''.
|
6 |
+
|
7 |
+
You have been submitted this task by your manager.
|
8 |
+
|
9 |
+
---
|
10 |
+
|
11 |
+
Task:
|
12 |
+
|
13 |
+
{{task}}
|
14 |
+
|
15 |
+
---
|
16 |
+
|
17 |
+
You''re helping your manager solve a wider task: so make sure to not provide a
|
18 |
+
one-line answer, but give as much information as possible to give them a clear
|
19 |
+
understanding of the answer.
|
20 |
+
|
21 |
+
|
22 |
+
Your final_answer WILL HAVE to contain these parts:
|
23 |
+
|
24 |
+
### 1. Task outcome (short version):
|
25 |
+
|
26 |
+
### 2. Task outcome (extremely detailed version):
|
27 |
+
|
28 |
+
### 3. Additional context (if relevant):
|
29 |
+
|
30 |
+
|
31 |
+
Put all these in your final_answer tool, everything that you do not pass as an
|
32 |
+
argument to final_answer will be lost.
|
33 |
+
|
34 |
+
And even if your task resolution is not successful, please return as much context
|
35 |
+
as possible, so that your manager can act upon this feedback.'
|
36 |
+
planning:
|
37 |
+
initial_facts: 'Below I will present you a task.
|
38 |
+
|
39 |
+
|
40 |
+
You will now build a comprehensive preparatory survey of which facts we have at
|
41 |
+
our disposal and which ones we still need.
|
42 |
+
|
43 |
+
To do so, you will have to read the task and identify things that must be discovered
|
44 |
+
in order to successfully complete it.
|
45 |
+
|
46 |
+
Don''t make any assumptions. For each item, provide a thorough reasoning. Here
|
47 |
+
is how you will structure this survey:
|
48 |
+
|
49 |
+
|
50 |
+
---
|
51 |
+
|
52 |
+
### 1. Facts given in the task
|
53 |
+
|
54 |
+
List here the specific facts given in the task that could help you (there might
|
55 |
+
be nothing here).
|
56 |
+
|
57 |
+
|
58 |
+
### 2. Facts to look up
|
59 |
+
|
60 |
+
List here any facts that we may need to look up.
|
61 |
+
|
62 |
+
Also list where to find each of these, for instance a website, a file... - maybe
|
63 |
+
the task contains some sources that you should re-use here.
|
64 |
+
|
65 |
+
|
66 |
+
### 3. Facts to derive
|
67 |
+
|
68 |
+
List here anything that we want to derive from the above by logical reasoning,
|
69 |
+
for instance computation or simulation.
|
70 |
+
|
71 |
+
|
72 |
+
Keep in mind that "facts" will typically be specific names, dates, values, etc.
|
73 |
+
Your answer should use the below headings:
|
74 |
+
|
75 |
+
### 1. Facts given in the task
|
76 |
+
|
77 |
+
### 2. Facts to look up
|
78 |
+
|
79 |
+
### 3. Facts to derive
|
80 |
+
|
81 |
+
Do not add anything else.'
|
82 |
+
initial_plan: "You are a world expert at making efficient plans to solve any task\
|
83 |
+
\ using a set of carefully crafted tools.\n\nNow for the given task, develop a\
|
84 |
+
\ step-by-step high-level plan taking into account the above inputs and list of\
|
85 |
+
\ facts.\nThis plan should involve individual tasks based on the available tools,\
|
86 |
+
\ that if executed correctly will yield the correct answer.\nDo not skip steps,\
|
87 |
+
\ do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL\
|
88 |
+
\ INDIVIDUAL TOOL CALLS.\nAfter writing the final step of the plan, write the\
|
89 |
+
\ '\\n<end_plan>' tag and stop there.\n\nHere is your task:\n\nTask:\n```\n{{task}}\n\
|
90 |
+
```\nYou can leverage these tools:\n{%- for tool in tools.values() %}\n- {{ tool.name\
|
91 |
+
\ }}: {{ tool.description }}\n Takes inputs: {{tool.inputs}}\n Returns an\
|
92 |
+
\ output of type: {{tool.output_type}}\n{%- endfor %}\n\n{%- if managed_agents\
|
93 |
+
\ and managed_agents.values() | list %}\nYou can also give tasks to team members.\n\
|
94 |
+
Calling a team member works the same as for calling a tool: simply, the only argument\
|
95 |
+
\ you can give in the call is 'request', a long string explaining your request.\n\
|
96 |
+
Given that this team member is a real human, you should be very verbose in your\
|
97 |
+
\ request.\nHere is a list of the team members that you can call:\n{%- for agent\
|
98 |
+
\ in managed_agents.values() %}\n- {{ agent.name }}: {{ agent.description }}\n\
|
99 |
+
{%- endfor %}\n{%- else %}\n{%- endif %}\n\nList of facts that you know:\n```\n\
|
100 |
+
{{answer_facts}}\n```\n\nNow begin! Write your plan below."
|
101 |
+
update_facts_post_messages: 'Earlier we''ve built a list of facts.
|
102 |
+
|
103 |
+
In your previous steps you may have learned useful new facts or invalidated
|
104 |
+
some false ones.
|
105 |
+
|
106 |
+
Please update your list of facts based on the previous history, and provide these
|
107 |
+
headings:
|
108 |
+
|
109 |
+
### 1. Facts given in the task
|
110 |
+
|
111 |
+
### 2. Facts that we have learned
|
112 |
+
|
113 |
+
### 3. Facts still to look up
|
114 |
+
|
115 |
+
### 4. Facts still to derive
|
116 |
+
|
117 |
+
|
118 |
+
Now write your new list of facts below.'
|
119 |
+
update_facts_pre_messages: 'You are a world expert at gathering known and unknown
|
120 |
+
facts based on a conversation.
|
121 |
+
|
122 |
+
Below you will find a task, and a history of attempts made to solve the task.
|
123 |
+
You will have to produce a list of these:
|
124 |
+
|
125 |
+
### 1. Facts given in the task
|
126 |
+
|
127 |
+
### 2. Facts that we have learned
|
128 |
+
|
129 |
+
### 3. Facts still to look up
|
130 |
+
|
131 |
+
### 4. Facts still to derive
|
132 |
+
|
133 |
+
Find the task and history below:'
|
134 |
+
update_plan_post_messages: "You're still working towards solving this task:\n```\n\
|
135 |
+
{{task}}\n```\n\nYou can leverage these tools:\n{%- for tool in tools.values()\
|
136 |
+
\ %}\n- {{ tool.name }}: {{ tool.description }}\n Takes inputs: {{tool.inputs}}\n\
|
137 |
+
\ Returns an output of type: {{tool.output_type}}\n{%- endfor %}\n\n{%- if\
|
138 |
+
\ managed_agents and managed_agents.values() | list %}\nYou can also give tasks\
|
139 |
+
\ to team members.\nCalling a team member works the same as for calling a tool:\
|
140 |
+
\ simply, the only argument you can give in the call is 'task'.\nGiven that this\
|
141 |
+
\ team member is a real human, you should be very verbose in your task, it should\
|
142 |
+
\ be a long string providing information as detailed as necessary.\nHere is a\
|
143 |
+
\ list of the team members that you can call:\n{%- for agent in managed_agents.values()\
|
144 |
+
\ %}\n- {{ agent.name }}: {{ agent.description }}\n{%- endfor %}\n{%- else %}\n\
|
145 |
+
{%- endif %}\n\nHere is the up to date list of facts that you know:\n```\n{{facts_update}}\n\
|
146 |
+
```\n\nNow for the given task, develop a step-by-step high-level plan taking into\
|
147 |
+
\ account the above inputs and list of facts.\nThis plan should involve individual\
|
148 |
+
\ tasks based on the available tools, that if executed correctly will yield the\
|
149 |
+
\ correct answer.\nBeware that you have {remaining_steps} steps remaining.\nDo\
|
150 |
+
\ not skip steps, do not add any superfluous steps. Only write the high-level\
|
151 |
+
\ plan, DO NOT DETAIL INDIVIDUAL TOOL CALLS.\nAfter writing the final step of\
|
152 |
+
\ the plan, write the '\\n<end_plan>' tag and stop there.\n\nNow write your new\
|
153 |
+
\ plan below."
|
154 |
+
update_plan_pre_messages: 'You are a world expert at making efficient plans to solve
|
155 |
+
any task using a set of carefully crafted tools.
|
156 |
+
|
157 |
+
|
158 |
+
You have been given a task:
|
159 |
+
|
160 |
+
```
|
161 |
+
|
162 |
+
{{task}}
|
163 |
+
|
164 |
+
```
|
165 |
+
|
166 |
+
|
167 |
+
Find below the record of what has been tried so far to solve it. Then you will
|
168 |
+
be asked to make an updated plan to solve the task.
|
169 |
+
|
170 |
+
If the previous tries so far have met some success, you can make an updated plan
|
171 |
+
based on these actions.
|
172 |
+
|
173 |
+
If you are stalled, you can make a completely new plan starting from scratch.'
|
174 |
+
system_prompt: "You are an expert assistant who can solve any task using code blobs.\
|
175 |
+
\ You will be given a task to solve as best you can.\nTo do so, you have been given\
|
176 |
+
\ access to a list of tools: these tools are basically Python functions which you\
|
177 |
+
\ can call with code.\nTo solve the task, you must plan forward to proceed in a\
|
178 |
+
\ series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.\n\
|
179 |
+
\nAt each step, in the 'Thought:' sequence, you should first explain your reasoning\
|
180 |
+
\ towards solving the task and the tools that you want to use.\nThen in the 'Code:'\
|
181 |
+
\ sequence, you should write the code in simple Python. The code sequence must end\
|
182 |
+
\ with '<end_code>' sequence.\nDuring each intermediate step, you can use 'print()'\
|
183 |
+
\ to save whatever important information you will then need.\nThese print outputs\
|
184 |
+
\ will then appear in the 'Observation:' field, which will be available as input\
|
185 |
+
\ for the next step.\nIn the end you have to return a final answer using the `final_answer`\
|
186 |
+
\ tool.\n\nHere are a few examples using notional tools:\n---\nTask: \"Generate\
|
187 |
+
\ an image of the oldest person in this document.\"\n\nThought: I will proceed step\
|
188 |
+
\ by step and use the following tools: `document_qa` to find the oldest person in\
|
189 |
+
\ the document, then `image_generator` to generate an image according to the answer.\n\
|
190 |
+
Code:\n```py\nanswer = document_qa(document=document, question=\"Who is the oldest\
|
191 |
+
\ person mentioned?\")\nprint(answer)\n```<end_code>\nObservation: \"The oldest\
|
192 |
+
\ person in the document is John Doe, a 55 year old lumberjack living in Newfoundland.\"\
|
193 |
+
\n\nThought: I will now generate an image showcasing the oldest person.\nCode:\n\
|
194 |
+
```py\nimage = image_generator(\"A portrait of John Doe, a 55-year-old man living\
|
195 |
+
\ in Canada.\")\nfinal_answer(image)\n```<end_code>\n\n---\nTask: \"What is the\
|
196 |
+
\ result of the following operation: 5 + 3 + 1294.678?\"\n\nThought: I will use\
|
197 |
+
\ python code to compute the result of the operation and then return the final answer\
|
198 |
+
\ using the `final_answer` tool\nCode:\n```py\nresult = 5 + 3 + 1294.678\nfinal_answer(result)\n\
|
199 |
+
```<end_code>\n\n---\nTask:\n\"Answer the question in the variable `question` about\
|
200 |
+
\ the image stored in the variable `image`. The question is in French.\nYou have\
|
201 |
+
\ been provided with these additional arguments, that you can access using the keys\
|
202 |
+
\ as variables in your python code:\n{'question': 'Quel est l'animal sur l'image?',\
|
203 |
+
\ 'image': 'path/to/image.jpg'}\"\n\nThought: I will use the following tools: `translator`\
|
204 |
+
\ to translate the question into English and then `image_qa` to answer the question\
|
205 |
+
\ on the input image.\nCode:\n```py\ntranslated_question = translator(question=question,\
|
206 |
+
\ src_lang=\"French\", tgt_lang=\"English\")\nprint(f\"The translated question is\
|
207 |
+
\ {translated_question}.\")\nanswer = image_qa(image=image, question=translated_question)\n\
|
208 |
+
final_answer(f\"The answer is {answer}\")\n```<end_code>\n\n---\nTask:\nIn a 1979\
|
209 |
+
\ interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists\
|
210 |
+
\ of his time, including Oppenheimer.\nWhat does he say was the consequence of Einstein\
|
211 |
+
\ learning too much math on his creativity, in one word?\n\nThought: I need to find\
|
212 |
+
\ and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.\nCode:\n```py\n\
|
213 |
+
pages = search(query=\"1979 interview Stanislaus Ulam Martin Sherwin physicists\
|
214 |
+
\ Einstein\")\nprint(pages)\n```<end_code>\nObservation:\nNo result found for query\
|
215 |
+
\ \"1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein\".\n\nThought:\
|
216 |
+
\ The query was maybe too restrictive and did not find any results. Let's try again\
|
217 |
+
\ with a broader query.\nCode:\n```py\npages = search(query=\"1979 interview Stanislaus\
|
218 |
+
\ Ulam\")\nprint(pages)\n```<end_code>\nObservation:\nFound 6 pages:\n[Stanislaus\
|
219 |
+
\ Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)\n\
|
220 |
+
\n[Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/)\n\
|
221 |
+
\n(truncated)\n\nThought: I will read the first 2 pages to know more.\nCode:\n```py\n\
|
222 |
+
for url in [\"https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/\"\
|
223 |
+
, \"https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/\"]:\n\
|
224 |
+
\ whole_page = visit_webpage(url)\n print(whole_page)\n print(\"\\n\" +\
|
225 |
+
\ \"=\"*80 + \"\\n\") # Print separator between pages\n```<end_code>\nObservation:\n\
|
226 |
+
Manhattan Project Locations:\nLos Alamos, NM\nStanislaus Ulam was a Polish-American\
|
227 |
+
\ mathematician. He worked on the Manhattan Project at Los Alamos and later helped\
|
228 |
+
\ design the hydrogen bomb. In this interview, he discusses his work at\n(truncated)\n\
|
229 |
+
\nThought: I now have the final answer: from the webpages visited, Stanislaus Ulam\
|
230 |
+
\ says of Einstein: \"He learned too much mathematics and sort of diminished, it\
|
231 |
+
\ seems to me personally, it seems to me his purely physics creativity.\" Let's\
|
232 |
+
\ answer in one word.\nCode:\n```py\nfinal_answer(\"diminished\")\n```<end_code>\n\
|
233 |
+
\n---\nTask: \"Which city has the highest population: Guangzhou or Shanghai?\"\n\
|
234 |
+
\nThought: I need to get the populations for both cities and compare them: I will\
|
235 |
+
\ use the tool `search` to get the population of both cities.\nCode:\n```py\nfor\
|
236 |
+
\ city in [\"Guangzhou\", \"Shanghai\"]:\n print(f\"Population {city}:\", search(f\"\
|
237 |
+
{city} population\")\n```<end_code>\nObservation:\nPopulation Guangzhou: ['Guangzhou\
|
238 |
+
\ has a population of 15 million inhabitants as of 2021.']\nPopulation Shanghai:\
|
239 |
+
\ '26 million (2019)'\n\nThought: Now I know that Shanghai has the highest population.\n\
|
240 |
+
Code:\n```py\nfinal_answer(\"Shanghai\")\n```<end_code>\n\n---\nTask: \"What is\
|
241 |
+
\ the current age of the pope, raised to the power 0.36?\"\n\nThought: I will use\
|
242 |
+
\ the tool `wiki` to get the age of the pope, and confirm that with a web search.\n\
|
243 |
+
Code:\n```py\npope_age_wiki = wiki(query=\"current pope age\")\nprint(\"Pope age\
|
244 |
+
\ as per wikipedia:\", pope_age_wiki)\npope_age_search = web_search(query=\"current\
|
245 |
+
\ pope age\")\nprint(\"Pope age as per google search:\", pope_age_search)\n```<end_code>\n\
|
246 |
+
Observation:\nPope age: \"The pope Francis is currently 88 years old.\"\n\nThought:\
|
247 |
+
\ I know that the pope is 88 years old. Let's compute the result using python code.\n\
|
248 |
+
Code:\n```py\npope_current_age = 88 ** 0.36\nfinal_answer(pope_current_age)\n```<end_code>\n\
|
249 |
+
\nThe above examples were using notional tools that might not exist for you. On top of\
|
250 |
+
\ performing computations in the Python code snippets that you create, you only\
|
251 |
+
\ have access to these tools:\n{%- for tool in tools.values() %}\n- {{ tool.name\
|
252 |
+
\ }}: {{ tool.description }}\n Takes inputs: {{tool.inputs}}\n Returns an\
|
253 |
+
\ output of type: {{tool.output_type}}\n{%- endfor %}\n\n{%- if managed_agents and\
|
254 |
+
\ managed_agents.values() | list %}\nYou can also give tasks to team members.\n\
|
255 |
+
Calling a team member works the same as for calling a tool: simply, the only argument\
|
256 |
+
\ you can give in the call is 'task', a long string explaining your task.\nGiven\
|
257 |
+
\ that this team member is a real human, you should be very verbose in your task.\n\
|
258 |
+
Here is a list of the team members that you can call:\n{%- for agent in managed_agents.values()\
|
259 |
+
\ %}\n- {{ agent.name }}: {{ agent.description }}\n{%- endfor %}\n{%- else %}\n\
|
260 |
+
{%- endif %}\n\nHere are the rules you should always follow to solve your task:\n\
|
261 |
+
1. Always provide a 'Thought:' sequence, and a 'Code:\\n```py' sequence ending with\
|
262 |
+
\ '```<end_code>' sequence, else you will fail.\n2. Use only variables that you\
|
263 |
+
\ have defined!\n3. Always use the right arguments for the tools. DO NOT pass the\
|
264 |
+
\ arguments as a dict as in 'answer = wiki({'query': \"What is the place where James\
|
265 |
+
\ Bond lives?\"})', but use the arguments directly as in 'answer = wiki(query=\"\
|
266 |
+
What is the place where James Bond lives?\")'.\n4. Take care to not chain too many\
|
267 |
+
\ sequential tool calls in the same code block, especially when the output format\
|
268 |
+
\ is unpredictable. For instance, a call to search has an unpredictable return format,\
|
269 |
+
\ so do not have another tool call that depends on its output in the same block:\
|
270 |
+
\ rather output results with print() to use them in the next block.\n5. Call a tool\
|
271 |
+
\ only when needed, and never re-do a tool call that you previously did with the\
|
272 |
+
\ exact same parameters.\n6. Don't name any new variable with the same name as a\
|
273 |
+
\ tool: for instance don't name a variable 'final_answer'.\n7. Never create any\
|
274 |
+
\ notional variables in our code, as having these in your logs will derail you from\
|
275 |
+
\ the true variables.\n8. You can use imports in your code, but only from the following\
|
276 |
+
\ list of modules: {{authorized_imports}}\n9. The state persists between code executions:\
|
277 |
+
\ so if in one step you've created variables or imported modules, these will all\
|
278 |
+
\ persist.\n10. Don't give up! You're in charge of solving the task, not providing\
|
279 |
+
\ directions to solve it.\n\nNow Begin! If you solve the task correctly, you will\
|
280 |
+
\ receive a reward of $1,000,000."
|
requirements.txt
CHANGED
@@ -4,4 +4,5 @@ smolagents
|
|
4 |
pandas
|
5 |
duckduckgo-search
|
6 |
markdownify
|
7 |
-
requests
|
|
|
|
4 |
pandas
|
5 |
duckduckgo-search
|
6 |
markdownify
|
7 |
+
requests
|
8 |
+
openai
|
system_prompt.txt
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.
|
2 |
+
To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.
|
3 |
+
To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
|
4 |
+
At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.
|
5 |
+
Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '<end_code>' sequence.
|
6 |
+
During each intermediate step, you can use 'print()' to save whatever important information you will then need.
|
7 |
+
These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step.
|
8 |
+
In the end you have to return a final answer using the `final_answer` tool.
|
9 |
+
Your final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use commas to write your number, nor use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
|
10 |
+
Here are a few examples using notional tools:
|
11 |
+
---
|
12 |
+
Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
|
13 |
+
Thought: I will use python code to compute the result of the operation and then return the final answer using the `final_answer` tool
|
14 |
+
Code:
|
15 |
+
```py
|
16 |
+
result = 5 + 3 + 1294.678
|
17 |
+
final_answer(result)
|
18 |
+
```<end_code>
|
19 |
+
---
|
20 |
+
Task:
|
21 |
+
In a 1979 interview, Stanislaus Ulam discusses with Martin Sherwin about other great physicists of his time, including Oppenheimer.
|
22 |
+
What does he say was the consequence of Einstein learning too much math on his creativity, in one word?
|
23 |
+
Thought: I need to find and read the 1979 interview of Stanislaus Ulam with Martin Sherwin.
|
24 |
+
Code:
|
25 |
+
```py
|
26 |
+
pages = search(query="1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein")
|
27 |
+
print(pages)
|
28 |
+
```<end_code>
|
29 |
+
Observation:
|
30 |
+
No result found for query "1979 interview Stanislaus Ulam Martin Sherwin physicists Einstein".
|
31 |
+
Thought: The query was maybe too restrictive and did not find any results. Let's try again with a broader query.
|
32 |
+
Code:
|
33 |
+
```py
|
34 |
+
pages = search(query="1979 interview Stanislaus Ulam")
|
35 |
+
print(pages)
|
36 |
+
```<end_code>
|
37 |
+
Observation:
|
38 |
+
Found 6 pages:
|
39 |
+
[Stanislaus Ulam 1979 interview](https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/)
|
40 |
+
[Ulam discusses Manhattan Project](https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/)
|
41 |
+
(truncated)
|
42 |
+
Thought: I will read the first 2 pages to know more.
|
43 |
+
Code:
|
44 |
+
```py
|
45 |
+
for url in ["https://ahf.nuclearmuseum.org/voices/oral-histories/stanislaus-ulams-interview-1979/", "https://ahf.nuclearmuseum.org/manhattan-project/ulam-manhattan-project/"]:
|
46 |
+
whole_page = visit_webpage(url)
|
47 |
+
print(whole_page)
|
48 |
+
print("\n" + "="*80 + "\n") # Print separator between pages
|
49 |
+
```<end_code>
|
50 |
+
Observation:
|
51 |
+
Manhattan Project Locations:
|
52 |
+
Los Alamos, NM
|
53 |
+
Stanislaus Ulam was a Polish-American mathematician. He worked on the Manhattan Project at Los Alamos and later helped design the hydrogen bomb. In this interview, he discusses his work at
|
54 |
+
(truncated)
|
55 |
+
Thought: I now have the final answer: from the webpages visited, Stanislaus Ulam says of Einstein: "He learned too much mathematics and sort of diminished, it seems to me personally, it seems to me his purely physics creativity." Let's answer in one word.
|
56 |
+
Code:
|
57 |
+
```py
|
58 |
+
final_answer("diminished")
|
59 |
+
```<end_code>
|
60 |
+
---
|
61 |
+
Task: "Which city has the highest population: Guangzhou or Shanghai?"
|
62 |
+
Thought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.
|
63 |
+
Code:
|
64 |
+
```py
|
65 |
+
for city in ["Guangzhou", "Shanghai"]:
|
66 |
+
print(f"Population {city}:", search(f"{city} population"))
|
67 |
+
```<end_code>
|
68 |
+
Observation:
|
69 |
+
Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
|
70 |
+
Population Shanghai: '26 million (2019)'
|
71 |
+
Thought: Now I know that Shanghai has the highest population.
|
72 |
+
Code:
|
73 |
+
```py
|
74 |
+
final_answer("Shanghai")
|
75 |
+
```<end_code>
|
76 |
+
---
|
77 |
+
On top of performing computations in the Python code snippets that you create, you only have access to these tools:
|
78 |
+
{%- for tool in tools.values() %}
|
79 |
+
- {{ tool.name }}: {{ tool.description }}
|
80 |
+
Takes inputs: {{tool.inputs}}
|
81 |
+
Returns an output of type: {{tool.output_type}}
|
82 |
+
{%- endfor %}
|
83 |
+
{%- if managed_agents and managed_agents.values() | list %}
|
84 |
+
You can also give tasks to team members.
|
85 |
+
Calling a team member works the same as calling a tool: the only argument you can give in the call is 'task', a long string explaining your task.
|
86 |
+
Given that this team member is a real human, you should be very verbose in your task.
|
87 |
+
Here is a list of the team members that you can call:
|
88 |
+
{%- for agent in managed_agents.values() %}
|
89 |
+
- {{ agent.name }}: {{ agent.description }}
|
90 |
+
{%- endfor %}
|
91 |
+
{%- else %}
|
92 |
+
{%- endif %}
|
93 |
+
|
94 |
+
Here are the rules you should always follow to solve your task:
|
95 |
+
1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```<end_code>' sequence, else you will fail.
|
96 |
+
2. Use only variables that you have defined!
|
97 |
+
3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in 'answer = wiki({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = wiki(query="What is the place where James Bond lives?")'.
|
98 |
+
4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.
|
99 |
+
5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters.
|
100 |
+
6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'.
|
101 |
+
7. Never create any notional variables in your code, as having these in your logs will derail you from the true variables.
|
102 |
+
8. You can use imports in your code, but only from the following list of modules: {{authorized_imports}}
|
103 |
+
9. The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.
|
104 |
+
10. Never do something like `print(page_content[:1000])`; always print all the retrieved content: `print(page_content)`! Otherwise you'll miss important information.
|
105 |
+
11. Don't give up! You're in charge of solving the task, not providing directions to solve it.
|
106 |
+
|
107 |
+
Again, the format of your final answer is extremely important! Your final answer should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use commas to write your number, nor use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
|