import json
import os
import re
import time

import yaml
from duckduckgo_search.exceptions import DuckDuckGoSearchException
from smolagents import FinalAnswerTool, Tool, OpenAIServerModel, CodeAgent

# Upper bound (in characters) used both when truncating fetched web pages and
# as the agent's `max_print_outputs_length`.
python_interpreter_max_print_outputs_length = 10**6


class CustomDuckDuckGoSearchTool(Tool):
    """Web search tool backed by DuckDuckGo, with retries on search errors."""

    name = "web_search"
    description = """Performs a duckduckgo web search based on your query (think a Google search) then returns the top search results."""
    inputs = {"query": {"type": "string", "description": "The search query to perform."}}
    output_type = "string"

    def __init__(self, max_results=10, **kwargs):
        """Create the tool.

        Args:
            max_results: Maximum number of results requested per search.
            **kwargs: Forwarded unchanged to the `DDGS` constructor.

        Raises:
            ImportError: If `duckduckgo_search` is not installed.
        """
        super().__init__()
        self.max_results = max_results
        try:
            from duckduckgo_search import DDGS
        except ImportError as e:
            raise ImportError(
                "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
            ) from e
        self.ddgs = DDGS(**kwargs)

    def forward(self, query: str) -> str:
        """Run the search and return the results formatted as markdown.

        The search is attempted up to five times; after each
        `DuckDuckGoSearchException` the error is printed and, unless it was
        the last attempt, the call sleeps 1.5 seconds before retrying.

        Args:
            query: The search query to perform.

        Returns:
            A "## Search Results" header followed by one
            ``[title](href)\\nbody`` entry per result.

        Raises:
            DuckDuckGoSearchException: If the final attempt also fails.
            Exception: If the search returns no results.
        """
        num_tries = 5
        for cnt in range(num_tries):
            try:
                results = self.ddgs.text(query, max_results=self.max_results)
                break
            except DuckDuckGoSearchException as e:
                print(e)
                if cnt == num_tries - 1:
                    # Out of attempts: propagate the last error to the caller.
                    raise
                time.sleep(1.5)

        if not results:
            raise Exception("No results found! Try a less restrictive/shorter query.")

        postprocessed_results = [
            f"[{result['title']}]({result['href']})\n{result['body']}" for result in results
        ]
        return "## Search Results\n\n" + "\n\n".join(postprocessed_results)


class CustomVisitWebpageTool(Tool):
    """Tool that downloads a web page and returns its content as markdown."""

    name = "visit_webpage"
    description = (
        "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
    )
    inputs = {
        "url": {
            "type": "string",
            "description": "The url of the webpage to visit.",
        }
    }
    output_type = "string"

    def forward(self, url: str) -> str:
        """Fetch `url` and return its content converted to markdown.

        Request failures are not raised: they are reported as a
        human-readable error string so the calling agent can react to them.

        Args:
            url: The url of the webpage to visit.

        Returns:
            The page as markdown (runs of three or more newlines collapsed to
            two, then passed through `truncate_content` with
            `python_interpreter_max_print_outputs_length`), or an error
            message string.

        Raises:
            ImportError: If one of the lazily imported packages is missing.
        """
        try:
            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException

            from smolagents.utils import truncate_content
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e

        try:
            # Send a GET request to the URL with a 20-second timeout
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Convert the HTML content to Markdown
            markdown_content = markdownify(response.text).strip()

            # Remove multiple line breaks
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            return truncate_content(markdown_content, python_interpreter_max_print_outputs_length)
        except requests.exceptions.Timeout:
            # Must be handled before RequestException, which would also match it.
            return "The request timed out. Please try again later or check the URL."
        except RequestException as e:
            return f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            return f"An unexpected error occurred: {str(e)}"


class SmolAgent:
    """Wrapper around a smolagents `CodeAgent` equipped with web tools."""

    def __init__(self, openai_api_key=None):
        """Build the model, the tools and the underlying `CodeAgent`.

        Reads `prompt_templates.yaml` and `system_prompt.txt` from the current
        working directory; the text file's content replaces the
        `system_prompt` entry of the loaded templates.

        Args:
            openai_api_key: API key passed to `OpenAIServerModel`.
        """
        final_answer = FinalAnswerTool()
        search_tool = CustomDuckDuckGoSearchTool(max_results=3)
        visit_webpage_tool = CustomVisitWebpageTool()

        model = OpenAIServerModel(
            model_id="gpt-4.1-2025-04-14",
            # Alternative models:
            # model_id="gpt-4.1-mini-2025-04-14",
            # model_id="gpt-4.1-nano-2025-04-14",
            max_completion_tokens=1024,
            temperature=0.01,
            api_key=openai_api_key,
        )

        # Explicit encoding so the prompts decode the same way on every platform.
        with open('prompt_templates.yaml', 'r', encoding='utf-8') as f:
            prompt_templates = yaml.safe_load(f)
        with open('system_prompt.txt', 'r', encoding='utf-8') as f:
            prompt_templates['system_prompt'] = f.read()

        self.agent = CodeAgent(
            model=model,
            prompt_templates=prompt_templates,
            tools=[search_tool, visit_webpage_tool, final_answer],
            max_steps=10,
            verbosity_level=100,
            grammar=None,
            planning_interval=None,
            name='Advanced GAIA Agent',
            description=None,
            max_print_outputs_length=python_interpreter_max_print_outputs_length,
        )
        self.agent.visualize()

    def run(self, task: dict[str, str]) -> str:
        """Answer a single task, or return '' when the task is skipped.

        A task is skipped when it has a non-empty `file_name` (an attached
        file would have to be processed), when it has no question text, or
        when the question mentions `www.youtube.com`.

        Args:
            task: Mapping with a `question` entry and an optional `file_name`
                entry. A missing or `None` `file_name` is treated as "no
                attachment".

        Returns:
            The result of running the agent on the question, or '' if the
            task was skipped.
        """
        if task.get('file_name'):
            return ''  # skip questions where file processing is needed

        question = task.get('question')
        if not question:
            return ''  # nothing to ask the agent

        if 'www.youtube.com' in question:
            return ''  # skip questions where video processing is needed

        return self.agent.run(question)


if __name__ == '__main__':
    # Prefer the environment variable; fall back to a local key file.
    openai_key = os.getenv('OPENAI_API_KEY')
    if not openai_key:
        with open("data/openai.key", "r", encoding="utf-8") as f:
            openai_key = f.read().strip()

    agent = SmolAgent(openai_api_key=openai_key)

    with open('data/questions.json', 'r', encoding='utf-8') as f:
        questions = json.load(f)

    for q in questions:
        print('\n===')
        print(q)
        print('\n---')
        a = agent.run(q)
        print('\n---')
        print(a)