from smolagents import CodeAgent, tool, OpenAIServerModel import requests import io from PIL import Image from constants import DEFAULT_API_URL class BasicAgent: def __init__(self, OPENROUTER_API: str, OPENROUTER_MODEL: str, api_url: str): self.api_url = api_url self.model = OpenAIServerModel( model_id=OPENROUTER_MODEL, api_base="https://openrouter.ai/api/v1", api_key=OPENROUTER_API ) self.agent = None def initialize_agent(self): self.agent = CodeAgent( model=self.model, tools=[get_file], add_base_tools=True, additional_authorized_imports=['pandas', 'io'] ) # self.agent.tools.pop('visit_webpage') # replacing tool return self.agent def run(self, input: tuple): id, question, file = input # check if has image img = None if file and file.split('.')[1] == 'png': img = [Image.open(get_file(file))] # make a prompt prompt = f""" You are CodeAgent assistant. The user asks you a question and you provide them with a verified and specific answer. As an agent you have some tools to use. You may use them on demand. Your normal workflow should follow the following sequence: 1. You received the question and (in some cases) additional information as a file_id. You analize it. 2. At the planning stage you making a sequence of steps to complete this task in planning variable 3. At the execution stage you provide an executable python code which will be parsed and executed by the program. Remember to provide executable code here, without any additional characters which may crash the execution and parsing. 4. After execution you decide whether you had a final result an you can close the task providing this result or you should continue. If ccontinue, review you previous planning sequence: what is done, what should be change in this plan or what we should add to it? EACH STEP INCLUDES BOTH PLANNING AND EXECUTIONS STAGES: * you provide planning in a separate variable at the beggining of your answer, variable name: planning * execution is the rest code where you trying to achive the planned goals. The example: User: What is Axelord? [Step 1] Agent: Planning stage: planning = " Okay, user want me to tell him what is Axelord. I do not have this answer in my memory so I need to find it. Plan: 1. To search: What is Axelord. 2. Provide an aswer. " Execution stage: query = "What is Axelord?" search_result = web_search(query) print(search_result) Result: The pages about this topic. Output: None [Step 2] Agent: Planning stage: planning = " Okay, I found some websites about that. Plan: 1. To search: What is Axelord. - DONE. 2. Visit the webpage to get more inforamtion. 3. Provide an answer. " Execution stage: url = "https://en.wikipedia.org/wiki/Axelord" wikipedia_page = visit_webpage(url) print(wikipedia_page) Result: page html output which contains an answer. Output: FinalAnswerTool(Answer) In the example above you saw how you can use searching tools, but you also has some others. You complete the task step by step. You recognise when the planning stage and you can do planning, and when the execution stage and you MUST provide an executable python code. The user's question is: {question} --- Additional information you may use: file_id: {file} IMPORTANT: * You do not answer in plain text. * You only answer in python code. Every plain text you need to provide you should include in a text variable * You may receive both images sometimes as a context. Do not forget to use them to handle the task. """ # run return self.agent.run( task = prompt, images = img ) # tools @tool def get_file(file_id: str) -> io.BytesIO | io.StringIO | None: """A tool that fetches the file's content from the server. Use it every time you need to get file for completing the task. IMPORTANT: ONLY USE IT WHEN THE FILE_ID ARGUMENT IS NOT EMPTY. YOU MUST ENSURE THAT FILE_ID ARGUMENT IS NOT EMPTY WHEN YOU USE IT. Args: file_id: id of a file required to fetch Returns: io.BytesIO: file emulation for .xlsx or .png files. You can use this object as a file itself. It is already converted to BytesIO. io.StringIO: file emulation for .py files. You can use this object as a file itself. It is already converted to StringIO. """ # make request response = requests.get(DEFAULT_API_URL+'/files/'+file_id.split('.')[0], timeout=15) response.raise_for_status() # prepare the output file_format = file_id.split('.')[1] match file_format: case 'png': print('png uploaded') return io.BytesIO(response.content) case 'xlsx': print('xlsx uploaded') return io.BytesIO(response.content) case 'py': print('py uploaded') return io.StringIO(response.text) case _: return None