CoralLeiCN committed on
Commit
646e8e8
·
1 Parent(s): a63626e

Add CodeExecutionTool for executing Python code and answering questions

Browse files
Files changed (2) hide show
  1. agent/agents.py +3 -0
  2. agent/tools.py +62 -11
agent/agents.py CHANGED
@@ -14,6 +14,7 @@ from agent.tools import (
14
  TranscribeAudioBytes,
15
  TranscribeYoutubeVideo,
16
  UnderstandImageBytes,
 
17
  )
18
  from agent.utils import gemini_client, gemini_model_liteLLM
19
 
@@ -52,6 +53,7 @@ class BasicAgent:
52
  download_file_tool = DownloadFile()
53
  read_excel_file_tool = ReadExcelFileBytes()
54
  understand_image_bytes = UnderstandImageBytes()
 
55
 
56
  model = gemini_model_liteLLM(self.model)
57
 
@@ -68,6 +70,7 @@ class BasicAgent:
68
  download_file_tool,
69
  read_excel_file_tool,
70
  understand_image_bytes,
 
71
  ],
72
  model=model,
73
  step_callbacks=STEP_CALLBACKS,
 
14
  TranscribeAudioBytes,
15
  TranscribeYoutubeVideo,
16
  UnderstandImageBytes,
17
+ CodeExecutionTool,
18
  )
19
  from agent.utils import gemini_client, gemini_model_liteLLM
20
 
 
53
  download_file_tool = DownloadFile()
54
  read_excel_file_tool = ReadExcelFileBytes()
55
  understand_image_bytes = UnderstandImageBytes()
56
+ code_execution_tool = CodeExecutionTool()
57
 
58
  model = gemini_model_liteLLM(self.model)
59
 
 
70
  download_file_tool,
71
  read_excel_file_tool,
72
  understand_image_bytes,
73
+ code_execution_tool,
74
  ],
75
  model=model,
76
  step_callbacks=STEP_CALLBACKS,
agent/tools.py CHANGED
@@ -1,10 +1,60 @@
 
 
 
1
  from google import genai
2
  from google.genai import types
3
- from smolagents import Tool
4
- import requests
5
  from PIL import Image
6
- import io
7
- from google.genai import types
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  class UnderstandImageBytes(Tool):
10
  name = "understand_image_bytes"
@@ -34,12 +84,12 @@ class UnderstandImageBytes(Tool):
34
  response = client.models.generate_content(
35
  model="gemini-2.5-flash-preview-05-20",
36
  contents=[
37
- f"{prompt} And answer the question accurately based on the visual information in the image. question: {question} ",
38
- types.Part.from_bytes(
39
- data=image_bytes,
40
- mime_type=f'image/{image.format}',
41
- ),
42
- ]
43
  )
44
 
45
  return response.text
@@ -156,8 +206,9 @@ class ReadExcelFileBytes(Tool):
156
  output_type = "string"
157
 
158
  def forward(self, excel_bytes: str):
159
- import pandas as pd
160
  from io import BytesIO
161
 
 
 
162
  df = pd.read_excel(BytesIO(excel_bytes))
163
  return df.to_string()
 
1
+ import io
2
+
3
+ import requests
4
  from google import genai
5
  from google.genai import types
 
 
6
  from PIL import Image
7
+ from smolagents import Tool
8
+
9
+
10
class CodeExecutionTool(Tool):
    """Run a Python snippet through Gemini's code-execution tool.

    The snippet is embedded in a prompt together with an optional question.
    Gemini is asked to execute it, and the final textual answer from the
    response is returned.
    """

    name = "execute_code"
    description = """Execute Python code and answer the question if provided.
    This tool uses Gemini to execute Python code and returns the output of the execution.
    The code should be a valid Python snippet that can be executed safely.
    """
    inputs = {
        "code_bytes": {
            "type": "string",
            "description": "The Python code to execute in bytes",
        },
        "question": {
            "type": "string",
            "description": "Optional question to answer based on the code execution",
            "nullable": True,
        },
    }
    output_type = "string"

    def forward(self, code_bytes: str, question: str = ""):
        """Send the code (and optional question) to Gemini and return its answer.

        Args:
            code_bytes: The Python source, either as UTF-8 encoded bytes or as
                an already-decoded string.
            question: Optional question to answer from the execution result.
                ``None`` is treated like an empty string.

        Returns:
            The last non-empty text part of the first candidate; if there is
            none, the output of the last code-execution result; otherwise "".
        """
        # The input is declared as a string but named/described as bytes, so
        # accept both: only bytes-like values have (and need) .decode().
        if isinstance(code_bytes, (bytes, bytearray)):
            code_str = code_bytes.decode("utf-8")
        else:
            code_str = str(code_bytes)

        # "question" is nullable in the schema; keep None from being rendered
        # as the literal text "None" inside the prompt.
        question = question or ""

        # The fence is closed and the code starts at column 0 so that the
        # snippet's first line is not shown with a stray leading space.
        prompt = (
            f"{question} \n\n Run this code and answer the question: \n"
            f"```python\n{code_str}\n```"
        )

        client = genai.Client()
        contents = [
            types.Content(
                role="user",
                parts=[types.Part(text=prompt)],
            )
        ]

        # usually the response will have four parts:
        # 1. The first response
        # 2. The code execution
        # 3. The code execution result
        # 4. The final answer (augmented with the code execution result)
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-05-20",
            contents=contents,
            config=types.GenerateContentConfig(
                tools=[types.Tool(code_execution=types.ToolCodeExecution)]
            ),
        )

        return self._final_text(response)

    def _final_text(self, response):
        """Pick the most useful text out of a generate_content response."""
        candidates = getattr(response, "candidates", None) or []
        if not candidates:
            return ""

        content = candidates[0].content
        parts = (content.parts if content is not None else None) or []

        # Prefer the last part that actually carries text: the final part is
        # not guaranteed to be a text part, in which case .text is None.
        for part in reversed(parts):
            if part.text:
                return part.text

        # No textual answer at all: fall back to the raw execution output.
        for part in reversed(parts):
            result = getattr(part, "code_execution_result", None)
            if result is not None and result.output:
                return result.output

        return ""
57
+
58
 
59
  class UnderstandImageBytes(Tool):
60
  name = "understand_image_bytes"
 
84
  response = client.models.generate_content(
85
  model="gemini-2.5-flash-preview-05-20",
86
  contents=[
87
+ f"{prompt} And answer the question accurately based on the visual information in the image. question: {question} ",
88
+ types.Part.from_bytes(
89
+ data=image_bytes,
90
+ mime_type=f"image/{image.format}",
91
+ ),
92
+ ],
93
  )
94
 
95
  return response.text
 
206
  output_type = "string"
207
 
208
  def forward(self, excel_bytes: str):
 
209
  from io import BytesIO
210
 
211
+ import pandas as pd
212
+
213
  df = pd.read_excel(BytesIO(excel_bytes))
214
  return df.to_string()