CoralLeiCN
committed on
Commit
·
646e8e8
1
Parent(s):
a63626e
Add CodeExecutionTool for executing Python code and answering questions
Browse files- agent/agents.py +3 -0
- agent/tools.py +62 -11
agent/agents.py
CHANGED
@@ -14,6 +14,7 @@ from agent.tools import (
|
|
14 |
TranscribeAudioBytes,
|
15 |
TranscribeYoutubeVideo,
|
16 |
UnderstandImageBytes,
|
|
|
17 |
)
|
18 |
from agent.utils import gemini_client, gemini_model_liteLLM
|
19 |
|
@@ -52,6 +53,7 @@ class BasicAgent:
|
|
52 |
download_file_tool = DownloadFile()
|
53 |
read_excel_file_tool = ReadExcelFileBytes()
|
54 |
understand_image_bytes = UnderstandImageBytes()
|
|
|
55 |
|
56 |
model = gemini_model_liteLLM(self.model)
|
57 |
|
@@ -68,6 +70,7 @@ class BasicAgent:
|
|
68 |
download_file_tool,
|
69 |
read_excel_file_tool,
|
70 |
understand_image_bytes,
|
|
|
71 |
],
|
72 |
model=model,
|
73 |
step_callbacks=STEP_CALLBACKS,
|
|
|
14 |
TranscribeAudioBytes,
|
15 |
TranscribeYoutubeVideo,
|
16 |
UnderstandImageBytes,
|
17 |
+
CodeExecutionTool,
|
18 |
)
|
19 |
from agent.utils import gemini_client, gemini_model_liteLLM
|
20 |
|
|
|
53 |
download_file_tool = DownloadFile()
|
54 |
read_excel_file_tool = ReadExcelFileBytes()
|
55 |
understand_image_bytes = UnderstandImageBytes()
|
56 |
+
code_execution_tool = CodeExecutionTool()
|
57 |
|
58 |
model = gemini_model_liteLLM(self.model)
|
59 |
|
|
|
70 |
download_file_tool,
|
71 |
read_excel_file_tool,
|
72 |
understand_image_bytes,
|
73 |
+
code_execution_tool,
|
74 |
],
|
75 |
model=model,
|
76 |
step_callbacks=STEP_CALLBACKS,
|
agent/tools.py
CHANGED
@@ -1,10 +1,60 @@
|
|
|
|
|
|
|
|
1 |
from google import genai
|
2 |
from google.genai import types
|
3 |
-
from smolagents import Tool
|
4 |
-
import requests
|
5 |
from PIL import Image
|
6 |
-
import
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
class UnderstandImageBytes(Tool):
|
10 |
name = "understand_image_bytes"
|
@@ -34,12 +84,12 @@ class UnderstandImageBytes(Tool):
|
|
34 |
response = client.models.generate_content(
|
35 |
model="gemini-2.5-flash-preview-05-20",
|
36 |
contents=[
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
]
|
43 |
)
|
44 |
|
45 |
return response.text
|
@@ -156,8 +206,9 @@ class ReadExcelFileBytes(Tool):
|
|
156 |
output_type = "string"
|
157 |
|
158 |
def forward(self, excel_bytes: str):
|
159 |
-
import pandas as pd
|
160 |
from io import BytesIO
|
161 |
|
|
|
|
|
162 |
df = pd.read_excel(BytesIO(excel_bytes))
|
163 |
return df.to_string()
|
|
|
1 |
+
import io
|
2 |
+
|
3 |
+
import requests
|
4 |
from google import genai
|
5 |
from google.genai import types
|
|
|
|
|
6 |
from PIL import Image
|
7 |
+
from smolagents import Tool
|
8 |
+
|
9 |
+
|
10 |
+
class CodeExecutionTool(Tool):
    """Tool that asks Gemini to run a Python snippet and report the outcome.

    The snippet (plus an optional question about it) is sent to the Gemini
    model with the code-execution tool enabled, and the model's final textual
    answer is returned.
    """

    name = "execute_code"
    description = """Execute Python code and answer the question if provided.
    This tool uses Gemini to execute Python code and returns the output of the execution.
    The code should be a valid Python snippet that can be executed safely.
    """
    inputs = {
        "code_bytes": {
            "type": "string",
            "description": "The Python code to execute in bytes",
        },
        "question": {
            "type": "string",
            "description": "Optional question to answer based on the code execution",
            "nullable": True,
        },
    }
    output_type = "string"

    def forward(self, code_bytes: str, question: str = ""):
        """Run ``code_bytes`` through Gemini code execution and return the answer.

        Args:
            code_bytes: The Python source to run. The input schema declares a
                string while its description mentions bytes, so both ``str``
                and UTF-8 encoded ``bytes``/``bytearray`` are accepted.
            question: Optional question to answer from the execution result.
                ``None`` (the input is nullable) is treated as no question.

        Returns:
            The last text part of the model response, or the output of the
            last code execution result when the response has no text part.

        Raises:
            ValueError: If the response contains neither text nor code
                execution output.
        """
        # Only bytes-like values have .decode(); a plain str is used as-is.
        if isinstance(code_bytes, (bytes, bytearray)):
            code_str = code_bytes.decode("utf-8")
        else:
            code_str = str(code_bytes)

        # Avoid sending the literal text "None" when no question was given.
        question = question or ""

        # No padding before the code (leading spaces would alter the
        # indentation of its first line) and the Markdown fence is closed.
        prompt = (
            f"{question}\n\n"
            "Run this code and answer the question:\n"
            f"```python\n{code_str}\n```"
        )

        client = genai.Client()
        contents = [
            types.Content(
                role="user",
                parts=[types.Part(text=prompt)],
            )
        ]

        # usually the response will have four parts:
        # 1. The first response
        # 2. The code execution
        # 3. The code execution result
        # 4. The final answer (augmented with the code execution result)
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-05-20",
            contents=contents,
            config=types.GenerateContentConfig(
                tools=[types.Tool(code_execution=types.ToolCodeExecution)]
            ),
        )

        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content is not None else None) or []

        # The last part is not guaranteed to be text, so look for the last
        # part that actually carries text instead of indexing blindly.
        for part in reversed(parts):
            if part.text:
                return part.text

        # No textual answer: fall back to the raw execution output.
        for part in reversed(parts):
            result = part.code_execution_result
            if result is not None and result.output:
                return result.output

        raise ValueError("Gemini returned no text or code execution output")
|
57 |
+
|
58 |
|
59 |
class UnderstandImageBytes(Tool):
|
60 |
name = "understand_image_bytes"
|
|
|
84 |
response = client.models.generate_content(
|
85 |
model="gemini-2.5-flash-preview-05-20",
|
86 |
contents=[
|
87 |
+
f"{prompt} And answer the question accurately based on the visual information in the image. question: {question} ",
|
88 |
+
types.Part.from_bytes(
|
89 |
+
data=image_bytes,
|
90 |
+
mime_type=f"image/{image.format}",
|
91 |
+
),
|
92 |
+
],
|
93 |
)
|
94 |
|
95 |
return response.text
|
|
|
206 |
output_type = "string"
|
207 |
|
208 |
def forward(self, excel_bytes: str):
    """Render an in-memory Excel workbook as plain text.

    ``excel_bytes`` is wrapped in a ``BytesIO`` buffer, so despite the ``str``
    annotation it has to be a bytes-like object. The buffer is read with
    ``pandas.read_excel`` and the resulting DataFrame is returned via
    ``to_string()``.
    """
    # Imports are kept local to the method, as in the original.
    from io import BytesIO

    import pandas as pd

    workbook_buffer = BytesIO(excel_bytes)
    frame = pd.read_excel(workbook_buffer)
    return frame.to_string()
|