Commit d2beadd · 1 parent f4ec98b · Update

Files changed:
- app.py +18 -51
- opentools/__init__.py +0 -0
- opentools/engine/base.py +43 -0
- opentools/engine/openai.py +268 -0
- opentools/models/executor.py +240 -0
- opentools/models/formatters.py +40 -0
- opentools/models/initializer.py +179 -0
- opentools/models/memory.py +84 -0
- opentools/models/planner.py +368 -0
- opentools/models/utlis.py +73 -0
- opentools/tools/README.md +44 -0
- opentools/tools/__init__.py +0 -0
- opentools/tools/base.py +103 -0
- opentools/tools/generalist_solution_generator/examples/mathvista_113.png +0 -0
- opentools/tools/generalist_solution_generator/tool.py +142 -0
- requirements.txt +13 -0
- setup.py +20 -0
app.py CHANGED
@@ -1,57 +1,24 @@

```diff
-import gradio as gr
 import os
-import datetime
+import sys
+import json
+import argparse
+import time
+import io
+import uuid
 from PIL import Image
+from typing import List, Dict, Any, Iterator
+import gradio as gr
 
-def save_image(image):
-    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    file_path = f"uploaded_images/image_{timestamp}.png"
-
-    # Save the image
-    image.save(file_path)
-
-    # Check if saved
-    print(f"Image saved to: {file_path} | Exists: {os.path.exists(file_path)}")
-
-    # Open image and print dims
-    img = Image.open(file_path)
-    print(f"Image dimensions: {img.size}")
-
-    return f"Image saved to: {file_path}"
-
-demo = gr.Interface(
-    fn=save_image,
-    inputs=gr.Image(type="pil"),  # Accepts PIL Image objects
-    outputs="text"
-)
-
-demo.launch()
-
-# import os
-# import sys
-# import json
-# import argparse
-# import time
-# import io
-# import uuid
-# from PIL import Image
-# from typing import List, Dict, Any, Iterator
-# import gradio as gr
-
-# # Add the project root to the Python path
-# current_dir = os.path.dirname(os.path.abspath(__file__))
-# project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
-# sys.path.insert(0, project_root)
-
-# from opentools.models.initializer import Initializer
-# from opentools.models.planner import Planner
-# from opentools.models.memory import Memory
-# from opentools.models.executor import Executor
-# from opentools.models.utlis import make_json_serializable
+# Add the project root to the Python path
+current_dir = os.path.dirname(os.path.abspath(__file__))
+project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
+sys.path.insert(0, project_root)
+
+from .opentools.models.initializer import Initializer
+from .opentools.models.planner import Planner
+from .opentools.models.memory import Memory
+from .opentools.models.executor import Executor
+from .opentools.models.utlis import make_json_serializable
 
 # solver = None
```
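Note on the new imports: `from .opentools...` is a relative import, which only resolves when `app.py` is loaded as a module of a package; when it runs as a top-level script (the usual Spaces entry point), Python raises `ImportError: attempted relative import with no known parent package`. A sketch of the absolute form, consistent with the style used inside the package modules added below:

```python
# Sketch only: absolute-import variant matching the modules added in this
# commit (app.py itself uses the relative form above).
from opentools.models.initializer import Initializer
from opentools.models.planner import Planner
from opentools.models.memory import Memory
from opentools.models.executor import Executor
from opentools.models.utlis import make_json_serializable  # "utlis" is the module's actual name
```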
opentools/__init__.py ADDED
File without changes (empty file)
opentools/engine/base.py ADDED
@@ -0,0 +1,43 @@

```python
import hashlib
import diskcache as dc
from abc import ABC, abstractmethod

class EngineLM(ABC):
    system_prompt: str = "You are a helpful, creative, and smart assistant."
    model_string: str

    @abstractmethod
    def generate(self, prompt, system_prompt=None, **kwargs):
        pass

    def __call__(self, *args, **kwargs):
        pass


class CachedEngine:
    def __init__(self, cache_path):
        super().__init__()
        self.cache_path = cache_path
        self.cache = dc.Cache(cache_path)

    def _hash_prompt(self, prompt: str):
        return hashlib.sha256(f"{prompt}".encode()).hexdigest()

    def _check_cache(self, prompt: str):
        if prompt in self.cache:
            return self.cache[prompt]
        else:
            return None

    def _save_cache(self, prompt: str, response: str):
        self.cache[prompt] = response

    def __getstate__(self):
        # Remove the cache from the state before pickling
        state = self.__dict__.copy()
        del state['cache']
        return state

    def __setstate__(self, state):
        # Restore the cache after unpickling
        self.__dict__.update(state)
        self.cache = dc.Cache(self.cache_path)
```
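As a usage sketch (hypothetical, not part of this commit): a concrete engine subclasses `EngineLM` and mixes in `CachedEngine`. The `__getstate__`/`__setstate__` pair exists so instances survive pickling: the `diskcache.Cache` handle is dropped on dump and reopened from `cache_path` on load. `EchoEngine` and the cache path below are made-up names.

```python
import pickle

class EchoEngine(EngineLM, CachedEngine):
    def __init__(self, cache_path="./echo_cache.db"):
        CachedEngine.__init__(self, cache_path=cache_path)

    def generate(self, prompt, system_prompt=None, **kwargs):
        cached = self._check_cache(prompt)
        if cached is not None:
            return cached
        response = f"echo: {prompt}"  # a real engine would call an LLM here
        self._save_cache(prompt, response)
        return response

engine = EchoEngine()
assert engine.generate("hi") == "echo: hi"      # computed, then cached
assert engine._check_cache("hi") == "echo: hi"  # served from diskcache

# Pickling drops the live cache handle and reopens it from cache_path,
# so the cached entry is still visible after a round trip.
engine2 = pickle.loads(pickle.dumps(engine))
assert engine2._check_cache("hi") == "echo: hi"
```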
opentools/engine/openai.py ADDED
@@ -0,0 +1,268 @@

```python
try:
    from openai import OpenAI
except ImportError:
    raise ImportError("If you'd like to use OpenAI models, please install the openai package by running `pip install openai`, and add 'OPENAI_API_KEY' to your environment variables.")

import os
import json
import base64
import platformdirs
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)
from typing import List, Union

from .base import EngineLM, CachedEngine

import openai

from dotenv import load_dotenv
load_dotenv()

# Define global constant for structured models
# https://platform.openai.com/docs/guides/structured-outputs
# https://cookbook.openai.com/examples/structured_outputs_intro
from pydantic import BaseModel

class DefaultFormat(BaseModel):
    response: str

# Define global constant for structured models
OPENAI_STRUCTURED_MODELS = ['gpt-4o', 'gpt-4o-2024-08-06', 'gpt-4o-mini', 'gpt-4o-mini-2024-07-18']


class ChatOpenAI(EngineLM, CachedEngine):
    DEFAULT_SYSTEM_PROMPT = "You are a helpful, creative, and smart assistant."

    def __init__(
        self,
        model_string="gpt-4o-mini-2024-07-18",
        system_prompt=DEFAULT_SYSTEM_PROMPT,
        is_multimodal: bool=False,
        # enable_cache: bool=True,
        enable_cache: bool=False,  # NOTE: disable cache for now
        **kwargs):
        """
        :param model_string:
        :param system_prompt:
        :param is_multimodal:
        """
        if enable_cache:
            root = platformdirs.user_cache_dir("opentools")
            cache_path = os.path.join(root, f"cache_openai_{model_string}.db")
            # For example, cache_path = /root/.cache/opentools/cache_openai_gpt-4o-mini.db
            # print(f"Cache path: {cache_path}")

            self.image_cache_dir = os.path.join(root, "image_cache")
            os.makedirs(self.image_cache_dir, exist_ok=True)

            super().__init__(cache_path=cache_path)

        self.system_prompt = system_prompt
        if os.getenv("OPENAI_API_KEY") is None:
            raise ValueError("Please set the OPENAI_API_KEY environment variable if you'd like to use OpenAI models.")

        self.client = OpenAI(
            api_key=os.getenv("OPENAI_API_KEY"),
        )
        self.model_string = model_string
        self.is_multimodal = is_multimodal
        self.enable_cache = enable_cache

        if enable_cache:
            print(f"!! Cache enabled for model: {self.model_string}")
        else:
            print(f"!! Cache disabled for model: {self.model_string}")

    @retry(wait=wait_random_exponential(min=1, max=5), stop=stop_after_attempt(5))
    def generate(self, content: Union[str, List[Union[str, bytes]]], system_prompt=None, **kwargs):
        try:
            # Print retry attempt information
            attempt_number = self.generate.retry.statistics.get('attempt_number', 0) + 1
            if attempt_number > 1:
                print(f"Attempt {attempt_number} of 5")

            if isinstance(content, str):
                return self._generate_text(content, system_prompt=system_prompt, **kwargs)

            elif isinstance(content, list):
                if (not self.is_multimodal):
                    raise NotImplementedError("Multimodal generation is only supported for GPT-4 models.")

                return self._generate_multimodal(content, system_prompt=system_prompt, **kwargs)

        except openai.LengthFinishReasonError as e:
            print(f"Token limit exceeded: {str(e)}")
            print(f"Tokens used - Completion: {e.completion.usage.completion_tokens}, Prompt: {e.completion.usage.prompt_tokens}, Total: {e.completion.usage.total_tokens}")
            return {
                "error": "token_limit_exceeded",
                "message": str(e),
                "details": {
                    "completion_tokens": e.completion.usage.completion_tokens,
                    "prompt_tokens": e.completion.usage.prompt_tokens,
                    "total_tokens": e.completion.usage.total_tokens
                }
            }
        except openai.RateLimitError as e:
            print(f"Rate limit error encountered: {str(e)}")
            return {
                "error": "rate_limit",
                "message": str(e),
                "details": getattr(e, 'args', None)
            }
        except Exception as e:
            print(f"Error in generate method: {str(e)}")
            print(f"Error type: {type(e).__name__}")
            print(f"Error details: {e.args}")
            return {
                "error": type(e).__name__,
                "message": str(e),
                "details": getattr(e, 'args', None)
            }

    def _generate_text(
        self, prompt, system_prompt=None, temperature=0, max_tokens=4000, top_p=0.99, response_format=None
    ):

        sys_prompt_arg = system_prompt if system_prompt else self.system_prompt

        if self.enable_cache:
            cache_key = sys_prompt_arg + prompt
            cache_or_none = self._check_cache(cache_key)
            if cache_or_none is not None:
                return cache_or_none

        if self.model_string in ['o1', 'o1-mini']:  # only supports base response currently
            # print(f"Using structured model: {self.model_string}")
            response = self.client.beta.chat.completions.parse(
                model=self.model_string,
                messages=[
                    {"role": "user", "content": prompt},
                ],
                max_completion_tokens=max_tokens
            )
            if response.choices[0].finish_reason == "length":
                response = "Token limit exceeded"
            else:
                response = response.choices[0].message.parsed
        elif self.model_string in OPENAI_STRUCTURED_MODELS and response_format is not None:
            # print(f"Using structured model: {self.model_string}")
            response = self.client.beta.chat.completions.parse(
                model=self.model_string,
                messages=[
                    {"role": "system", "content": sys_prompt_arg},
                    {"role": "user", "content": prompt},
                ],
                frequency_penalty=0,
                presence_penalty=0,
                stop=None,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
                response_format=response_format
            )
            response = response.choices[0].message.parsed
        else:
            # print(f"Using non-structured model: {self.model_string}")
            response = self.client.chat.completions.create(
                model=self.model_string,
                messages=[
                    {"role": "system", "content": sys_prompt_arg},
                    {"role": "user", "content": prompt},
                ],
                frequency_penalty=0,
                presence_penalty=0,
                stop=None,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
            )
            response = response.choices[0].message.content

        if self.enable_cache:
            self._save_cache(cache_key, response)
        return response

    def __call__(self, prompt, **kwargs):
        return self.generate(prompt, **kwargs)

    def _format_content(self, content: List[Union[str, bytes]]) -> List[dict]:
        formatted_content = []
        for item in content:
            if isinstance(item, bytes):
                base64_image = base64.b64encode(item).decode('utf-8')
                formatted_content.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                })
            elif isinstance(item, str):
                formatted_content.append({
                    "type": "text",
                    "text": item
                })
            else:
                raise ValueError(f"Unsupported input type: {type(item)}")
        return formatted_content

    def _generate_multimodal(
        self, content: List[Union[str, bytes]], system_prompt=None, temperature=0, max_tokens=4000, top_p=0.99, response_format=None
    ):
        sys_prompt_arg = system_prompt if system_prompt else self.system_prompt
        formatted_content = self._format_content(content)

        if self.enable_cache:
            cache_key = sys_prompt_arg + json.dumps(formatted_content)
            cache_or_none = self._check_cache(cache_key)
            if cache_or_none is not None:
                # print(f"Cache hit for prompt: {cache_key[:200]}")
                return cache_or_none

        if self.model_string in ['o1', 'o1-mini']:  # only supports base response currently
            # print(f"Using structured model: {self.model_string}")
            print(f'Max tokens: {max_tokens}')
            response = self.client.chat.completions.create(
                model=self.model_string,
                messages=[
                    {"role": "user", "content": formatted_content},
                ],
                max_completion_tokens=max_tokens
            )
            if response.choices[0].finish_reason == "length":
                response_text = "Token limit exceeded"
            else:
                response_text = response.choices[0].message.content
        elif self.model_string in OPENAI_STRUCTURED_MODELS and response_format is not None:
            # print(f"Using structured model: {self.model_string}")
            response = self.client.beta.chat.completions.parse(
                model=self.model_string,
                messages=[
                    {"role": "system", "content": sys_prompt_arg},
                    {"role": "user", "content": formatted_content},
                ],
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
                response_format=response_format
            )
            response_text = response.choices[0].message.parsed
        else:
            # print(f"Using non-structured model: {self.model_string}")
            response = self.client.chat.completions.create(
                model=self.model_string,
                messages=[
                    {"role": "system", "content": sys_prompt_arg},
                    {"role": "user", "content": formatted_content},
                ],
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
            )
            response_text = response.choices[0].message.content

        if self.enable_cache:
            self._save_cache(cache_key, response_text)
        return response_text
```
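A hypothetical usage sketch of `ChatOpenAI` (assumes `OPENAI_API_KEY` is set; the model name and image file are illustrative, not part of the commit):

```python
engine = ChatOpenAI(model_string="gpt-4o-mini-2024-07-18", is_multimodal=True)

# Plain string prompt -> _generate_text
answer = engine("What is 2 + 2?")

# Pydantic response_format -> beta.chat.completions.parse; the return value
# is a validated DefaultFormat instance, not raw text
structured = engine("Say hello.", response_format=DefaultFormat)

# Mixed text + raw image bytes -> _format_content builds the
# [{"type": "text"}, {"type": "image_url"}] message payload
with open("example.jpg", "rb") as f:
    caption = engine(["Describe this image.", f.read()])
```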
opentools/models/executor.py ADDED
@@ -0,0 +1,240 @@

````python
import os
# import sys
import importlib
import re
import signal
from typing import Dict, Any, List, Optional
from datetime import datetime

from opentools.engine.openai import ChatOpenAI
from opentools.models.formatters import ToolCommand


class TimeoutError(Exception):
    pass


def timeout_handler(signum, frame):
    raise TimeoutError("Function execution timed out")


class Executor:
    def __init__(self, llm_engine_name: str, root_cache_dir: str = "solver_cache", num_threads: int = 1, max_time: int = 120, max_output_length: int = 100000, enable_signal: bool = True):
        self.llm_engine_name = llm_engine_name
        self.root_cache_dir = root_cache_dir
        self.num_threads = num_threads
        self.max_time = max_time
        self.max_output_length = max_output_length
        self.enable_signal = enable_signal

    def set_query_cache_dir(self, query_cache_dir):
        if query_cache_dir:
            self.query_cache_dir = query_cache_dir
        else:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            self.query_cache_dir = os.path.join(self.root_cache_dir, timestamp)
        os.makedirs(self.query_cache_dir, exist_ok=True)

    def generate_tool_command(self, question: str, image: str, context: str, sub_goal: str, tool_name: str, tool_metadata: Dict[str, Any], bytes_mode: bool = False) -> ToolCommand:
        prompt_generate_tool_command = f"""
Task: Generate a precise command to execute the selected tool based on the given information.

Query: {question}
Image: {image if not bytes_mode else 'image.jpg'}
Context: {context}
Sub-Goal: {sub_goal}
Selected Tool: {tool_name}
Tool Metadata: {tool_metadata}

Instructions:
1. Carefully review all provided information: the query, image path, context, sub-goal, selected tool, and tool metadata.
2. Analyze the tool's input_types from the metadata to understand required and optional parameters.
3. Construct a command or series of commands that aligns with the tool's usage pattern and addresses the sub-goal.
4. Ensure all required parameters are included and properly formatted.
5. Use appropriate values for parameters based on the given context, particularly the `Context` field which may contain relevant information from previous steps.
6. If multiple steps are needed to prepare data for the tool, include them in the command construction.

Output Format:
<analysis>: a step-by-step analysis of the context, sub-goal, and selected tool to guide the command construction.
<explanation>: a detailed explanation of the constructed command(s) and their parameters.
<command>: the Python code to execute the tool, which can be one of the following types:
    a. A single line command with `execution = tool.execute()`.
    b. A multi-line command with complex data preparation, ending with `execution = tool.execute()`.
    c. Multiple lines of `execution = tool.execute()` calls for processing multiple items.
```python
<your command here>
```

Rules:
1. The command MUST be valid Python code and include at least one call to `tool.execute()`.
2. Each `tool.execute()` call MUST be assigned to the 'execution' variable in the format `execution = tool.execute(...)`.
3. For multiple executions, use separate `execution = tool.execute()` calls for each execution.
4. The final output MUST be assigned to the 'execution' variable, either directly from `tool.execute()` or as a processed form of multiple executions.
5. Use the exact parameter names as specified in the tool's input_types.
6. Enclose string values in quotes, use appropriate data types for other values (e.g., lists, numbers).
7. Do not include any code or text that is not part of the actual command.
8. Ensure the command directly addresses the sub-goal and query.
9. Include ALL required parameters, data, and paths to execute the tool in the command itself.
10. If preparation steps are needed, include them as separate Python statements before the `tool.execute()` calls.

Examples (Not to use directly unless relevant):

Example 1 (Single line command):
<analysis>: The tool requires an image path and a list of labels for object detection.
<explanation>: We pass the image path and a list containing "baseball" as the label to detect.
<command>:
```python
execution = tool.execute(image="path/to/image", labels=["baseball"])
```

Example 2 (Multi-line command with data preparation):
<analysis>: The tool requires an image path, multiple labels, and a threshold for object detection.
<explanation>: We prepare the data by defining variables for the image path, labels, and threshold, then pass these to the tool.execute() function.
<command>:
```python
image = "path/to/image"
labels = ["baseball", "football", "basketball"]
threshold = 0.5
execution = tool.execute(image=image, labels=labels, threshold=threshold)
```

Example 3 (Multiple executions):
<analysis>: We need to process multiple images for baseball detection.
<explanation>: We call the tool for each image path, using the same label and threshold for all.
<command>:
```python
execution = tool.execute(image="path/to/image1", labels=["baseball"], threshold=0.5)
execution = tool.execute(image="path/to/image2", labels=["baseball"], threshold=0.5)
execution = tool.execute(image="path/to/image3", labels=["baseball"], threshold=0.5)
```

Some Wrong Examples:
<command>:
```python
execution1 = tool.execute(query="...")
execution2 = tool.execute(query="...")
```
Reason: only `execution = tool.execute` is allowed, not `execution1` or `execution2`.

<command>:
```python
urls = [
    "https://example.com/article1",
    "https://example.com/article2"
]

execution = tool.execute(url=urls[0])
execution = tool.execute(url=urls[1])
```
Reason: The command should process multiple items in a single execution, not separate executions for each item.

Remember: Your <command> field MUST be valid Python code including any necessary data preparation steps and one or more `execution = tool.execute(` calls, without any additional explanatory text. The format `execution = tool.execute` must be strictly followed, and the last line must begin with `execution = tool.execute` to capture the final output.
"""

        llm_generate_tool_command = ChatOpenAI(model_string=self.llm_engine_name, is_multimodal=False)
        tool_command = llm_generate_tool_command(prompt_generate_tool_command, response_format=ToolCommand)

        return tool_command

    # def extract_explanation_and_command(self, text: str) -> tuple:
    #     # Extract explanation
    #     explanation_pattern = r"Command Explanation:(.*?)Generated Command:"
    #     explanation_match = re.search(explanation_pattern, text, re.DOTALL)
    #     explanation = explanation_match.group(1).strip() if explanation_match else "No explanation found."
    #     # Extract command
    #     command_pattern = r"Generated Command:.*?```python\n(.*?)```"
    #     command_match = re.search(command_pattern, text, re.DOTALL)
    #     command = command_match.group(1).strip() if command_match else "No command found."

    def extract_explanation_and_command(self, response: ToolCommand) -> tuple:
        def normalize_code(code: str) -> str:
            # Remove leading and trailing whitespace and triple backticks
            return re.sub(r'^```python\s*', '', code).rstrip('```').strip()

        explanation = response.explanation.strip()
        command = normalize_code(response.command.strip())
        return explanation, command

    def execute_tool_command(self, tool_name: str, command: str) -> Any:
        """
        Execute a tool command with timeout protection. If execution exceeds max_time seconds,
        the function will be interrupted and return a timeout message.

        Args:
            tool_name (str): Name of the tool to execute
            command (str): Command string containing tool.execute() calls

        Returns:
            Any: List of execution results or error message
        """

        def split_commands(command: str) -> List[str]:
            # Use regex to find all tool.execute() commands and their surrounding code
            pattern = r'.*?execution\s*=\s*tool\.execute\([^\n]*\)\s*(?:\n|$)'
            blocks = re.findall(pattern, command, re.DOTALL)
            return [block.strip() for block in blocks if block.strip()]

        def execute_with_timeout(block: str, local_context: dict) -> Optional[str]:
            if self.enable_signal:
                # Set up the timeout handler
                signal.signal(signal.SIGALRM, timeout_handler)
                signal.alarm(self.max_time)

            try:
                # Execute the block in the local context
                exec(block, globals(), local_context)
                result = local_context.get('execution')
                if self.enable_signal:
                    signal.alarm(0)  # Disable the alarm
                return result
            except TimeoutError:
                return f"Execution timed out after {self.max_time} seconds"
            finally:
                if self.enable_signal:
                    signal.alarm(0)  # Ensure alarm is disabled even if other exceptions occur

        # Import the tool module and instantiate it
        module_name = f"tools.{tool_name.lower().replace('_tool', '')}.tool"

        # print(f"Attempting to import module: {module_name}")
        # print(f"Current sys.path: {sys.path}")

        try:
            # Dynamically import the module
            module = importlib.import_module(module_name)

            # Get the tool class
            tool_class = getattr(module, tool_name)

            # Check if the tool requires an LLM engine
            # NOTE FIXME may need to refine base.py and tool.py to handle this better
            if getattr(tool_class, 'require_llm_engine', False):
                # Instantiate the tool with the model_string
                tool = tool_class(model_string=self.llm_engine_name)
            else:
                # Instantiate the tool without model_string for tools that don't require it
                tool = tool_class()

            # Set the custom output directory
            # NOTE FIXME: May have a better way to handle this
            tool.set_custom_output_dir(self.query_cache_dir)

            # Split the command into blocks, execute each one and store execution results
            command_blocks = split_commands(command)
            executions = []

            for block in command_blocks:
                # Create a local context to safely execute the block
                local_context = {'tool': tool}

                # Execute the block with timeout protection
                result = execute_with_timeout(block, local_context)

                if result is not None:
                    executions.append(result)
                else:
                    executions.append(f"No execution captured from block: {block}")

            # Return all the execution results
            return executions
        except Exception as e:
            return f"Error in execute_tool_command: {str(e)}"
````
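A quick illustration (hypothetical, not part of the commit) of how `split_commands` carves a generated command into executable blocks: with `re.DOTALL`, the lazy `.*?` sweeps up any preparation lines and attaches them to the next `execution = tool.execute(...)` line.

```python
import re

command = '''image = "img1.png"
execution = tool.execute(image=image, labels=["baseball"])
execution = tool.execute(image="img2.png", labels=["baseball"])'''

pattern = r'.*?execution\s*=\s*tool\.execute\([^\n]*\)\s*(?:\n|$)'
blocks = [b.strip() for b in re.findall(pattern, command, re.DOTALL) if b.strip()]

# Block 1 bundles the preparation line with the first call; block 2 is the
# second call alone. Each block is later exec()'d with a fresh local_context
# containing only `tool`, so preparation variables are visible only inside
# their own block.
assert len(blocks) == 2
assert blocks[0].startswith('image = "img1.png"')
assert blocks[1].startswith('execution = tool.execute(image="img2.png"')
```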
opentools/models/formatters.py ADDED
@@ -0,0 +1,40 @@

```python
from pydantic import BaseModel

# Planner: QueryAnalysis
class QueryAnalysis(BaseModel):
    concise_summary: str
    required_skills: str
    relevant_tools: str
    additional_considerations: str

    def __str__(self):
        return f"""
Concise Summary: {self.concise_summary}

Required Skills:
{self.required_skills}

Relevant Tools:
{self.relevant_tools}

Additional Considerations:
{self.additional_considerations}
"""

# Planner: NextStep
class NextStep(BaseModel):
    justification: str
    context: str
    sub_goal: str
    tool_name: str

# Executor: MemoryVerification
class MemoryVerification(BaseModel):
    analysis: str
    stop_signal: bool

# Executor: ToolCommand
class ToolCommand(BaseModel):
    analysis: str
    explanation: str
    command: str
```
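These Pydantic models are the `response_format` schemas consumed by `ChatOpenAI`: for models in `OPENAI_STRUCTURED_MODELS`, the reply comes back as a validated instance rather than raw text. A hypothetical sketch (the prompt text is illustrative):

```python
from opentools.engine.openai import ChatOpenAI

llm = ChatOpenAI(model_string="gpt-4o-mini-2024-07-18")
step = llm("Decide the next step for the query above.", response_format=NextStep)

# For structured models, `step` is a NextStep instance, so the planner can
# read fields directly instead of parsing free-form text:
print(step.tool_name, step.sub_goal)
```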
opentools/models/initializer.py ADDED
@@ -0,0 +1,179 @@

```python
import os
import sys
import importlib
import inspect
import traceback
from typing import Dict, Any, List, Tuple


class Initializer:
    def __init__(self, enabled_tools: List[str] = [], model_string: str = None):
        self.toolbox_metadata = {}
        self.available_tools = []
        self.enabled_tools = enabled_tools
        self.model_string = model_string  # llm model string

        print("\nInitializing OpenTools...")
        print(f"Enabled tools: {self.enabled_tools}")
        print(f"LLM model string: {self.model_string}")
        self._set_up_tools()

    def get_project_root(self):
        current_dir = os.path.dirname(os.path.abspath(__file__))
        while current_dir != '/':
            if os.path.exists(os.path.join(current_dir, 'opentools')):
                return os.path.join(current_dir, 'opentools')
            current_dir = os.path.dirname(current_dir)
        raise Exception("Could not find project root")

    def load_tools_and_get_metadata(self) -> Dict[str, Any]:
        # Implementation of load_tools_and_get_metadata function
        print("Loading tools and getting metadata...")
        self.toolbox_metadata = {}
        opentools_dir = self.get_project_root()
        tools_dir = os.path.join(opentools_dir, 'tools')

        print(f"OpenTools directory: {opentools_dir}")
        print(f"Tools directory: {tools_dir}")

        # Add the OpenTools directory and its parent to the Python path
        sys.path.insert(0, opentools_dir)
        sys.path.insert(0, os.path.dirname(opentools_dir))
        print(f"Updated Python path: {sys.path}")

        if not os.path.exists(tools_dir):
            print(f"Error: Tools directory does not exist: {tools_dir}")
            return self.toolbox_metadata

        for root, dirs, files in os.walk(tools_dir):
            # print(f"\nScanning directory: {root}")
            if 'tool.py' in files and os.path.basename(root) in self.available_tools:
                file = 'tool.py'
                module_path = os.path.join(root, file)
                module_name = os.path.splitext(file)[0]
                relative_path = os.path.relpath(module_path, opentools_dir)
                import_path = '.'.join(os.path.split(relative_path)).replace(os.sep, '.')[:-3]

                print(f"\nAttempting to import: {import_path}")
                try:
                    module = importlib.import_module(import_path)
                    for name, obj in inspect.getmembers(module):
                        if inspect.isclass(obj) and name.endswith('Tool') and name != 'BaseTool':
                            print(f"Found tool class: {name}")
                            # print(f"Class attributes: {dir(obj)}")
                            # print(f"Class __dict__: {obj.__dict__}")
                            try:
                                # Check if the tool requires an LLM engine
                                if hasattr(obj, 'require_llm_engine') and obj.require_llm_engine:
                                    tool_instance = obj(model_string=self.model_string)
                                else:
                                    tool_instance = obj()

                                # print(f"\nInstance attributes: {dir(tool_instance)}")
                                # print(f"\nInstance __dict__: {tool_instance.__dict__}")

                                self.toolbox_metadata[name] = {
                                    'tool_name': getattr(tool_instance, 'tool_name', 'Unknown'),
                                    'tool_description': getattr(tool_instance, 'tool_description', 'No description'),
                                    'tool_version': getattr(tool_instance, 'tool_version', 'Unknown'),
                                    'input_types': getattr(tool_instance, 'input_types', {}),
                                    'output_type': getattr(tool_instance, 'output_type', 'Unknown'),
                                    'demo_commands': getattr(tool_instance, 'demo_commands', []),
                                    'user_metadata': getattr(tool_instance, 'user_metadata', {}),  # NOTE: This is a placeholder for user-defined metadata
                                    'require_llm_engine': getattr(obj, 'require_llm_engine', False),
                                }
                                print(f"\nMetadata for {name}: {self.toolbox_metadata[name]}")
                            except Exception as e:
                                print(f"Error instantiating {name}: {str(e)}")
                except Exception as e:
                    print(f"Error loading module {module_name}: {str(e)}")

        print(f"\nTotal number of tools loaded: {len(self.toolbox_metadata)}")

        return self.toolbox_metadata

    def run_demo_commands(self) -> List[str]:
        print("\nRunning demo commands for each tool...")
        self.available_tools = []

        for tool_name, tool_data in self.toolbox_metadata.items():
            print(f"\nChecking availability of {tool_name}...")

            try:
                # Import the tool module
                module_name = f"tools.{tool_name.lower().replace('_tool', '')}.tool"
                module = importlib.import_module(module_name)

                # Get the tool class
                tool_class = getattr(module, tool_name)

                # Instantiate the tool
                tool_instance = tool_class()

                # FIXME This is a temporary workaround to avoid running demo commands
                self.available_tools.append(tool_name)

                # # TODO Run the first demo command if available
                # demo_commands = tool_data.get('demo_commands', [])
                # if demo_commands:
                #     print(f"Running demo command: {demo_commands[0]['command']}")
                #     # Extract the arguments from the demo command
                #     command = demo_commands[0]['command']
                #     args_start = command.index('(') + 1
                #     args_end = command.rindex(')')
                #     args_str = command[args_start:args_end]

                #     # Create a dictionary of arguments
                #     args_dict = eval(f"dict({args_str})")

                #     # Execute the demo command
                #     result = tool_instance.execute(**args_dict)
                #     print(f"Demo command executed successfully. Result: {result}")

                #     self.available_tools.append(tool_name)
                # else:
                #     print(f"No demo commands available for {tool_name}")
                #     # If no demo commands, we'll assume the tool is available
                #     self.available_tools.append(tool_name)

            except Exception as e:
                print(f"Error checking availability of {tool_name}: {str(e)}")
                print(traceback.format_exc())

        # Update the toolbox metadata with the available tools
        self.toolbox_metadata = {tool: self.toolbox_metadata[tool] for tool in self.available_tools}
        print(f"\nUpdated total number of available tools: {len(self.toolbox_metadata)}")
        print(f"\nAvailable tools: {self.available_tools}")

        return self.available_tools

    def _set_up_tools(self) -> None:
        print("Setting up tools...")

        # Keep enabled tools
        self.available_tools = [tool.lower().replace('_tool', '') for tool in self.enabled_tools]

        # Load tools and get metadata
        self.load_tools_and_get_metadata()

        # Run demo commands to determine available tools
        self.run_demo_commands()

        # Filter toolbox_metadata to include only available tools
        self.toolbox_metadata = {tool: self.toolbox_metadata[tool] for tool in self.available_tools}

        print(f"\nTotal number of available tools: {len(self.available_tools)}")
        print(f"Available tools: {self.available_tools}")
        print(f"Enabled tools: {self.enabled_tools}")


if __name__ == "__main__":
    enabled_tools = ["Generalist_Solution_Generator_Tool"]
    initializer = Initializer(enabled_tools=enabled_tools)

    print("\nAvailable tools:")
    print(initializer.available_tools)

    print("\nToolbox metadata for available tools:")
    print(initializer.toolbox_metadata)
```
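The loader above relies on a strict naming convention: a tool class `Some_Name_Tool` must live in `opentools/tools/some_name/tool.py`, and both `run_demo_commands` here and `Executor.execute_tool_command` derive the module path the same way. A minimal sketch of that mapping (the helper name is hypothetical):

```python
# Hypothetical helper mirroring the inline expression used in this commit.
def class_to_module(tool_class_name: str) -> str:
    return f"tools.{tool_class_name.lower().replace('_tool', '')}.tool"

assert class_to_module("Generalist_Solution_Generator_Tool") \
    == "tools.generalist_solution_generator.tool"
```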
opentools/models/memory.py ADDED
@@ -0,0 +1,84 @@

```python
from typing import Dict, Any, List, Union, Optional
import os

class Memory:
    # TODO Need to fix this to support multiple data sources (e.g. images, pdf, txt, etc.)

    def __init__(self):
        self.query: Optional[str] = None
        self.files: List[Dict[str, str]] = []
        self.actions: Dict[str, Dict[str, Any]] = {}
        self._init_file_types()

    def set_query(self, query: str) -> None:
        if not isinstance(query, str):
            raise TypeError("Query must be a string")
        self.query = query

    def _init_file_types(self):
        self.file_types = {
            'image': ['.jpg', '.jpeg', '.png', '.gif', '.bmp'],
            'text': ['.txt', '.md'],
            'document': ['.pdf', '.doc', '.docx'],
            'code': ['.py', '.js', '.java', '.cpp', '.h'],
            'data': ['.json', '.csv', '.xml'],
            'spreadsheet': ['.xlsx', '.xls'],
            'presentation': ['.ppt', '.pptx'],
        }
        self.file_type_descriptions = {
            'image': "An image file ({ext} format) provided as context for the query",
            'text': "A text file ({ext} format) containing additional information related to the query",
            'document': "A document ({ext} format) with content relevant to the query",
            'code': "A source code file ({ext} format) potentially related to the query",
            'data': "A data file ({ext} format) containing structured data pertinent to the query",
            'spreadsheet': "A spreadsheet file ({ext} format) with tabular data relevant to the query",
            'presentation': "A presentation file ({ext} format) with slides related to the query",
        }

    def _get_default_description(self, file_name: str) -> str:
        _, ext = os.path.splitext(file_name)
        ext = ext.lower()

        for file_type, extensions in self.file_types.items():
            if ext in extensions:
                return self.file_type_descriptions[file_type].format(ext=ext[1:])

        return f"A file with {ext[1:]} extension, provided as context for the query"

    def add_file(self, file_name: Union[str, List[str]], description: Union[str, List[str], None] = None) -> None:
        if isinstance(file_name, str):
            file_name = [file_name]

        if description is None:
            description = [self._get_default_description(fname) for fname in file_name]
        elif isinstance(description, str):
            description = [description]

        if len(file_name) != len(description):
            raise ValueError("The number of files and descriptions must match.")

        for fname, desc in zip(file_name, description):
            self.files.append({
                'file_name': fname,
                'description': desc
            })

    def add_action(self, step_count: int, tool_name: str, sub_goal: str, command: str, result: Any) -> None:
        action = {
            'tool_name': tool_name,
            'sub_goal': sub_goal,
            'command': command,
            'result': result,
        }
        step_name = f"Action Step {step_count}"
        self.actions[step_name] = action

    def get_query(self) -> Optional[str]:
        return self.query

    def get_files(self) -> List[Dict[str, str]]:
        return self.files

    def get_actions(self) -> Dict[str, Dict[str, Any]]:
        return self.actions
```
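A hypothetical usage sketch of `Memory` (file and result values are illustrative; `Object_Detector_Tool` is borrowed from the planner's example prompt):

```python
memory = Memory()
memory.set_query("How many baseballs are in the image?")
memory.add_file("example/image.jpg")  # description inferred from the extension

memory.add_action(
    step_count=1,
    tool_name="Object_Detector_Tool",
    sub_goal='Detect baseballs in "example/image.jpg"',
    command='execution = tool.execute(image="example/image.jpg", labels=["baseball"])',
    result=["baseball: 4 instances"],
)

# {"Action Step 1": {...}} — this dict is interpolated into the planner's
# next-step, verification, and final-output prompts.
print(memory.get_actions())
```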
opentools/models/planner.py
ADDED
@@ -0,0 +1,368 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
from PIL import Image
|
4 |
+
from io import BytesIO
|
5 |
+
from typing import Dict, Any, List, Tuple
|
6 |
+
|
7 |
+
from opentools.engine.openai import ChatOpenAI
|
8 |
+
from opentools.models.memory import Memory
|
9 |
+
from opentools.models.formatters import QueryAnalysis, NextStep, MemoryVerification
|
10 |
+
|
11 |
+
class Planner:
|
12 |
+
def __init__(self, llm_engine_name: str, toolbox_metadata: dict = None, available_tools: List = None):
|
13 |
+
self.llm_engine_name = llm_engine_name
|
14 |
+
self.llm_engine_mm = ChatOpenAI(model_string=llm_engine_name, is_multimodal=True)
|
15 |
+
self.llm_engine = ChatOpenAI(model_string=llm_engine_name, is_multimodal=False)
|
16 |
+
self.toolbox_metadata = toolbox_metadata if toolbox_metadata is not None else {}
|
17 |
+
self.available_tools = available_tools if available_tools is not None else []
|
18 |
+
|
19 |
+
def get_image_info(self, image_path: str) -> Dict[str, Any]:
|
20 |
+
image_info = {}
|
21 |
+
if image_path and os.path.isfile(image_path):
|
22 |
+
image_info["image_path"] = image_path
|
23 |
+
try:
|
24 |
+
with Image.open(image_path) as img:
|
25 |
+
width, height = img.size
|
26 |
+
image_info.update({
|
27 |
+
"width": width,
|
28 |
+
"height": height
|
29 |
+
})
|
30 |
+
except Exception as e:
|
31 |
+
print(f"Error processing image file: {str(e)}")
|
32 |
+
return image_info
|
33 |
+
|
34 |
+
def get_image_info_bytes(self, bytes: str) -> Dict[str, Any]:
|
35 |
+
image_info = {}
|
36 |
+
if bytes:
|
37 |
+
try:
|
38 |
+
with Image.open(BytesIO(bytes)) as img:
|
39 |
+
width, height = img.size
|
40 |
+
image_info.update({
|
41 |
+
"image_path": 'image.jpg', # generic image name
|
42 |
+
"width": width,
|
43 |
+
"height": height
|
44 |
+
})
|
45 |
+
except Exception as e:
|
46 |
+
print(f"Error processing image bytes: {str(e)}")
|
47 |
+
return image_info
|
48 |
+
|
49 |
+
def generate_base_response(self, question: str, image: str, max_tokens: str = 4000, bytes_mode: bool = False) -> str:
|
50 |
+
if bytes_mode:
|
51 |
+
image_info = self.get_image_info_bytes(image)
|
52 |
+
else:
|
53 |
+
image_info = self.get_image_info(image)
|
54 |
+
|
55 |
+
input_data = [question]
|
56 |
+
if image_info and "image_path" in image_info and not bytes_mode:
|
57 |
+
try:
|
58 |
+
with open(image_info["image_path"], 'rb') as file:
|
59 |
+
image_bytes = file.read()
|
60 |
+
input_data.append(image_bytes)
|
61 |
+
except Exception as e:
|
62 |
+
print(f"Error reading image file: {str(e)}")
|
63 |
+
|
64 |
+
self.base_response = self.llm_engine_mm(input_data, max_tokens=max_tokens)
|
65 |
+
|
66 |
+
return self.base_response
|
67 |
+
|
68 |
+
def analyze_query(self, question: str, image: str, bytes_mode: bool = False) -> str:
|
69 |
+
if bytes_mode:
|
70 |
+
image_info = self.get_image_info_bytes(image)
|
71 |
+
else:
|
72 |
+
image_info = self.get_image_info(image)
|
73 |
+
print("image_info: ", image_info)
|
74 |
+
|
75 |
+
query_prompt = f"""
|
76 |
+
Task: Analyze the given query with accompanying inputs and determine the skills and tools needed to address it effectively.
|
77 |
+
|
78 |
+
Available tools: {self.available_tools}
|
79 |
+
|
80 |
+
Metadata for the tools: {self.toolbox_metadata}
|
81 |
+
|
82 |
+
Image: {image_info}
|
83 |
+
|
84 |
+
Query: {question}
|
85 |
+
|
86 |
+
Instructions:
|
87 |
+
1. Carefully read and understand the query and any accompanying inputs.
|
88 |
+
2. Identify the main objectives or tasks within the query.
|
89 |
+
3. List the specific skills that would be necessary to address the query comprehensively.
|
90 |
+
4. Examine the available tools in the toolbox and determine which ones might relevant and useful for addressing the query. Make sure to consider the user metadata for each tool, including limitations and potential applications (if available).
|
91 |
+
5. Provide a brief explanation for each skill and tool you've identified, describing how it would contribute to answering the query.
|
92 |
+
|
93 |
+
Your response should include:
|
94 |
+
1. A concise summary of the query's main points and objectives, as well as content in any accompanying inputs.
|
95 |
+
2. A list of required skills, with a brief explanation for each.
|
96 |
+
3. A list of relevant tools from the toolbox, with a brief explanation of how each tool would be utilized and its potential limitations.
|
97 |
+
4. Any additional considerations that might be important for addressing the query effectively.
|
98 |
+
|
99 |
+
Please present your analysis in a clear, structured format.
|
100 |
+
"""
|
101 |
+
|
102 |
+
input_data = [query_prompt]
|
103 |
+
if bytes_mode:
|
104 |
+
image_bytes = image
|
105 |
+
else:
|
106 |
+
try:
|
107 |
+
with open(image_info["image_path"], 'rb') as file:
|
108 |
+
image_bytes = file.read()
|
109 |
+
input_data.append(image_bytes)
|
110 |
+
except Exception as e:
|
111 |
+
print(f"Error reading image file: {str(e)}")
|
112 |
+
|
113 |
+
self.query_analysis = self.llm_engine_mm(input_data, response_format=QueryAnalysis)
|
114 |
+
|
115 |
+
return str(self.query_analysis).strip()
|
116 |
+
|
117 |
+
def extract_context_subgoal_and_tool(self, response: NextStep) -> Tuple[str, str, str]:
|
118 |
+
|
119 |
+
def normalize_tool_name(tool_name: str) -> str:
|
120 |
+
# Normalize the tool name to match the available tools
|
121 |
+
for tool in self.available_tools:
|
122 |
+
if tool.lower() in tool_name.lower():
|
123 |
+
return tool
|
124 |
+
return "No matched tool given: " + tool_name
|
125 |
+
|
126 |
+
try:
|
127 |
+
context = response.context.strip()
|
128 |
+
sub_goal = response.sub_goal.strip()
|
129 |
+
tool_name = normalize_tool_name(response.tool_name.strip())
|
130 |
+
return context, sub_goal, tool_name
|
131 |
+
except Exception as e:
|
132 |
+
print(f"Error extracting context, sub-goal, and tool name: {str(e)}")
|
133 |
+
return None, None, None
|
134 |
+
|
135 |
+
def generate_next_step(self, question: str, image: str, query_analysis: str, memory: Memory, step_count: int, max_step_count: int, bytes_mode: bool = False) -> NextStep:
|
136 |
+
prompt_generate_next_step = f"""
|
137 |
+
Task: Determine the optimal next step to address the given query based on the provided analysis, available tools, and previous steps taken.
|
138 |
+
|
139 |
+
Context:
|
140 |
+
Query: {question}
|
141 |
+
Image: {image if not bytes_mode else 'image.jpg'}
|
142 |
+
Query Analysis: {query_analysis}
|
143 |
+
|
144 |
+
Available Tools:
|
145 |
+
{self.available_tools}
|
146 |
+
|
147 |
+
Tool Metadata:
|
148 |
+
{self.toolbox_metadata}
|
149 |
+
|
150 |
+
Previous Steps and Their Results:
|
151 |
+
{memory.get_actions()}
|
152 |
+
|
153 |
+
Current Step: {step_count} in {max_step_count} steps
|
154 |
+
Remaining Steps: {max_step_count - step_count}
|
155 |
+
|
156 |
+
Instructions:
|
157 |
+
1. Analyze the context thoroughly, including the query, its analysis, any image, available tools and their metadata, and previous steps taken.
|
158 |
+
|
159 |
+
2. Determine the most appropriate next step by considering:
|
160 |
+
- Key objectives from the query analysis
|
161 |
+
- Capabilities of available tools
|
162 |
+
- Logical progression of problem-solving
|
163 |
+
- Outcomes from previous steps
|
164 |
+
- Current step count and remaining steps
|
165 |
+
|
166 |
+
3. Select ONE tool best suited for the next step, keeping in mind the limited number of remaining steps.
|
167 |
+
|
168 |
+
4. Formulate a specific, achievable sub-goal for the selected tool that maximizes progress towards answering the query.
|
169 |
+
|
170 |
+
Output Format:
|
171 |
+
<justification>: detailed explanation of why the selected tool is the best choice for the next step, considering the context and previous outcomes.
|
172 |
+
<context>: MUST include ALL necessary information for the tool to function, structured as follows:
|
173 |
+
* Relevant data from previous steps
|
174 |
+
* File names or paths created or used in previous steps (list EACH ONE individually)
|
175 |
+
* Variable names and their values from previous steps' results
|
176 |
+
* Any other context-specific information required by the tool
|
177 |
+
<sub_goal>: a specific, achievable objective for the tool, based on its metadata and previous outcomes. It MUST contain any involved data, file names, and variables from Previous Steps and Their Results that the tool can act upon.
|
178 |
+
<tool_name>: MUST be the exact name of a tool from the available tools list.
|
179 |
+
|
180 |
+
Rules:
|
181 |
+
- Select only ONE tool for this step.
|
182 |
+
- The sub-goal MUST directly address the query and be achievable by the selected tool.
|
183 |
+
- The Context section MUST include ALL necessary information for the tool to function, including ALL relevant file paths, data, and variables from previous steps.
|
184 |
+
- The tool name MUST exactly match one from the available tools list: {self.available_tools}.
|
185 |
+
- Avoid redundancy by considering previous steps and building on prior results.
|
186 |
+
|
187 |
+
Example (do not copy, use only as reference):
|
188 |
+
<justification>: [Your detailed explanation here]
|
189 |
+
<context>: Image path: "example/image.jpg", Previous detection results: [list of objects]
|
190 |
+
<sub_goal>: Detect and count the number of specific objects in the image "example/image.jpg"
|
191 |
+
<tool_name>: Object_Detector_Tool
|
192 |
+
"""
|
193 |
+
next_step = self.llm_engine(prompt_generate_next_step, response_format=NextStep)
|
194 |
+
return next_step
|
195 |
+
|
196 |
+
    def verificate_memory(self, question: str, image: str, query_analysis: str, memory: Memory, bytes_mode: bool = False) -> MemoryVerification:
        if bytes_mode:
            image_info = self.get_image_info_bytes(image)
        else:
            image_info = self.get_image_info(image)

        prompt_memory_verification = f"""
Task: Thoroughly evaluate the completeness and accuracy of the memory for fulfilling the given query, considering the potential need for additional tool usage.

Context:
Query: {question}
Image: {image_info}
Available Tools: {self.available_tools}
Toolbox Metadata: {self.toolbox_metadata}
Initial Analysis: {query_analysis}
Memory (tools used and results): {memory.get_actions()}

Detailed Instructions:
1. Carefully analyze the query, initial analysis, and image (if provided):
   - Identify the main objectives of the query.
   - Note any specific requirements or constraints mentioned.
   - If an image is provided, consider its relevance and what information it contributes.

2. Review the available tools and their metadata:
   - Understand the capabilities, limitations, and best practices of each tool.
   - Consider how each tool might be applicable to the query.

3. Examine the memory content in detail:
   - Review each tool used and its execution results.
   - Assess how well each tool's output contributes to answering the query.

4. Critical Evaluation (address each point explicitly):
   a) Completeness: Does the memory fully address all aspects of the query?
      - Identify any parts of the query that remain unanswered.
      - Consider if all relevant information has been extracted from the image (if applicable).

   b) Unused Tools: Are there any unused tools that could provide additional relevant information?
      - Specify which unused tools might be helpful and why.

   c) Inconsistencies: Are there any contradictions or conflicts in the information provided?
      - If yes, explain the inconsistencies and suggest how they might be resolved.

   d) Verification Needs: Is there any information that requires further verification due to tool limitations?
      - Identify specific pieces of information that need verification and explain why.

   e) Ambiguities: Are there any unclear or ambiguous results that could be clarified by using another tool?
      - Point out specific ambiguities and suggest which tools could help clarify them.

5. Final Determination:
   Based on your thorough analysis, decide if the memory is complete and accurate enough to generate the final output, or if additional tool usage is necessary.

Response Format:
<analysis>: Provide a detailed analysis of whether the memory is sufficient. Reference specific information from the memory and explain its relevance to each aspect of the task. Address how each main point of the query has been satisfied.
<stop_signal>: Whether to stop the problem-solving process and proceed to generating the final output.
    * "True": if the memory is sufficient for addressing the query and no additional available tools need to be used. If ONLY manual verification without tools is needed, choose "True".
    * "False": if the memory is insufficient and needs more information from additional tool usage.
"""

        input_data = [prompt_memory_verification]
        if image_info:
            try:
                with open(image_info["image_path"], 'rb') as file:
                    image_bytes = file.read()
                input_data.append(image_bytes)
            except Exception as e:
                print(f"Error reading image file: {str(e)}")

        stop_verification = self.llm_engine_mm(input_data, response_format=MemoryVerification)

        return stop_verification

    def extract_conclusion(self, response: MemoryVerification) -> str:
        if response.stop_signal:
            return 'STOP'
        else:
            return 'CONTINUE'

    def generate_final_output(self, question: str, image: str, memory: Memory, bytes_mode: bool = False) -> str:
        if bytes_mode:
            image_info = self.get_image_info_bytes(image)
        else:
            image_info = self.get_image_info(image)

        prompt_generate_final_output = f"""
Task: Generate the final output based on the query, image, and tools used in the process.

Context:
Query: {question}
Image: {image_info}
Actions Taken:
{memory.get_actions()}

Instructions:
1. Review the query, image, and all actions taken during the process.
2. Consider the results obtained from each tool execution.
3. Incorporate the relevant information from the memory to generate the step-by-step final output.
4. The final output should be consistent and coherent, drawing on the results from the tools.

Output Structure:
Your response should be well-organized and include the following sections:

1. Summary:
   - Provide a brief overview of the query and the main findings.

2. Detailed Analysis:
   - Break down the process of answering the query step-by-step.
   - For each step, mention the tool used, its purpose, and the key results obtained.
   - Explain how each step contributed to addressing the query.

3. Key Findings:
   - List the most important discoveries or insights gained from the analysis.
   - Highlight any unexpected or particularly interesting results.

4. Answer to the Query:
   - Directly address the original question with a clear and concise answer.
   - If the query has multiple parts, ensure each part is answered separately.

5. Additional Insights (if applicable):
   - Provide any relevant information or insights that go beyond the direct answer to the query.
   - Discuss any limitations or areas of uncertainty in the analysis.

6. Conclusion:
   - Summarize the main points and reinforce the answer to the query.
   - If appropriate, suggest potential next steps or areas for further investigation.
"""

        input_data = [prompt_generate_final_output]
        if image_info:
            try:
                with open(image_info["image_path"], 'rb') as file:
                    image_bytes = file.read()
                input_data.append(image_bytes)
            except Exception as e:
                print(f"Error reading image file: {str(e)}")

        final_output = self.llm_engine_mm(input_data)

        return final_output

    def generate_direct_output(self, question: str, image: str, memory: Memory, bytes_mode: bool = False) -> str:
        if bytes_mode:
            image_info = self.get_image_info_bytes(image)
        else:
            image_info = self.get_image_info(image)

        prompt_generate_final_output = f"""
Context:
Query: {question}
Image: {image_info}
Initial Analysis:
{self.query_analysis}
Actions Taken:
{memory.get_actions()}

Please generate a concise output based on the query, image information, initial analysis, and actions taken. Break down the process into clear, logical, and coherent steps. Conclude with a precise and direct answer to the query.

Answer:
"""

        input_data = [prompt_generate_final_output]
        if image_info:
            try:
                with open(image_info["image_path"], 'rb') as file:
                    image_bytes = file.read()
                input_data.append(image_bytes)
            except Exception as e:
                print(f"Error reading image file: {str(e)}")

        final_output = self.llm_engine_mm(input_data)

        return final_output
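These planner methods rely on structured outputs: `generate_next_step` passes `response_format=NextStep` and `verificate_memory` passes `response_format=MemoryVerification` to the LLM engine. The model definitions live elsewhere in planner.py and are not shown in this diff; a plausible sketch, with field names inferred only from the `<justification>`/`<context>`/`<sub_goal>`/`<tool_name>` and `<analysis>`/`<stop_signal>` sections of the prompts above, might look like:

```python
# Hypothetical sketch of the structured-output models assumed by
# generate_next_step and verificate_memory; the actual definitions
# in planner.py may differ.
from pydantic import BaseModel

class NextStep(BaseModel):
    justification: str  # why this tool is the right next step
    context: str        # file paths, data, and variables the tool needs
    sub_goal: str       # the specific objective for this tool call
    tool_name: str      # must exactly match a tool in self.available_tools

class MemoryVerification(BaseModel):
    analysis: str       # sufficiency analysis of the accumulated memory
    stop_signal: bool   # True -> generate final output; False -> keep using tools
```

Note that `extract_conclusion` reads `response.stop_signal` as a boolean, which is consistent with this sketch: `True` maps to `'STOP'` and `False` to `'CONTINUE'`.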
opentools/models/utlis.py
ADDED
@@ -0,0 +1,73 @@
# import json

# def truncate_result(result, max_length: int = 100000, truncation_indicator: str = "...") -> str:
#     """
#     Truncate the result to the specified length while preserving JSON structure when possible.

#     Args:
#         result: The result to truncate (can be str, list, dict, or other types)
#         max_length: Maximum length of the output string (default: 100000)
#         truncation_indicator: String to indicate truncation (default: "...")

#     Returns:
#         str: Truncated string representation of the result
#     """
#     if isinstance(result, (dict, list)):
#         try:
#             result_str = json.dumps(result, ensure_ascii=False)
#         except:
#             result_str = str(result)
#     else:
#         result_str = str(result)

#     indicator_length = len(truncation_indicator)

#     if len(result_str) > max_length:
#         # For JSON-like strings, try to find the last complete structure
#         if result_str.startswith('{') or result_str.startswith('['):
#             # Find last complete element
#             pos = max_length - indicator_length
#             while pos > 0 and not (
#                 result_str[pos] in ',]}' and
#                 result_str[pos:].count('"') % 2 == 0
#             ):
#                 pos -= 1
#             if pos > 0:
#                 return result_str[:pos + 1] + truncation_indicator

#         # Default truncation if not JSON or no suitable truncation point found
#         return result_str[:max_length - indicator_length] + truncation_indicator

#     return result_str

def make_json_serializable(obj):
    if isinstance(obj, (str, int, float, bool, type(None))):
        return obj
    elif isinstance(obj, dict):
        return {make_json_serializable(key): make_json_serializable(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [make_json_serializable(element) for element in obj]
    elif hasattr(obj, '__dict__'):
        return make_json_serializable(obj.__dict__)
    else:
        return str(obj)


def make_json_serializable_truncated(obj, max_length: int = 100000):
    if isinstance(obj, (int, float, bool, type(None))):
        if isinstance(obj, (int, float)) and len(str(obj)) > max_length:
            return str(obj)[:max_length - 3] + "..."
        return obj
    elif isinstance(obj, str):
        return obj if len(obj) <= max_length else obj[:max_length - 3] + "..."
    elif isinstance(obj, dict):
        return {make_json_serializable_truncated(key, max_length): make_json_serializable_truncated(value, max_length)
                for key, value in obj.items()}
    elif isinstance(obj, list):
        return [make_json_serializable_truncated(element, max_length) for element in obj]
    elif hasattr(obj, '__dict__'):
        return make_json_serializable_truncated(obj.__dict__, max_length)
    else:
        result = str(obj)
        return result if len(result) <= max_length else result[:max_length - 3] + "..."
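A quick illustration of how these helpers behave, using a hypothetical example object (the import path assumes this module's spelling, `opentools.models.utlis`):

```python
# Demo of the serialization helpers above (hypothetical example object).
from types import SimpleNamespace
from opentools.models.utlis import make_json_serializable, make_json_serializable_truncated

result = SimpleNamespace(label="cat", score=0.97, raw="x" * 50)

# Objects exposing __dict__ are converted to plain dicts recursively.
print(make_json_serializable(result))
# -> {'label': 'cat', 'score': 0.97, 'raw': 'xxx...x'}  (all 50 characters kept)

# The truncated variant additionally caps every string at max_length
# characters, replacing the tail with "..." (max_length lowered here
# only to make the truncation visible).
print(make_json_serializable_truncated(result, max_length=10))
# -> {'label': 'cat', 'score': 0.97, 'raw': 'xxxxxxx...'}
```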
opentools/tools/README.md
ADDED
@@ -0,0 +1,44 @@
## Testing the Tools

To test the detection tools, follow these steps:

1. **Navigate to the Project Directory:**

   Change your current directory to where the tools are located. Replace `your_path` with the actual path to your project directory.

   ```sh
   cd your_path/toolbox-agent/opentools
   ```

2. **Set the Python Path and Run the Tools:**

   ```sh
   cd toolbox-agent
   export PYTHONPATH=$(pwd)
   ```

   Execute the tools using the following commands:

   ```sh
   python tools/text_detector/tool.py
   python tools/object_detector/tool.py
   ```

## File Structure

The project is organized as follows:

```sh
├── __init__.py          # Initializes the tools package and possibly exposes submodules
├── base.py              # Base class for tools, providing common functionality
├── text_detector/       # Directory for the text detection tool
│   ├── readme.md        # Documentation for the text detection tool
│   └── tool.py          # Implementation of the text detection tool
├── object_detector/     # Directory for the object detection tool
│   ├── readme.md        # Documentation for the object detection tool
│   └── tool.py          # Implementation of the object detection tool
```
opentools/tools/__init__.py
ADDED
File without changes
opentools/tools/base.py
ADDED
@@ -0,0 +1,103 @@
# opentools/tools/base.py

from opentools.engine.openai import ChatOpenAI

class BaseTool:
    """
    A base class for building tool classes that perform specific tasks, such as image processing or text detection.
    """

    require_llm_engine = False  # Default is False; tools that need an LLM should set this to True

    def __init__(self, tool_name=None, tool_description=None, tool_version=None, input_types=None, output_type=None, demo_commands=None, output_dir=None, user_metadata=None, model_string=None):
        """
        Initialize the base tool with optional metadata.

        Parameters:
            tool_name (str): The name of the tool.
            tool_description (str): A description of the tool.
            tool_version (str): The version of the tool.
            input_types (dict): The expected input types for the tool.
            output_type (str): The expected output type for the tool.
            demo_commands (list): A list of example commands for using the tool.
            output_dir (str): The directory where the tool should save its output (optional).
            user_metadata (dict): Additional metadata specific to user needs (optional).
            model_string (str): The model string for the LLM engine (optional, only used if require_llm_engine is True).
        """
        self.tool_name = tool_name
        self.tool_description = tool_description
        self.tool_version = tool_version
        self.input_types = input_types
        self.output_type = output_type
        self.demo_commands = demo_commands
        self.output_dir = output_dir
        self.user_metadata = user_metadata
        self.model_string = model_string

    def set_metadata(self, tool_name, tool_description, tool_version, input_types, output_type, demo_commands, user_metadata=None):
        """
        Set the metadata for the tool.

        Parameters:
            tool_name (str): The name of the tool.
            tool_description (str): A description of the tool.
            tool_version (str): The version of the tool.
            input_types (dict): The expected input types for the tool.
            output_type (str): The expected output type for the tool.
            demo_commands (list): A list of example commands for using the tool.
            user_metadata (dict): Additional metadata specific to user needs (optional).
        """
        self.tool_name = tool_name
        self.tool_description = tool_description
        self.tool_version = tool_version
        self.input_types = input_types
        self.output_type = output_type
        self.demo_commands = demo_commands
        self.user_metadata = user_metadata

    def get_metadata(self):
        """
        Returns the metadata for the tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = {
            "tool_name": self.tool_name,
            "tool_description": self.tool_description,
            "tool_version": self.tool_version,
            "input_types": self.input_types,
            "output_type": self.output_type,
            "demo_commands": self.demo_commands,
            "require_llm_engine": self.require_llm_engine,
        }
        if self.user_metadata:
            metadata["user_metadata"] = self.user_metadata
        return metadata

    def set_custom_output_dir(self, output_dir):
        """
        Set a custom output directory for the tool.

        Parameters:
            output_dir (str): The new output directory path.
        """
        self.output_dir = output_dir

    def set_llm_engine(self, model_string):
        """
        Set the LLM engine for the tool.

        Parameters:
            model_string (str): The model string for the LLM engine.
        """
        self.model_string = model_string

    def execute(self, *args, **kwargs):
        """
        Execute the tool's main functionality. This method should be overridden by subclasses.

        Raises:
            NotImplementedError: If the subclass does not implement this method.
        """
        raise NotImplementedError("Subclasses must implement the execute method.")
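For orientation, a minimal tool built on this base class might look like the sketch below. The tool name, metadata, and logic are purely illustrative and not part of this commit:

```python
# Hypothetical example of subclassing BaseTool; only the BaseTool
# interface above is assumed to exist.
from opentools.tools.base import BaseTool

class Word_Count_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="Word_Count_Tool",
            tool_description="Counts the words in a given text string.",
            tool_version="1.0.0",
            input_types={"text": "str - The text to analyze."},
            output_type="int - The number of words in the text.",
            demo_commands=[{
                "command": 'execution = tool.execute(text="hello world")',
                "description": "Count the words in a short string.",
            }],
        )

    def execute(self, text):
        # Override the abstract execute() with the tool's actual work.
        return len(text.split())

if __name__ == "__main__":
    tool = Word_Count_Tool()
    print(tool.get_metadata())
    print(tool.execute(text="hello world"))  # -> 2
```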
opentools/tools/generalist_solution_generator/examples/mathvista_113.png
ADDED
opentools/tools/generalist_solution_generator/tool.py
ADDED
@@ -0,0 +1,142 @@
import os
from opentools.tools.base import BaseTool
from opentools.engine.openai import ChatOpenAI

class Generalist_Solution_Generator_Tool(BaseTool):
    require_llm_engine = True

    def __init__(self, model_string="gpt-4o-mini"):
        super().__init__(
            tool_name="Generalist_Solution_Generator_Tool",
            tool_description="A generalized tool that takes a query from the user as a prompt and answers the question step by step to the best of its ability. It can also accept an image.",
            tool_version="1.0.0",
            input_types={
                "prompt": "str - The prompt that includes the query from the user to guide the agent to generate a response (Example: 'Describe this image in detail').",
                "image": "str - The path to the image file if applicable (default: None).",
            },
            output_type="str - The generated response to the original query prompt",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(prompt="Summarize the following text in a few lines")',
                    "description": "Generate a short summary given the prompt from the user."
                },
                {
                    "command": 'execution = tool.execute(prompt="Explain the mood of this scene.", image="path/to/image1.png")',
                    "description": "Generate a caption focusing on the mood using a specific prompt and image."
                },
                {
                    "command": 'execution = tool.execute(prompt="Give your best coordinate estimate for the pacemaker in the image and return (x1, y1, x2, y2)", image="path/to/image2.png")',
                    "description": "Generate bounding box coordinates given the image and prompt from the user. The format should be (x1, y1, x2, y2)."
                },
                {
                    "command": 'execution = tool.execute(prompt="Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?", image="path/to/image2.png")',
                    "description": "Answer a question step by step given the image."
                }
            ],
            # # version 0 (bowen) (Generalist: %; 6 Tools: %; Generalist + 6 Tools: %)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge. For optimal results: 1) Provide clear, specific prompts. 2) Use it as a starting point for complex tasks, then refine with specialized tools. 3) Verify important information from its responses. 4) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
            # version 2 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 54%)
            user_metadata = {
                "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
                "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
                "1) Provide clear, specific prompts.\n"
                "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
                "3) For complex queries, break them down into subtasks and use the tool multiple times.\n"
                "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
                "5) Verify important information from its responses.\n"
                "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            }
            # # version 6 (Generalist: 70%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
            #     "1) Provide clear, specific prompts.\n"
            #     "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
            #     "3) For complex queries, break them down into smaller, focused sub-tasks and use the tool multiple times.\n"
            #     "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
            #     "5) Verify important information from its responses.\n"
            #     "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
            # # version 8 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
            #     "1) Provide clear, specific prompts.\n"
            #     "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
            #     "3) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
            #     "4) Verify important information from its responses.\n"
            #     "5) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
        )
        self.model_string = model_string

    def execute(self, prompt, image=None):
        print(f"\nInitializing Generalist Tool with model: {self.model_string}")
        multimodal = True if image else False
        llm_engine = ChatOpenAI(model_string=self.model_string, is_multimodal=multimodal)

        try:
            input_data = [prompt]
            if multimodal:
                if not os.path.isfile(image):
                    return "Error: Invalid image file path."
                try:
                    with open(image, 'rb') as file:
                        image_bytes = file.read()
                    input_data.append(image_bytes)
                except Exception as e:
                    return f"Error reading image file: {str(e)}"

                response = llm_engine(input_data)
            else:
                response = llm_engine(input_data[0])
            return response
        except Exception as e:
            return f"Error generating response: {str(e)}"

    def get_metadata(self):
        metadata = super().get_metadata()
        return metadata

if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd opentools
    python tools/generalist_solution_generator/tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    print(f"Script directory: {script_dir}")

    # Example usage of the Generalist_Solution_Generator_Tool
    tool = Generalist_Solution_Generator_Tool()
    # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o-mini")
    # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o")

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Construct the full path to the image using the script's directory
    # relative_image_path = "../../tasks/minitoolbench/data/mathvista_113.png"
    relative_image_path = "examples/mathvista_113.png"
    image_path = os.path.join(script_dir, relative_image_path)
    prompt = "Describe the image in detail."

    # Execute the tool with the default prompt
    try:
        execution = tool.execute(prompt=prompt, image=image_path)
        # execution = tool.execute(prompt=prompt)
        print("Generated Response:")
        print(execution)
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
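Beyond the `__main__` demo above, the tool can presumably also be driven directly from other code. A minimal text-only invocation might look like the sketch below; it assumes `OPENAI_API_KEY` is available in the environment, since `execute()` instantiates a `ChatOpenAI` engine internally:

```python
# Hypothetical standalone use of the generalist tool (text-only path);
# requires OPENAI_API_KEY to be set for the ChatOpenAI engine.
from opentools.tools.generalist_solution_generator.tool import Generalist_Solution_Generator_Tool

tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o-mini")
answer = tool.execute(prompt="What is 17 * 24? Show your reasoning step by step.")
print(answer)
```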
requirements.txt
ADDED
@@ -0,0 +1,13 @@
easyocr==1.7.1
openai==1.58.1
python-dotenv==1.0.1
wikipedia==1.4.0
pillow==10.4.0
platformdirs==4.2.2
sympy==1.13.2
tenacity==9.0.0
diskcache==5.6.3
transformers==4.44.2
pymed==0.8.9
metapub==0.5.12
-e ./opentools
setup.py
ADDED
@@ -0,0 +1,20 @@
from setuptools import setup, find_packages

setup(
    name='opentools',
    version='0.1.0',
    # description='A flexible and versatile toolbox agent framework for complex tasks in both general and scientific scenarios.',
    # long_description=open('README.md').read(),
    # long_description_content_type='text/markdown',
    # author='Pan Lu, Bowen Chen, Sheng Liu',
    # author_email='[email protected]',
    # url='',  # You can add a GitHub or project URL here
    packages=find_packages(),
    # install_requires=open('requirements.txt').read().splitlines(),
    # classifiers=[
    #     'Programming Language :: Python :: 3',
    #     'License :: OSI Approved :: MIT License',
    #     'Operating System :: OS Independent',
    # ],
    # python_requires='>=3.10',
)