bowenchen118 committed
Commit d2beadd · 1 Parent(s): f4ec98b
app.py CHANGED
@@ -1,57 +1,24 @@
- import gradio as gr
  import os
- import datetime
+ import sys
+ import json
+ import argparse
+ import time
+ import io
+ import uuid
  from PIL import Image
+ from typing import List, Dict, Any, Iterator
+ import gradio as gr
+
+ # Add the project root to the Python path
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
+ sys.path.insert(0, project_root)
+
+ from .opentools.models.initializer import Initializer
+ from .opentools.models.planner import Planner
+ from .opentools.models.memory import Memory
+ from .opentools.models.executor import Executor
+ from .opentools.models.utlis import make_json_serializable
 
- # Create a directory for uploaded images
- os.makedirs("uploaded_images", exist_ok=True)
-
- def save_image(image):
-     # Generate a timestamped filename
-     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-     file_path = f"uploaded_images/image_{timestamp}.png"
-
-     # Save the image
-     image.save(file_path)
-
-     # Check if saved
-     print(f"Image saved to: {file_path} | Exists: {os.path.exists(file_path)}")
-
-     # Open image and print dims
-     img = Image.open(file_path)
-     print(f"Image dimensions: {img.size}")
-
-     return f"Image saved to: {file_path}"
-
- demo = gr.Interface(
-     fn=save_image,
-     inputs=gr.Image(type="pil"),  # Accepts PIL Image objects
-     outputs="text"
- )
-
- demo.launch()
-
- # import os
- # import sys
- # import json
- # import argparse
- # import time
- # import io
- # import uuid
- # from PIL import Image
- # from typing import List, Dict, Any, Iterator
- # import gradio as gr
-
- # # Add the project root to the Python path
- # current_dir = os.path.dirname(os.path.abspath(__file__))
- # project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
- # sys.path.insert(0, project_root)
-
- # from opentools.models.initializer import Initializer
- # from opentools.models.planner import Planner
- # from opentools.models.memory import Memory
- # from opentools.models.executor import Executor
- # from opentools.models.utlis import make_json_serializable
 
  # solver = None
opentools/__init__.py ADDED
File without changes
opentools/engine/base.py ADDED
@@ -0,0 +1,43 @@
+ import hashlib
+ import diskcache as dc
+ from abc import ABC, abstractmethod
+
+ class EngineLM(ABC):
+     system_prompt: str = "You are a helpful, creative, and smart assistant."
+     model_string: str
+
+     @abstractmethod
+     def generate(self, prompt, system_prompt=None, **kwargs):
+         pass
+
+     def __call__(self, *args, **kwargs):
+         pass
+
+
+ class CachedEngine:
+     def __init__(self, cache_path):
+         super().__init__()
+         self.cache_path = cache_path
+         self.cache = dc.Cache(cache_path)
+
+     def _hash_prompt(self, prompt: str):
+         return hashlib.sha256(f"{prompt}".encode()).hexdigest()
+
+     def _check_cache(self, prompt: str):
+         if prompt in self.cache:
+             return self.cache[prompt]
+         else:
+             return None
+
+     def _save_cache(self, prompt: str, response: str):
+         self.cache[prompt] = response
+
+     def __getstate__(self):
+         # Remove the cache from the state before pickling
+         state = self.__dict__.copy()
+         del state['cache']
+         return state
+
+     def __setstate__(self, state):
+         # Restore the cache after unpickling
+         self.__dict__.update(state)
+         self.cache = dc.Cache(self.cache_path)
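`EngineLM` defines the generation interface and `CachedEngine` adds a pickle-safe diskcache layer; a concrete engine combines the two, as `ChatOpenAI` does in the next file. A minimal sketch of that pattern (the `EchoEngine` class and its canned reply are hypothetical, not part of this commit):

```python
from opentools.engine.base import EngineLM, CachedEngine

class EchoEngine(EngineLM, CachedEngine):
    """Toy engine: 'generates' by echoing, caching each response on disk."""

    def __init__(self, cache_path="./echo_cache.db"):
        super().__init__(cache_path=cache_path)  # resolves to CachedEngine.__init__
        self.model_string = "echo"

    def generate(self, prompt, system_prompt=None, **kwargs):
        cached = self._check_cache(prompt)   # disk-backed lookup via diskcache
        if cached is not None:
            return cached
        response = f"echo: {prompt}"         # stand-in for a real model call
        self._save_cache(prompt, response)
        return response

engine = EchoEngine()
print(engine.generate("hello"))  # computed, then written to the cache
print(engine.generate("hello"))  # served from the cache
```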
opentools/engine/openai.py ADDED
@@ -0,0 +1,268 @@
+ try:
+     from openai import OpenAI
+ except ImportError:
+     raise ImportError("If you'd like to use OpenAI models, please install the openai package by running `pip install openai`, and add 'OPENAI_API_KEY' to your environment variables.")
+
+ import os
+ import json
+ import base64
+ import platformdirs
+ from tenacity import (
+     retry,
+     stop_after_attempt,
+     wait_random_exponential,
+ )
+ from typing import List, Union
+
+ from .base import EngineLM, CachedEngine
+
+ import openai
+
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ # Structured-output support:
+ # https://platform.openai.com/docs/guides/structured-outputs
+ # https://cookbook.openai.com/examples/structured_outputs_intro
+ from pydantic import BaseModel
+
+ class DefaultFormat(BaseModel):
+     response: str
+
+ # Global constant listing the models that support structured outputs
+ OPENAI_STRUCTURED_MODELS = ['gpt-4o', 'gpt-4o-2024-08-06', 'gpt-4o-mini', 'gpt-4o-mini-2024-07-18']
+
+
+ class ChatOpenAI(EngineLM, CachedEngine):
+     DEFAULT_SYSTEM_PROMPT = "You are a helpful, creative, and smart assistant."
+
+     def __init__(
+         self,
+         model_string="gpt-4o-mini-2024-07-18",
+         system_prompt=DEFAULT_SYSTEM_PROMPT,
+         is_multimodal: bool = False,
+         # enable_cache: bool = True,
+         enable_cache: bool = False,  # NOTE: disable cache for now
+         **kwargs):
+         """
+         :param model_string: name of the OpenAI model to use
+         :param system_prompt: default system prompt for all requests
+         :param is_multimodal: whether the engine accepts image inputs
+         """
+         if enable_cache:
+             root = platformdirs.user_cache_dir("opentools")
+             cache_path = os.path.join(root, f"cache_openai_{model_string}.db")
+             # For example, cache_path = /root/.cache/opentools/cache_openai_gpt-4o-mini.db
+             # print(f"Cache path: {cache_path}")
+
+             self.image_cache_dir = os.path.join(root, "image_cache")
+             os.makedirs(self.image_cache_dir, exist_ok=True)
+
+             super().__init__(cache_path=cache_path)
+
+         self.system_prompt = system_prompt
+         if os.getenv("OPENAI_API_KEY") is None:
+             raise ValueError("Please set the OPENAI_API_KEY environment variable if you'd like to use OpenAI models.")
+
+         self.client = OpenAI(
+             api_key=os.getenv("OPENAI_API_KEY"),
+         )
+         self.model_string = model_string
+         self.is_multimodal = is_multimodal
+         self.enable_cache = enable_cache
+
+         if enable_cache:
+             print(f"!! Cache enabled for model: {self.model_string}")
+         else:
+             print(f"!! Cache disabled for model: {self.model_string}")
+
+     @retry(wait=wait_random_exponential(min=1, max=5), stop=stop_after_attempt(5))
+     def generate(self, content: Union[str, List[Union[str, bytes]]], system_prompt=None, **kwargs):
+         try:
+             # Print retry attempt information
+             attempt_number = self.generate.retry.statistics.get('attempt_number', 0) + 1
+             if attempt_number > 1:
+                 print(f"Attempt {attempt_number} of 5")
+
+             if isinstance(content, str):
+                 return self._generate_text(content, system_prompt=system_prompt, **kwargs)
+
+             elif isinstance(content, list):
+                 if not self.is_multimodal:
+                     raise NotImplementedError("Multimodal generation is only supported for GPT-4 models.")
+
+                 return self._generate_multimodal(content, system_prompt=system_prompt, **kwargs)
+
+         except openai.LengthFinishReasonError as e:
+             print(f"Token limit exceeded: {str(e)}")
+             print(f"Tokens used - Completion: {e.completion.usage.completion_tokens}, Prompt: {e.completion.usage.prompt_tokens}, Total: {e.completion.usage.total_tokens}")
+             return {
+                 "error": "token_limit_exceeded",
+                 "message": str(e),
+                 "details": {
+                     "completion_tokens": e.completion.usage.completion_tokens,
+                     "prompt_tokens": e.completion.usage.prompt_tokens,
+                     "total_tokens": e.completion.usage.total_tokens
+                 }
+             }
+         except openai.RateLimitError as e:
+             print(f"Rate limit error encountered: {str(e)}")
+             return {
+                 "error": "rate_limit",
+                 "message": str(e),
+                 "details": getattr(e, 'args', None)
+             }
+         except Exception as e:
+             print(f"Error in generate method: {str(e)}")
+             print(f"Error type: {type(e).__name__}")
+             print(f"Error details: {e.args}")
+             return {
+                 "error": type(e).__name__,
+                 "message": str(e),
+                 "details": getattr(e, 'args', None)
+             }
+
+     def _generate_text(
+         self, prompt, system_prompt=None, temperature=0, max_tokens=4000, top_p=0.99, response_format=None
+     ):
+         sys_prompt_arg = system_prompt if system_prompt else self.system_prompt
+
+         if self.enable_cache:
+             cache_key = sys_prompt_arg + prompt
+             cache_or_none = self._check_cache(cache_key)
+             if cache_or_none is not None:
+                 return cache_or_none
+
+         if self.model_string in ['o1', 'o1-mini']:  # only supports base response currently
+             # print(f"Using structured model: {self.model_string}")
+             response = self.client.beta.chat.completions.parse(
+                 model=self.model_string,
+                 messages=[
+                     {"role": "user", "content": prompt},
+                 ],
+                 max_completion_tokens=max_tokens
+             )
+             if response.choices[0].finish_reason == "length":
+                 response = "Token limit exceeded"
+             else:
+                 response = response.choices[0].message.parsed
+         elif self.model_string in OPENAI_STRUCTURED_MODELS and response_format is not None:
+             # print(f"Using structured model: {self.model_string}")
+             response = self.client.beta.chat.completions.parse(
+                 model=self.model_string,
+                 messages=[
+                     {"role": "system", "content": sys_prompt_arg},
+                     {"role": "user", "content": prompt},
+                 ],
+                 frequency_penalty=0,
+                 presence_penalty=0,
+                 stop=None,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 top_p=top_p,
+                 response_format=response_format
+             )
+             response = response.choices[0].message.parsed
+         else:
+             # print(f"Using non-structured model: {self.model_string}")
+             response = self.client.chat.completions.create(
+                 model=self.model_string,
+                 messages=[
+                     {"role": "system", "content": sys_prompt_arg},
+                     {"role": "user", "content": prompt},
+                 ],
+                 frequency_penalty=0,
+                 presence_penalty=0,
+                 stop=None,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 top_p=top_p,
+             )
+             response = response.choices[0].message.content
+
+         if self.enable_cache:
+             self._save_cache(cache_key, response)
+         return response
+
+     def __call__(self, prompt, **kwargs):
+         return self.generate(prompt, **kwargs)
+
+     def _format_content(self, content: List[Union[str, bytes]]) -> List[dict]:
+         formatted_content = []
+         for item in content:
+             if isinstance(item, bytes):
+                 base64_image = base64.b64encode(item).decode('utf-8')
+                 formatted_content.append({
+                     "type": "image_url",
+                     "image_url": {
+                         "url": f"data:image/jpeg;base64,{base64_image}"
+                     }
+                 })
+             elif isinstance(item, str):
+                 formatted_content.append({
+                     "type": "text",
+                     "text": item
+                 })
+             else:
+                 raise ValueError(f"Unsupported input type: {type(item)}")
+         return formatted_content
+
+     def _generate_multimodal(
+         self, content: List[Union[str, bytes]], system_prompt=None, temperature=0, max_tokens=4000, top_p=0.99, response_format=None
+     ):
+         sys_prompt_arg = system_prompt if system_prompt else self.system_prompt
+         formatted_content = self._format_content(content)
+
+         if self.enable_cache:
+             cache_key = sys_prompt_arg + json.dumps(formatted_content)
+             cache_or_none = self._check_cache(cache_key)
+             if cache_or_none is not None:
+                 # print(f"Cache hit for prompt: {cache_key[:200]}")
+                 return cache_or_none
+
+         if self.model_string in ['o1', 'o1-mini']:  # only supports base response currently
+             # print(f"Using structured model: {self.model_string}")
+             print(f'Max tokens: {max_tokens}')
+             response = self.client.chat.completions.create(
+                 model=self.model_string,
+                 messages=[
+                     {"role": "user", "content": formatted_content},
+                 ],
+                 max_completion_tokens=max_tokens
+             )
+             if response.choices[0].finish_reason == "length":
+                 response_text = "Token limit exceeded"
+             else:
+                 response_text = response.choices[0].message.content
+         elif self.model_string in OPENAI_STRUCTURED_MODELS and response_format is not None:
+             # print(f"Using structured model: {self.model_string}")
+             response = self.client.beta.chat.completions.parse(
+                 model=self.model_string,
+                 messages=[
+                     {"role": "system", "content": sys_prompt_arg},
+                     {"role": "user", "content": formatted_content},
+                 ],
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 top_p=top_p,
+                 response_format=response_format
+             )
+             response_text = response.choices[0].message.parsed
+         else:
+             # print(f"Using non-structured model: {self.model_string}")
+             response = self.client.chat.completions.create(
+                 model=self.model_string,
+                 messages=[
+                     {"role": "system", "content": sys_prompt_arg},
+                     {"role": "user", "content": formatted_content},
+                 ],
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 top_p=top_p,
+             )
+             response_text = response.choices[0].message.content
+
+         if self.enable_cache:
+             self._save_cache(cache_key, response_text)
+         return response_text
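A usage sketch for `ChatOpenAI` (assumes `OPENAI_API_KEY` is set; the image path is hypothetical). String inputs are routed through `_generate_text`, and lists mixing strings and raw bytes through `_generate_multimodal`:

```python
from opentools.engine.openai import ChatOpenAI

# Text-only: routed through _generate_text.
llm = ChatOpenAI(model_string="gpt-4o-mini-2024-07-18", is_multimodal=False)
print(llm.generate("Name three prime numbers."))

# Multimodal: a list of text and raw image bytes, routed through
# _generate_multimodal, which base64-encodes the bytes as an image_url part.
mm = ChatOpenAI(model_string="gpt-4o-mini-2024-07-18", is_multimodal=True)
with open("example/image.jpg", "rb") as f:  # hypothetical image path
    image_bytes = f.read()
print(mm.generate(["Describe this image in one sentence.", image_bytes]))
```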
opentools/models/executor.py ADDED
@@ -0,0 +1,240 @@
+ import os
+ # import sys
+ import importlib
+ import re
+ import signal
+ from typing import Dict, Any, List, Optional
+ from datetime import datetime
+
+ from opentools.engine.openai import ChatOpenAI
+ from opentools.models.formatters import ToolCommand
+
+ class TimeoutError(Exception):
+     pass
+
+ def timeout_handler(signum, frame):
+     raise TimeoutError("Function execution timed out")
+
+ class Executor:
+     def __init__(self, llm_engine_name: str, root_cache_dir: str = "solver_cache", num_threads: int = 1, max_time: int = 120, max_output_length: int = 100000, enable_signal: bool = True):
+         self.llm_engine_name = llm_engine_name
+         self.root_cache_dir = root_cache_dir
+         self.num_threads = num_threads
+         self.max_time = max_time
+         self.max_output_length = max_output_length
+         self.enable_signal = enable_signal
+
+     def set_query_cache_dir(self, query_cache_dir):
+         if query_cache_dir:
+             self.query_cache_dir = query_cache_dir
+         else:
+             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+             self.query_cache_dir = os.path.join(self.root_cache_dir, timestamp)
+         os.makedirs(self.query_cache_dir, exist_ok=True)
+
+     def generate_tool_command(self, question: str, image: str, context: str, sub_goal: str, tool_name: str, tool_metadata: Dict[str, Any], bytes_mode: bool = False) -> ToolCommand:
+         prompt_generate_tool_command = f"""
+         Task: Generate a precise command to execute the selected tool based on the given information.
+
+         Query: {question}
+         Image: {image if not bytes_mode else 'image.jpg'}
+         Context: {context}
+         Sub-Goal: {sub_goal}
+         Selected Tool: {tool_name}
+         Tool Metadata: {tool_metadata}
+
+         Instructions:
+         1. Carefully review all provided information: the query, image path, context, sub-goal, selected tool, and tool metadata.
+         2. Analyze the tool's input_types from the metadata to understand required and optional parameters.
+         3. Construct a command or series of commands that aligns with the tool's usage pattern and addresses the sub-goal.
+         4. Ensure all required parameters are included and properly formatted.
+         5. Use appropriate values for parameters based on the given context, particularly the `Context` field which may contain relevant information from previous steps.
+         6. If multiple steps are needed to prepare data for the tool, include them in the command construction.
+
+         Output Format:
+         <analysis>: a step-by-step analysis of the context, sub-goal, and selected tool to guide the command construction.
+         <explanation>: a detailed explanation of the constructed command(s) and their parameters.
+         <command>: the Python code to execute the tool, which can be one of the following types:
+             a. A single line command with `execution = tool.execute()`.
+             b. A multi-line command with complex data preparation, ending with `execution = tool.execute()`.
+             c. Multiple lines of `execution = tool.execute()` calls for processing multiple items.
+         ```python
+         <your command here>
+         ```
+
+         Rules:
+         1. The command MUST be valid Python code and include at least one call to `tool.execute()`.
+         2. Each `tool.execute()` call MUST be assigned to the 'execution' variable in the format `execution = tool.execute(...)`.
+         3. For multiple executions, use separate `execution = tool.execute()` calls for each execution.
+         4. The final output MUST be assigned to the 'execution' variable, either directly from `tool.execute()` or as a processed form of multiple executions.
+         5. Use the exact parameter names as specified in the tool's input_types.
+         6. Enclose string values in quotes, use appropriate data types for other values (e.g., lists, numbers).
+         7. Do not include any code or text that is not part of the actual command.
+         8. Ensure the command directly addresses the sub-goal and query.
+         9. Include ALL required parameters, data, and paths to execute the tool in the command itself.
+         10. If preparation steps are needed, include them as separate Python statements before the `tool.execute()` calls.
+
+         Examples (Not to use directly unless relevant):
+
+         Example 1 (Single line command):
+         <analysis>: The tool requires an image path and a list of labels for object detection.
+         <explanation>: We pass the image path and a list containing "baseball" as the label to detect.
+         <command>:
+         ```python
+         execution = tool.execute(image="path/to/image", labels=["baseball"])
+         ```
+
+         Example 2 (Multi-line command with data preparation):
+         <analysis>: The tool requires an image path, multiple labels, and a threshold for object detection.
+         <explanation>: We prepare the data by defining variables for the image path, labels, and threshold, then pass these to the tool.execute() function.
+         <command>:
+         ```python
+         image = "path/to/image"
+         labels = ["baseball", "football", "basketball"]
+         threshold = 0.5
+         execution = tool.execute(image=image, labels=labels, threshold=threshold)
+         ```
+
+         Example 3 (Multiple executions):
+         <analysis>: We need to process multiple images for baseball detection.
+         <explanation>: We call the tool for each image path, using the same label and threshold for all.
+         <command>:
+         ```python
+         execution = tool.execute(image="path/to/image1", labels=["baseball"], threshold=0.5)
+         execution = tool.execute(image="path/to/image2", labels=["baseball"], threshold=0.5)
+         execution = tool.execute(image="path/to/image3", labels=["baseball"], threshold=0.5)
+         ```
+
+         Some Wrong Examples:
+         <command>:
+         ```python
+         execution1 = tool.execute(query="...")
+         execution2 = tool.execute(query="...")
+         ```
+         Reason: only `execution = tool.execute` is allowed, not `execution1` or `execution2`.
+
+         <command>:
+         ```python
+         urls = [
+             "https://example.com/article1",
+             "https://example.com/article2"
+         ]
+
+         execution = tool.execute(url=urls[0])
+         execution = tool.execute(url=urls[1])
+         ```
+         Reason: The command should process multiple items in a single execution, not separate executions for each item.
+
+         Remember: Your <command> field MUST be valid Python code including any necessary data preparation steps and one or more `execution = tool.execute(` calls, without any additional explanatory text. The format `execution = tool.execute` must be strictly followed, and the last line must begin with `execution = tool.execute` to capture the final output.
+         """
+
+         llm_generate_tool_command = ChatOpenAI(model_string=self.llm_engine_name, is_multimodal=False)
+         tool_command = llm_generate_tool_command(prompt_generate_tool_command, response_format=ToolCommand)
+
+         return tool_command
+
+     # def extract_explanation_and_command(self, text: str) -> tuple:
+     #     # Extract explanation
+     #     explanation_pattern = r"Command Explanation:(.*?)Generated Command:"
+     #     explanation_match = re.search(explanation_pattern, text, re.DOTALL)
+     #     explanation = explanation_match.group(1).strip() if explanation_match else "No explanation found."
+     #     # Extract command
+     #     command_pattern = r"Generated Command:.*?```python\n(.*?)```"
+     #     command_match = re.search(command_pattern, text, re.DOTALL)
+     #     command = command_match.group(1).strip() if command_match else "No command found."
+
+     def extract_explanation_and_command(self, response: ToolCommand) -> tuple:
+         def normalize_code(code: str) -> str:
+             # Remove leading and trailing whitespace and triple backticks
+             return re.sub(r'^```python\s*', '', code).rstrip('```').strip()
+
+         explanation = response.explanation.strip()
+         command = normalize_code(response.command.strip())
+         return explanation, command
+
+     def execute_tool_command(self, tool_name: str, command: str) -> Any:
+         """
+         Execute a tool command with timeout protection. If execution exceeds max_time seconds,
+         the function will be interrupted and return a timeout message.
+
+         Args:
+             tool_name (str): Name of the tool to execute
+             command (str): Command string containing tool.execute() calls
+
+         Returns:
+             Any: List of execution results or error message
+         """
+
+         def split_commands(command: str) -> List[str]:
+             # Use regex to find all tool.execute() commands and their surrounding code
+             pattern = r'.*?execution\s*=\s*tool\.execute\([^\n]*\)\s*(?:\n|$)'
+             blocks = re.findall(pattern, command, re.DOTALL)
+             return [block.strip() for block in blocks if block.strip()]
+
+         def execute_with_timeout(block: str, local_context: dict) -> Optional[str]:
+             if self.enable_signal:
+                 # Set up the timeout handler
+                 signal.signal(signal.SIGALRM, timeout_handler)
+                 signal.alarm(self.max_time)
+
+             try:
+                 # Execute the block in the local context
+                 exec(block, globals(), local_context)
+                 result = local_context.get('execution')
+                 if self.enable_signal:
+                     signal.alarm(0)  # Disable the alarm
+                 return result
+             except TimeoutError:
+                 return f"Execution timed out after {self.max_time} seconds"
+             finally:
+                 if self.enable_signal:
+                     signal.alarm(0)  # Ensure alarm is disabled even if other exceptions occur
+
+         # Import the tool module and instantiate it
+         module_name = f"tools.{tool_name.lower().replace('_tool', '')}.tool"
+
+         # print(f"Attempting to import module: {module_name}")
+         # print(f"Current sys.path: {sys.path}")
+
+         try:
+             # Dynamically import the module
+             module = importlib.import_module(module_name)
+
+             # Get the tool class
+             tool_class = getattr(module, tool_name)
+
+             # Check if the tool requires an LLM engine
+             # NOTE FIXME may need to refine base.py and tool.py to handle this better
+             if getattr(tool_class, 'require_llm_engine', False):
+                 # Instantiate the tool with the model_string
+                 tool = tool_class(model_string=self.llm_engine_name)
+             else:
+                 # Instantiate the tool without model_string for tools that don't require it
+                 tool = tool_class()
+
+             # Set the custom output directory
+             # NOTE FIXME: May have a better way to handle this
+             tool.set_custom_output_dir(self.query_cache_dir)
+
+             # Split the command into blocks, execute each one and store execution results
+             command_blocks = split_commands(command)
+             executions = []
+
+             for block in command_blocks:
+                 # Create a local context to safely execute the block
+                 local_context = {'tool': tool}
+
+                 # Execute the block with timeout protection
+                 result = execute_with_timeout(block, local_context)
+
+                 if result is not None:
+                     executions.append(result)
+                 else:
+                     executions.append(f"No execution captured from block: {block}")
+
+             # Return all the execution results
+             return executions
+         except Exception as e:
+             return f"Error in execute_tool_command: {str(e)}"
opentools/models/formatters.py ADDED
@@ -0,0 +1,40 @@
+ from pydantic import BaseModel
+
+ # Planner: QueryAnalysis
+ class QueryAnalysis(BaseModel):
+     concise_summary: str
+     required_skills: str
+     relevant_tools: str
+     additional_considerations: str
+
+     def __str__(self):
+         return f"""
+         Concise Summary: {self.concise_summary}
+
+         Required Skills:
+         {self.required_skills}
+
+         Relevant Tools:
+         {self.relevant_tools}
+
+         Additional Considerations:
+         {self.additional_considerations}
+         """
+
+ # Planner: NextStep
+ class NextStep(BaseModel):
+     justification: str
+     context: str
+     sub_goal: str
+     tool_name: str
+
+ # Planner: MemoryVerification
+ class MemoryVerification(BaseModel):
+     analysis: str
+     stop_signal: bool
+
+ # Executor: ToolCommand
+ class ToolCommand(BaseModel):
+     analysis: str
+     explanation: str
+     command: str
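These Pydantic models are passed as the `response_format` argument to `ChatOpenAI`, which routes the request through the structured-outputs `parse` endpoint for the models listed in `OPENAI_STRUCTURED_MODELS`. A hedged sketch (assumes `OPENAI_API_KEY` is set and a structured-output-capable model):

```python
from opentools.engine.openai import ChatOpenAI
from opentools.models.formatters import ToolCommand

llm = ChatOpenAI(model_string="gpt-4o-mini-2024-07-18", is_multimodal=False)
result = llm.generate(
    "Write a command that calls tool.execute(query='2+2') and assigns it to `execution`.",
    response_format=ToolCommand,
)
# For a structured-output model, `result` is a parsed ToolCommand instance
# (on API errors, generate returns an error dict instead).
print(result.analysis)
print(result.explanation)
print(result.command)
```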
opentools/models/initializer.py ADDED
@@ -0,0 +1,179 @@
+ import os
+ import sys
+ import importlib
+ import inspect
+ import traceback
+ from typing import Dict, Any, List, Tuple
+
+
+ class Initializer:
+     def __init__(self, enabled_tools: List[str] = [], model_string: str = None):
+         self.toolbox_metadata = {}
+         self.available_tools = []
+         self.enabled_tools = enabled_tools
+         self.model_string = model_string  # llm model string
+
+         print("\nInitializing OpenTools...")
+         print(f"Enabled tools: {self.enabled_tools}")
+         print(f"LLM model string: {self.model_string}")
+         self._set_up_tools()
+
+     def get_project_root(self):
+         current_dir = os.path.dirname(os.path.abspath(__file__))
+         while current_dir != '/':
+             if os.path.exists(os.path.join(current_dir, 'opentools')):
+                 return os.path.join(current_dir, 'opentools')
+             current_dir = os.path.dirname(current_dir)
+         raise Exception("Could not find project root")
+
+     def load_tools_and_get_metadata(self) -> Dict[str, Any]:
+         print("Loading tools and getting metadata...")
+         self.toolbox_metadata = {}
+         opentools_dir = self.get_project_root()
+         tools_dir = os.path.join(opentools_dir, 'tools')
+
+         print(f"OpenTools directory: {opentools_dir}")
+         print(f"Tools directory: {tools_dir}")
+
+         # Add the OpenTools directory and its parent to the Python path
+         sys.path.insert(0, opentools_dir)
+         sys.path.insert(0, os.path.dirname(opentools_dir))
+         print(f"Updated Python path: {sys.path}")
+
+         if not os.path.exists(tools_dir):
+             print(f"Error: Tools directory does not exist: {tools_dir}")
+             return self.toolbox_metadata
+
+         for root, dirs, files in os.walk(tools_dir):
+             # print(f"\nScanning directory: {root}")
+             if 'tool.py' in files and os.path.basename(root) in self.available_tools:
+                 file = 'tool.py'
+                 module_path = os.path.join(root, file)
+                 module_name = os.path.splitext(file)[0]
+                 relative_path = os.path.relpath(module_path, opentools_dir)
+                 import_path = '.'.join(os.path.split(relative_path)).replace(os.sep, '.')[:-3]
+
+                 print(f"\nAttempting to import: {import_path}")
+                 try:
+                     module = importlib.import_module(import_path)
+                     for name, obj in inspect.getmembers(module):
+                         if inspect.isclass(obj) and name.endswith('Tool') and name != 'BaseTool':
+                             print(f"Found tool class: {name}")
+                             try:
+                                 # Check if the tool requires an LLM engine
+                                 if hasattr(obj, 'require_llm_engine') and obj.require_llm_engine:
+                                     tool_instance = obj(model_string=self.model_string)
+                                 else:
+                                     tool_instance = obj()
+
+                                 self.toolbox_metadata[name] = {
+                                     'tool_name': getattr(tool_instance, 'tool_name', 'Unknown'),
+                                     'tool_description': getattr(tool_instance, 'tool_description', 'No description'),
+                                     'tool_version': getattr(tool_instance, 'tool_version', 'Unknown'),
+                                     'input_types': getattr(tool_instance, 'input_types', {}),
+                                     'output_type': getattr(tool_instance, 'output_type', 'Unknown'),
+                                     'demo_commands': getattr(tool_instance, 'demo_commands', []),
+                                     'user_metadata': getattr(tool_instance, 'user_metadata', {}),  # NOTE: This is a placeholder for user-defined metadata
+                                     'require_llm_engine': getattr(obj, 'require_llm_engine', False),
+                                 }
+                                 print(f"\nMetadata for {name}: {self.toolbox_metadata[name]}")
+                             except Exception as e:
+                                 print(f"Error instantiating {name}: {str(e)}")
+                 except Exception as e:
+                     print(f"Error loading module {module_name}: {str(e)}")
+
+         print(f"\nTotal number of tools loaded: {len(self.toolbox_metadata)}")
+
+         return self.toolbox_metadata
+
+     def run_demo_commands(self) -> List[str]:
+         print("\nRunning demo commands for each tool...")
+         self.available_tools = []
+
+         for tool_name, tool_data in self.toolbox_metadata.items():
+             print(f"\nChecking availability of {tool_name}...")
+
+             try:
+                 # Import the tool module
+                 module_name = f"tools.{tool_name.lower().replace('_tool', '')}.tool"
+                 module = importlib.import_module(module_name)
+
+                 # Get the tool class
+                 tool_class = getattr(module, tool_name)
+
+                 # Instantiate the tool
+                 tool_instance = tool_class()
+
+                 # FIXME This is a temporary workaround to avoid running demo commands
+                 self.available_tools.append(tool_name)
+
+                 # # TODO Run the first demo command if available
+                 # demo_commands = tool_data.get('demo_commands', [])
+                 # if demo_commands:
+                 #     print(f"Running demo command: {demo_commands[0]['command']}")
+                 #     # Extract the arguments from the demo command
+                 #     command = demo_commands[0]['command']
+                 #     args_start = command.index('(') + 1
+                 #     args_end = command.rindex(')')
+                 #     args_str = command[args_start:args_end]
+
+                 #     # Create a dictionary of arguments
+                 #     args_dict = eval(f"dict({args_str})")
+
+                 #     # Execute the demo command
+                 #     result = tool_instance.execute(**args_dict)
+                 #     print(f"Demo command executed successfully. Result: {result}")
+
+                 #     self.available_tools.append(tool_name)
+                 # else:
+                 #     print(f"No demo commands available for {tool_name}")
+                 #     # If no demo commands, we'll assume the tool is available
+                 #     self.available_tools.append(tool_name)
+
+             except Exception as e:
+                 print(f"Error checking availability of {tool_name}: {str(e)}")
+                 print(traceback.format_exc())
+
+         # Update the toolbox metadata with the available tools
+         self.toolbox_metadata = {tool: self.toolbox_metadata[tool] for tool in self.available_tools}
+         print(f"\nUpdated total number of available tools: {len(self.toolbox_metadata)}")
+         print(f"\nAvailable tools: {self.available_tools}")
+
+         return self.available_tools
+
+     def _set_up_tools(self) -> None:
+         print("Setting up tools...")
+
+         # Keep enabled tools
+         self.available_tools = [tool.lower().replace('_tool', '') for tool in self.enabled_tools]
+
+         # Load tools and get metadata
+         self.load_tools_and_get_metadata()
+
+         # Run demo commands to determine available tools
+         self.run_demo_commands()
+
+         # Filter toolbox_metadata to include only available tools
+         self.toolbox_metadata = {tool: self.toolbox_metadata[tool] for tool in self.available_tools}
+
+         print(f"\nTotal number of available tools: {len(self.available_tools)}")
+         print(f"Available tools: {self.available_tools}")
+         print(f"Enabled tools: {self.enabled_tools}")
+
+
+ if __name__ == "__main__":
+     enabled_tools = ["Generalist_Solution_Generator_Tool"]
+     initializer = Initializer(enabled_tools=enabled_tools)
+
+     print("\nAvailable tools:")
+     print(initializer.available_tools)
+
+     print("\nToolbox metadata for available tools:")
+     print(initializer.toolbox_metadata)
opentools/models/memory.py ADDED
@@ -0,0 +1,84 @@
+ from typing import Dict, Any, List, Union, Optional
+ import os
+
+ class Memory:
+     # TODO Need to fix this to support multiple data sources (e.g. images, pdf, txt, etc.)
+
+     def __init__(self):
+         self.query: Optional[str] = None
+         self.files: List[Dict[str, str]] = []
+         self.actions: Dict[str, Dict[str, Any]] = {}
+         self._init_file_types()
+
+     def set_query(self, query: str) -> None:
+         if not isinstance(query, str):
+             raise TypeError("Query must be a string")
+         self.query = query
+
+     def _init_file_types(self):
+         self.file_types = {
+             'image': ['.jpg', '.jpeg', '.png', '.gif', '.bmp'],
+             'text': ['.txt', '.md'],
+             'document': ['.pdf', '.doc', '.docx'],
+             'code': ['.py', '.js', '.java', '.cpp', '.h'],
+             'data': ['.json', '.csv', '.xml'],
+             'spreadsheet': ['.xlsx', '.xls'],
+             'presentation': ['.ppt', '.pptx'],
+         }
+         self.file_type_descriptions = {
+             'image': "An image file ({ext} format) provided as context for the query",
+             'text': "A text file ({ext} format) containing additional information related to the query",
+             'document': "A document ({ext} format) with content relevant to the query",
+             'code': "A source code file ({ext} format) potentially related to the query",
+             'data': "A data file ({ext} format) containing structured data pertinent to the query",
+             'spreadsheet': "A spreadsheet file ({ext} format) with tabular data relevant to the query",
+             'presentation': "A presentation file ({ext} format) with slides related to the query",
+         }
+
+     def _get_default_description(self, file_name: str) -> str:
+         _, ext = os.path.splitext(file_name)
+         ext = ext.lower()
+
+         for file_type, extensions in self.file_types.items():
+             if ext in extensions:
+                 return self.file_type_descriptions[file_type].format(ext=ext[1:])
+
+         return f"A file with {ext[1:]} extension, provided as context for the query"
+
+     def add_file(self, file_name: Union[str, List[str]], description: Union[str, List[str], None] = None) -> None:
+         if isinstance(file_name, str):
+             file_name = [file_name]
+
+         if description is None:
+             description = [self._get_default_description(fname) for fname in file_name]
+         elif isinstance(description, str):
+             description = [description]
+
+         if len(file_name) != len(description):
+             raise ValueError("The number of files and descriptions must match.")
+
+         for fname, desc in zip(file_name, description):
+             self.files.append({
+                 'file_name': fname,
+                 'description': desc
+             })
+
+     def add_action(self, step_count: int, tool_name: str, sub_goal: str, command: str, result: Any) -> None:
+         action = {
+             'tool_name': tool_name,
+             'sub_goal': sub_goal,
+             'command': command,
+             'result': result,
+         }
+         step_name = f"Action Step {step_count}"
+         self.actions[step_name] = action
+
+     def get_query(self) -> Optional[str]:
+         return self.query
+
+     def get_files(self) -> List[Dict[str, str]]:
+         return self.files
+
+     def get_actions(self) -> Dict[str, Dict[str, Any]]:
+         return self.actions
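A short usage sketch of `Memory` (the file name and results below are made up): descriptions are inferred from file extensions when omitted, and actions are keyed as "Action Step N":

```python
from opentools.models.memory import Memory

memory = Memory()
memory.set_query("How many baseballs are in the image?")
memory.add_file("example/image.jpg")  # description inferred from the .jpg extension
memory.add_action(
    step_count=1,
    tool_name="Object_Detector_Tool",
    sub_goal='Detect baseballs in "example/image.jpg"',
    command='execution = tool.execute(image="example/image.jpg", labels=["baseball"])',
    result=[{"label": "baseball", "count": 3}],
)
print(memory.get_files())
print(memory.get_actions()["Action Step 1"])
```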
opentools/models/planner.py ADDED
@@ -0,0 +1,368 @@
+ import os
+ import re
+ from PIL import Image
+ from io import BytesIO
+ from typing import Dict, Any, List, Tuple
+
+ from opentools.engine.openai import ChatOpenAI
+ from opentools.models.memory import Memory
+ from opentools.models.formatters import QueryAnalysis, NextStep, MemoryVerification
+
+ class Planner:
+     def __init__(self, llm_engine_name: str, toolbox_metadata: dict = None, available_tools: List = None):
+         self.llm_engine_name = llm_engine_name
+         self.llm_engine_mm = ChatOpenAI(model_string=llm_engine_name, is_multimodal=True)
+         self.llm_engine = ChatOpenAI(model_string=llm_engine_name, is_multimodal=False)
+         self.toolbox_metadata = toolbox_metadata if toolbox_metadata is not None else {}
+         self.available_tools = available_tools if available_tools is not None else []
+
+     def get_image_info(self, image_path: str) -> Dict[str, Any]:
+         image_info = {}
+         if image_path and os.path.isfile(image_path):
+             image_info["image_path"] = image_path
+             try:
+                 with Image.open(image_path) as img:
+                     width, height = img.size
+                     image_info.update({
+                         "width": width,
+                         "height": height
+                     })
+             except Exception as e:
+                 print(f"Error processing image file: {str(e)}")
+         return image_info
+
+     def get_image_info_bytes(self, image_bytes: bytes) -> Dict[str, Any]:
+         image_info = {}
+         if image_bytes:
+             try:
+                 with Image.open(BytesIO(image_bytes)) as img:
+                     width, height = img.size
+                     image_info.update({
+                         "image_path": 'image.jpg',  # generic image name
+                         "width": width,
+                         "height": height
+                     })
+             except Exception as e:
+                 print(f"Error processing image bytes: {str(e)}")
+         return image_info
+
+     def generate_base_response(self, question: str, image: str, max_tokens: int = 4000, bytes_mode: bool = False) -> str:
+         if bytes_mode:
+             image_info = self.get_image_info_bytes(image)
+         else:
+             image_info = self.get_image_info(image)
+
+         input_data = [question]
+         if image_info and "image_path" in image_info and not bytes_mode:
+             try:
+                 with open(image_info["image_path"], 'rb') as file:
+                     image_bytes = file.read()
+                 input_data.append(image_bytes)
+             except Exception as e:
+                 print(f"Error reading image file: {str(e)}")
+
+         self.base_response = self.llm_engine_mm(input_data, max_tokens=max_tokens)
+
+         return self.base_response
+
+     def analyze_query(self, question: str, image: str, bytes_mode: bool = False) -> str:
+         if bytes_mode:
+             image_info = self.get_image_info_bytes(image)
+         else:
+             image_info = self.get_image_info(image)
+         print("image_info: ", image_info)
+
+         query_prompt = f"""
+         Task: Analyze the given query with accompanying inputs and determine the skills and tools needed to address it effectively.
+
+         Available tools: {self.available_tools}
+
+         Metadata for the tools: {self.toolbox_metadata}
+
+         Image: {image_info}
+
+         Query: {question}
+
+         Instructions:
+         1. Carefully read and understand the query and any accompanying inputs.
+         2. Identify the main objectives or tasks within the query.
+         3. List the specific skills that would be necessary to address the query comprehensively.
+         4. Examine the available tools in the toolbox and determine which ones might be relevant and useful for addressing the query. Make sure to consider the user metadata for each tool, including limitations and potential applications (if available).
+         5. Provide a brief explanation for each skill and tool you've identified, describing how it would contribute to answering the query.
+
+         Your response should include:
+         1. A concise summary of the query's main points and objectives, as well as content in any accompanying inputs.
+         2. A list of required skills, with a brief explanation for each.
+         3. A list of relevant tools from the toolbox, with a brief explanation of how each tool would be utilized and its potential limitations.
+         4. Any additional considerations that might be important for addressing the query effectively.
+
+         Please present your analysis in a clear, structured format.
+         """
+
+         input_data = [query_prompt]
+         if bytes_mode:
+             input_data.append(image)  # `image` already holds the raw bytes
+         else:
+             try:
+                 with open(image_info["image_path"], 'rb') as file:
+                     image_bytes = file.read()
+                 input_data.append(image_bytes)
+             except Exception as e:
+                 print(f"Error reading image file: {str(e)}")
+
+         self.query_analysis = self.llm_engine_mm(input_data, response_format=QueryAnalysis)
+
+         return str(self.query_analysis).strip()
+
+     def extract_context_subgoal_and_tool(self, response: NextStep) -> Tuple[str, str, str]:
+
+         def normalize_tool_name(tool_name: str) -> str:
+             # Normalize the tool name to match the available tools
+             for tool in self.available_tools:
+                 if tool.lower() in tool_name.lower():
+                     return tool
+             return "No matched tool given: " + tool_name
+
+         try:
+             context = response.context.strip()
+             sub_goal = response.sub_goal.strip()
+             tool_name = normalize_tool_name(response.tool_name.strip())
+             return context, sub_goal, tool_name
+         except Exception as e:
+             print(f"Error extracting context, sub-goal, and tool name: {str(e)}")
+             return None, None, None
+
+     def generate_next_step(self, question: str, image: str, query_analysis: str, memory: Memory, step_count: int, max_step_count: int, bytes_mode: bool = False) -> NextStep:
+         prompt_generate_next_step = f"""
+         Task: Determine the optimal next step to address the given query based on the provided analysis, available tools, and previous steps taken.
+
+         Context:
+         Query: {question}
+         Image: {image if not bytes_mode else 'image.jpg'}
+         Query Analysis: {query_analysis}
+
+         Available Tools:
+         {self.available_tools}
+
+         Tool Metadata:
+         {self.toolbox_metadata}
+
+         Previous Steps and Their Results:
+         {memory.get_actions()}
+
+         Current Step: {step_count} in {max_step_count} steps
+         Remaining Steps: {max_step_count - step_count}
+
+         Instructions:
+         1. Analyze the context thoroughly, including the query, its analysis, any image, available tools and their metadata, and previous steps taken.
+
+         2. Determine the most appropriate next step by considering:
+            - Key objectives from the query analysis
+            - Capabilities of available tools
+            - Logical progression of problem-solving
+            - Outcomes from previous steps
+            - Current step count and remaining steps
+
+         3. Select ONE tool best suited for the next step, keeping in mind the limited number of remaining steps.
+
+         4. Formulate a specific, achievable sub-goal for the selected tool that maximizes progress towards answering the query.
+
+         Output Format:
+         <justification>: detailed explanation of why the selected tool is the best choice for the next step, considering the context and previous outcomes.
+         <context>: MUST include ALL necessary information for the tool to function, structured as follows:
+             * Relevant data from previous steps
+             * File names or paths created or used in previous steps (list EACH ONE individually)
+             * Variable names and their values from previous steps' results
+             * Any other context-specific information required by the tool
+         <sub_goal>: a specific, achievable objective for the tool, based on its metadata and previous outcomes. It MUST contain any involved data, file names, and variables from Previous Steps and Their Results that the tool can act upon.
+         <tool_name>: MUST be the exact name of a tool from the available tools list.
+
+         Rules:
+         - Select only ONE tool for this step.
+         - The sub-goal MUST directly address the query and be achievable by the selected tool.
+         - The Context section MUST include ALL necessary information for the tool to function, including ALL relevant file paths, data, and variables from previous steps.
+         - The tool name MUST exactly match one from the available tools list: {self.available_tools}.
+         - Avoid redundancy by considering previous steps and building on prior results.
+
+         Example (do not copy, use only as reference):
+         <justification>: [Your detailed explanation here]
+         <context>: Image path: "example/image.jpg", Previous detection results: [list of objects]
+         <sub_goal>: Detect and count the number of specific objects in the image "example/image.jpg"
+         <tool_name>: Object_Detector_Tool
+         """
+         next_step = self.llm_engine(prompt_generate_next_step, response_format=NextStep)
+         return next_step
+
+     def verificate_memory(self, question: str, image: str, query_analysis: str, memory: Memory, bytes_mode: bool = False) -> MemoryVerification:
+         if bytes_mode:
+             image_info = self.get_image_info_bytes(image)
+         else:
+             image_info = self.get_image_info(image)
+
+         prompt_memory_verification = f"""
+         Task: Thoroughly evaluate the completeness and accuracy of the memory for fulfilling the given query, considering the potential need for additional tool usage.
+
+         Context:
+         Query: {question}
+         Image: {image_info}
+         Available Tools: {self.available_tools}
+         Toolbox Metadata: {self.toolbox_metadata}
+         Initial Analysis: {query_analysis}
+         Memory (tools used and results): {memory.get_actions()}
+
+         Detailed Instructions:
+         1. Carefully analyze the query, initial analysis, and image (if provided):
+            - Identify the main objectives of the query.
+            - Note any specific requirements or constraints mentioned.
+            - If an image is provided, consider its relevance and what information it contributes.
+
+         2. Review the available tools and their metadata:
+            - Understand the capabilities, limitations, and best practices of each tool.
+            - Consider how each tool might be applicable to the query.
+
+         3. Examine the memory content in detail:
+            - Review each tool used and its execution results.
+            - Assess how well each tool's output contributes to answering the query.
+
+         4. Critical Evaluation (address each point explicitly):
+            a) Completeness: Does the memory fully address all aspects of the query?
+               - Identify any parts of the query that remain unanswered.
+               - Consider if all relevant information has been extracted from the image (if applicable).
+
+            b) Unused Tools: Are there any unused tools that could provide additional relevant information?
+               - Specify which unused tools might be helpful and why.
+
+            c) Inconsistencies: Are there any contradictions or conflicts in the information provided?
+               - If yes, explain the inconsistencies and suggest how they might be resolved.
+
+            d) Verification Needs: Is there any information that requires further verification due to tool limitations?
+               - Identify specific pieces of information that need verification and explain why.
+
+            e) Ambiguities: Are there any unclear or ambiguous results that could be clarified by using another tool?
+               - Point out specific ambiguities and suggest which tools could help clarify them.
+
+         5. Final Determination:
+            Based on your thorough analysis, decide if the memory is complete and accurate enough to generate the final output, or if additional tool usage is necessary.
+
+         Response Format:
+         <analysis>: Provide a detailed analysis of why the memory is sufficient. Reference specific information from the memory and explain its relevance to each aspect of the task. Address how each main point of the query has been satisfied.
+         <stop_signal>: Whether to stop the problem solving process and proceed to generating the final output.
+             * "True": if the memory is sufficient for addressing the query to proceed and no additional available tools need to be used. If ONLY manual verification without tools is needed, choose "True".
+             * "False": if the memory is insufficient and needs more information from additional tool usage.
+         """
+
+         input_data = [prompt_memory_verification]
+         if image_info:
+             try:
+                 with open(image_info["image_path"], 'rb') as file:
+                     image_bytes = file.read()
+                 input_data.append(image_bytes)
+             except Exception as e:
+                 print(f"Error reading image file: {str(e)}")
+
+         stop_verification = self.llm_engine_mm(input_data, response_format=MemoryVerification)
+
+         return stop_verification
+
+     def extract_conclusion(self, response: MemoryVerification) -> str:
+         if response.stop_signal:
+             return 'STOP'
+         else:
+             return 'CONTINUE'
+
+     def generate_final_output(self, question: str, image: str, memory: Memory, bytes_mode: bool = False) -> str:
+         if bytes_mode:
+             image_info = self.get_image_info_bytes(image)
+         else:
+             image_info = self.get_image_info(image)
+
+         prompt_generate_final_output = f"""
+         Task: Generate the final output based on the query, image, and tools used in the process.
+
+         Context:
+         Query: {question}
+         Image: {image_info}
+         Actions Taken:
+         {memory.get_actions()}
+
+         Instructions:
+         1. Review the query, image, and all actions taken during the process.
+         2. Consider the results obtained from each tool execution.
+         3. Incorporate the relevant information from the memory to generate the step-by-step final output.
+         4. The final output should be consistent and coherent using the results from the tools.
+
+         Output Structure:
+         Your response should be well-organized and include the following sections:
+
+         1. Summary:
+            - Provide a brief overview of the query and the main findings.
+
+         2. Detailed Analysis:
+            - Break down the process of answering the query step-by-step.
+            - For each step, mention the tool used, its purpose, and the key results obtained.
+            - Explain how each step contributed to addressing the query.
+
+         3. Key Findings:
+            - List the most important discoveries or insights gained from the analysis.
+            - Highlight any unexpected or particularly interesting results.
+
+         4. Answer to the Query:
+            - Directly address the original question with a clear and concise answer.
+            - If the query has multiple parts, ensure each part is answered separately.
+
+         5. Additional Insights (if applicable):
+            - Provide any relevant information or insights that go beyond the direct answer to the query.
+            - Discuss any limitations or areas of uncertainty in the analysis.
+
+         6. Conclusion:
+            - Summarize the main points and reinforce the answer to the query.
+            - If appropriate, suggest potential next steps or areas for further investigation.
+         """
+
+         input_data = [prompt_generate_final_output]
+         if image_info:
+             try:
+                 with open(image_info["image_path"], 'rb') as file:
+                     image_bytes = file.read()
+                 input_data.append(image_bytes)
+             except Exception as e:
+                 print(f"Error reading image file: {str(e)}")
+
+         final_output = self.llm_engine_mm(input_data)
+
+         return final_output
+
+     def generate_direct_output(self, question: str, image: str, memory: Memory, bytes_mode: bool = False) -> str:
+         if bytes_mode:
+             image_info = self.get_image_info_bytes(image)
+         else:
+             image_info = self.get_image_info(image)
+
+         prompt_generate_final_output = f"""
+         Context:
+         Query: {question}
+         Image: {image_info}
+         Initial Analysis:
+         {self.query_analysis}
+         Actions Taken:
+         {memory.get_actions()}
+
+         Please generate a concise output based on the query, image information, initial analysis, and actions taken. Break down the process into clear, logical, and coherent steps. Conclude with a precise and direct answer to the query.
+
+         Answer:
+         """
+
+         input_data = [prompt_generate_final_output]
+         if image_info:
+             try:
+                 with open(image_info["image_path"], 'rb') as file:
+                     image_bytes = file.read()
+                 input_data.append(image_bytes)
+             except Exception as e:
+                 print(f"Error reading image file: {str(e)}")
+
+         final_output = self.llm_engine_mm(input_data)
+
+         return final_output
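The solver that wires these classes together is still stubbed out in app.py (`# solver = None`). Based on the APIs above, a plan-act-verify loop might look roughly like the following (a hypothetical sketch, not part of this commit; the tool choice, model name, question, image path, and three-step budget are all assumptions):

```python
from opentools.models.initializer import Initializer
from opentools.models.planner import Planner
from opentools.models.memory import Memory
from opentools.models.executor import Executor

question, image = "How many baseballs are in the image?", "example/image.jpg"
model = "gpt-4o-mini-2024-07-18"

init = Initializer(enabled_tools=["Object_Detector_Tool"], model_string=model)
planner = Planner(model, init.toolbox_metadata, init.available_tools)
executor = Executor(model)
executor.set_query_cache_dir(None)  # None -> timestamped dir under solver_cache/
memory = Memory()
memory.set_query(question)

analysis = planner.analyze_query(question, image)
for step in range(1, 4):  # assumed max of 3 steps
    # Plan: pick a tool and sub-goal given everything done so far.
    next_step = planner.generate_next_step(question, image, analysis, memory, step, 3)
    context, sub_goal, tool = planner.extract_context_subgoal_and_tool(next_step)

    # Act: have the LLM write the tool command, then run it with a timeout.
    cmd = executor.generate_tool_command(question, image, context, sub_goal, tool,
                                         init.toolbox_metadata.get(tool, {}))
    _, command = executor.extract_explanation_and_command(cmd)
    memory.add_action(step, tool, sub_goal, command,
                      executor.execute_tool_command(tool, command))

    # Verify: stop once the memory is judged sufficient.
    verification = planner.verificate_memory(question, image, analysis, memory)
    if planner.extract_conclusion(verification) == 'STOP':
        break

print(planner.generate_final_output(question, image, memory))
```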
opentools/models/utlis.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ # import json
+
+ # def truncate_result(result, max_length: int = 100000, truncation_indicator: str = "...") -> str:
+ #     """
+ #     Truncate the result to the specified length while preserving JSON structure when possible.
+ #
+ #     Args:
+ #         result: The result to truncate (can be str, list, dict, or other types)
+ #         max_length: Maximum length of the output string (default: 100000)
+ #         truncation_indicator: String to indicate truncation (default: "...")
+ #
+ #     Returns:
+ #         str: Truncated string representation of the result
+ #     """
+ #     if isinstance(result, (dict, list)):
+ #         try:
+ #             result_str = json.dumps(result, ensure_ascii=False)
+ #         except Exception:
+ #             result_str = str(result)
+ #     else:
+ #         result_str = str(result)
+
+ #     indicator_length = len(truncation_indicator)
+
+ #     if len(result_str) > max_length:
+ #         # For JSON-like strings, try to find the last complete structure
+ #         if result_str.startswith('{') or result_str.startswith('['):
+ #             # Find the last complete element
+ #             pos = max_length - indicator_length
+ #             while pos > 0 and not (
+ #                 result_str[pos] in ',]}' and
+ #                 result_str[pos:].count('"') % 2 == 0
+ #             ):
+ #                 pos -= 1
+ #             if pos > 0:
+ #                 return result_str[:pos + 1] + truncation_indicator
+
+ #         # Default truncation if not JSON or no suitable truncation point found
+ #         return result_str[:max_length - indicator_length] + truncation_indicator
+
+ #     return result_str
+
+ def make_json_serializable(obj):
+     # Recursively convert an arbitrary object into JSON-serializable primitives.
+     if isinstance(obj, (str, int, float, bool, type(None))):
+         return obj
+     elif isinstance(obj, dict):
+         return {make_json_serializable(key): make_json_serializable(value) for key, value in obj.items()}
+     elif isinstance(obj, list):
+         return [make_json_serializable(element) for element in obj]
+     elif hasattr(obj, '__dict__'):
+         return make_json_serializable(obj.__dict__)
+     else:
+         return str(obj)
+
+
+ def make_json_serializable_truncated(obj, max_length: int = 100000):
+     # Same conversion, but any value whose string form exceeds max_length is truncated with "...".
+     if isinstance(obj, (int, float, bool, type(None))):
+         if isinstance(obj, (int, float)) and len(str(obj)) > max_length:
+             return str(obj)[:max_length - 3] + "..."
+         return obj
+     elif isinstance(obj, str):
+         return obj if len(obj) <= max_length else obj[:max_length - 3] + "..."
+     elif isinstance(obj, dict):
+         return {make_json_serializable_truncated(key, max_length): make_json_serializable_truncated(value, max_length)
+                 for key, value in obj.items()}
+     elif isinstance(obj, list):
+         return [make_json_serializable_truncated(element, max_length) for element in obj]
+     elif hasattr(obj, '__dict__'):
+         return make_json_serializable_truncated(obj.__dict__, max_length)
+     else:
+         result = str(obj)
+         return result if len(result) <= max_length else result[:max_length - 3] + "..."
+
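As a quick sanity check of the two helpers above, here is a usage sketch (the `Record` class is invented purely for this demo):

```python
import json

class Record:  # hypothetical object with a __dict__, used only for illustration
    def __init__(self):
        self.name = "demo"
        self.payload = "x" * 50

record = {"ok": True, "obj": Record()}

# Objects with a __dict__ are unpacked recursively; anything else falls back to str().
print(json.dumps(make_json_serializable(record)))

# Same conversion, but every oversized string is cut to max_length with a "..." suffix.
print(json.dumps(make_json_serializable_truncated(record, max_length=10)))
```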
opentools/tools/README.md ADDED
@@ -0,0 +1,44 @@
+
+ ## Testing the Tools
+
+ To test the text detection and object detection tools, follow these steps:
+
+ 1. **Navigate to the Project Root:**
+
+     Change your current directory to the repository root and add it to `PYTHONPATH` so the `opentools` package can be imported. Replace `your_path` with the actual path to your project directory.
+
+     ```sh
+     cd your_path/toolbox-agent
+     export PYTHONPATH=$(pwd)
+     ```
+
+ 2. **Run the Tools:**
+
+     Execute each tool from the `opentools` directory using the following commands:
+
+     ```sh
+     cd opentools
+     python tools/text_detector/tool.py
+     python tools/object_detector/tool.py
+     ```
+
+ ## File Structure
+
+ The project is organized as follows:
+
+ ```sh
+ ├── __init__.py        # Initializes the tools package and possibly exposes submodules
+ ├── base.py            # Base class for tools, providing common functionality
+ ├── text_detector/     # Directory for the text detection tool
+ │   ├── readme.md      # Documentation for the text detection tool
+ │   └── tool.py        # Implementation of the text detection tool
+ ├── object_detector/   # Directory for the object detection tool
+ │   ├── readme.md      # Documentation for the object detection tool
+ │   └── tool.py        # Implementation of the object detection tool
+ ```
opentools/tools/__init__.py ADDED
File without changes
opentools/tools/base.py ADDED
@@ -0,0 +1,103 @@
+ # opentools/tools/base.py
+
+ from opentools.engine.openai import ChatOpenAI
+
+ class BaseTool:
+     """
+     A base class for building tool classes that perform specific tasks, such as image processing or text detection.
+     """
+
+     require_llm_engine = False  # Default is False; tools that need an LLM engine should set this to True
+
+     def __init__(self, tool_name=None, tool_description=None, tool_version=None, input_types=None, output_type=None, demo_commands=None, output_dir=None, user_metadata=None, model_string=None):
+         """
+         Initialize the base tool with optional metadata.
+
+         Parameters:
+             tool_name (str): The name of the tool.
+             tool_description (str): A description of the tool.
+             tool_version (str): The version of the tool.
+             input_types (dict): The expected input types for the tool.
+             output_type (str): The expected output type for the tool.
+             demo_commands (list): A list of example commands for using the tool.
+             output_dir (str): The directory where the tool should save its output (optional).
+             user_metadata (dict): Additional metadata specific to user needs (optional).
+             model_string (str): The model string for the LLM engine (optional; only used if require_llm_engine is True).
+         """
+         self.tool_name = tool_name
+         self.tool_description = tool_description
+         self.tool_version = tool_version
+         self.input_types = input_types
+         self.output_type = output_type
+         self.demo_commands = demo_commands
+         self.output_dir = output_dir
+         self.user_metadata = user_metadata
+         self.model_string = model_string
+
+     def set_metadata(self, tool_name, tool_description, tool_version, input_types, output_type, demo_commands, user_metadata=None):
+         """
+         Set the metadata for the tool.
+
+         Parameters:
+             tool_name (str): The name of the tool.
+             tool_description (str): A description of the tool.
+             tool_version (str): The version of the tool.
+             input_types (dict): The expected input types for the tool.
+             output_type (str): The expected output type for the tool.
+             demo_commands (list): A list of example commands for using the tool.
+             user_metadata (dict): Additional metadata specific to user needs (optional).
+         """
+         self.tool_name = tool_name
+         self.tool_description = tool_description
+         self.tool_version = tool_version
+         self.input_types = input_types
+         self.output_type = output_type
+         self.demo_commands = demo_commands
+         self.user_metadata = user_metadata
+
+     def get_metadata(self):
+         """
+         Returns the metadata for the tool.
+
+         Returns:
+             dict: A dictionary containing the tool's metadata.
+         """
+         metadata = {
+             "tool_name": self.tool_name,
+             "tool_description": self.tool_description,
+             "tool_version": self.tool_version,
+             "input_types": self.input_types,
+             "output_type": self.output_type,
+             "demo_commands": self.demo_commands,
+             "require_llm_engine": self.require_llm_engine,
+         }
+         if self.user_metadata:
+             metadata["user_metadata"] = self.user_metadata
+         return metadata
+
+     def set_custom_output_dir(self, output_dir):
+         """
+         Set a custom output directory for the tool.
+
+         Parameters:
+             output_dir (str): The new output directory path.
+         """
+         self.output_dir = output_dir
+
+     def set_llm_engine(self, model_string):
+         """
+         Set the LLM engine for the tool.
+
+         Parameters:
+             model_string (str): The model string for the LLM engine.
+         """
+         self.model_string = model_string
+
+     def execute(self, *args, **kwargs):
+         """
+         Execute the tool's main functionality. This method should be overridden by subclasses.
+
+         Raises:
+             NotImplementedError: If the subclass does not implement this method.
+         """
+         raise NotImplementedError("Subclasses must implement the execute method.")
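To illustrate the contract `BaseTool` defines, a minimal hypothetical subclass (the `Echo_Tool` name and behavior are invented here, not part of this commit) only needs to pass its metadata to `super().__init__` and override `execute`:

```python
class Echo_Tool(BaseTool):
    """Hypothetical example tool: returns its input unchanged."""

    def __init__(self):
        super().__init__(
            tool_name="Echo_Tool",
            tool_description="Returns the input text unchanged.",
            tool_version="0.0.1",
            input_types={"text": "str - The text to echo."},
            output_type="str - The same text.",
            demo_commands=[{"command": 'tool.execute(text="hi")',
                            "description": "Echo a string."}],
        )

    def execute(self, text):
        return text

tool = Echo_Tool()
print(tool.get_metadata()["tool_name"])  # -> Echo_Tool
print(tool.execute(text="hello"))        # -> hello
```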
opentools/tools/generalist_solution_generator/examples/mathvista_113.png ADDED
opentools/tools/generalist_solution_generator/tool.py ADDED
@@ -0,0 +1,142 @@
+ import os
+ from opentools.tools.base import BaseTool
+ from opentools.engine.openai import ChatOpenAI
+
+ class Generalist_Solution_Generator_Tool(BaseTool):
+     require_llm_engine = True
+
+     def __init__(self, model_string="gpt-4o-mini"):
+         super().__init__(
+             tool_name="Generalist_Solution_Generator_Tool",
+             tool_description="A generalized tool that takes a query from the user as a prompt and answers the question step by step to the best of its ability. It can also accept an image.",
+             tool_version="1.0.0",
+             input_types={
+                 "prompt": "str - The prompt that includes the query from the user to guide the agent in generating a response (Examples: 'Describe this image in detail').",
+                 "image": "str - The path to the image file if applicable (default: None).",
+             },
+             output_type="str - The generated response to the original query prompt",
+             demo_commands=[
+                 {
+                     "command": 'execution = tool.execute(prompt="Summarize the following text in a few lines")',
+                     "description": "Generate a short summary given the prompt from the user."
+                 },
+                 {
+                     "command": 'execution = tool.execute(prompt="Explain the mood of this scene.", image="path/to/image1.png")',
+                     "description": "Generate a caption focusing on the mood using a specific prompt and image."
+                 },
+                 {
+                     "command": 'execution = tool.execute(prompt="Give your best coordinate estimate for the pacemaker in the image and return (x1, y1, x2, y2)", image="path/to/image2.png")',
+                     "description": "Generate bounding box coordinates given the image and prompt from the user. The format should be (x1, y1, x2, y2)."
+                 },
+                 {
+                     "command": 'execution = tool.execute(prompt="Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?", image="path/to/image2.png")',
+                     "description": "Answer a question step by step given the image."
+                 }
+             ],
+             # # version 0 (bowen) (Generalist: %; 6 Tools: %; Generalist + 6 Tools: %)
+             # user_metadata = {
+             #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
+             #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge. For optimal results: 1) Provide clear, specific prompts. 2) Use it as a starting point for complex tasks, then refine with specialized tools. 3) Verify important information from its responses. 4) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
+             # }
+             # version 2 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 54%)
+             user_metadata={
+                 "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
+                 "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
+                     "1) Provide clear, specific prompts.\n"
+                     "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
+                     "3) For complex queries, break them down into subtasks and use the tool multiple times.\n"
+                     "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
+                     "5) Verify important information from its responses.\n"
+                     "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
+             }
+             # # version 6 (Generalist: 70%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
+             # user_metadata = {
+             #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
+             #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
+             #         "1) Provide clear, specific prompts.\n"
+             #         "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
+             #         "3) For complex queries, break them down into smaller, focused sub-tasks and use the tool multiple times.\n"
+             #         "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
+             #         "5) Verify important information from its responses.\n"
+             #         "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
+             # }
+             # # version 8 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
+             # user_metadata = {
+             #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
+             #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
+             #         "1) Provide clear, specific prompts.\n"
+             #         "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
+             #         "3) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
+             #         "4) Verify important information from its responses.\n"
+             #         "5) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
+             # }
+         )
+         self.model_string = model_string
+
+     def execute(self, prompt, image=None):
+         print(f"\nInitializing Generalist Tool with model: {self.model_string}")
+         multimodal = bool(image)
+         llm_engine = ChatOpenAI(model_string=self.model_string, is_multimodal=multimodal)
+
+         try:
+             input_data = [prompt]
+             if multimodal:
+                 if not os.path.isfile(image):
+                     return "Error: Invalid image file path."
+                 try:
+                     with open(image, 'rb') as file:
+                         image_bytes = file.read()
+                     input_data.append(image_bytes)
+                 except Exception as e:
+                     return f"Error reading image file: {str(e)}"
+
+                 response = llm_engine(input_data)
+             else:
+                 response = llm_engine(input_data[0])
+             return response
+         except Exception as e:
+             return f"Error generating response: {str(e)}"
+
+     def get_metadata(self):
+         metadata = super().get_metadata()
+         return metadata
+
+ if __name__ == "__main__":
+     # Test command:
+     """
+     Run the following commands in the terminal to test the script:
+
+     cd opentools
+     python tools/generalist_solution_generator/tool.py
+     """
+
+     # Get the directory of the current script
+     script_dir = os.path.dirname(os.path.abspath(__file__))
+     print(f"Script directory: {script_dir}")
+
+     # Example usage of the Generalist_Solution_Generator_Tool
+     tool = Generalist_Solution_Generator_Tool()
+     # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o-mini")
+     # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o")
+
+     # Get tool metadata
+     metadata = tool.get_metadata()
+     print(metadata)
+
+     # Construct the full path to the image using the script's directory
+     # relative_image_path = "../../tasks/minitoolbench/data/mathvista_113.png"
+     relative_image_path = "examples/mathvista_113.png"
+     image_path = os.path.join(script_dir, relative_image_path)
+     prompt = "Describe the image in detail."
+
+     # Execute the tool with the default prompt
+     try:
+         execution = tool.execute(prompt=prompt, image=image_path)
+         # execution = tool.execute(prompt=prompt)
+         print("Generated Response:")
+         print(execution)
+     except Exception as e:
+         print(f"Execution failed: {e}")
+
+     print("Done!")
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ easyocr==1.7.1
+ openai==1.58.1
+ python-dotenv==1.0.1
+ wikipedia==1.4.0
+ pillow==10.4.0
+ platformdirs==4.2.2
+ sympy==1.13.2
+ tenacity==9.0.0
+ diskcache==5.6.3
+ transformers==4.44.2
+ pymed==0.8.9
+ metapub==0.5.12
+ -e ./opentools
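The final `-e ./opentools` line asks pip to install the local package in editable mode, so source edits take effect without reinstalling. Assuming a fresh virtual environment at the repository root, setup is the usual one-liner:

```sh
pip install -r requirements.txt
```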
setup.py ADDED
@@ -0,0 +1,20 @@
+ from setuptools import setup, find_packages
+
+ setup(
+     name='opentools',
+     version='0.1.0',
+     # description='A flexible and versatile toolbox agent framework for complex tasks in both general and scientific scenarios.',
+     # long_description=open('README.md').read(),
+     # long_description_content_type='text/markdown',
+     # author='Pan Lu, Bowen Chen, Sheng Liu',
+     # author_email='[email protected]',
+     # url='',  # You can add a GitHub or project URL here
+     packages=find_packages(),
+     # install_requires=open('requirements.txt').read().splitlines(),
+     # classifiers=[
+     #     'Programming Language :: Python :: 3',
+     #     'License :: OSI Approved :: MIT License',
+     #     'Operating System :: OS Independent',
+     # ],
+     # python_requires='>=3.10',
+ )
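With most metadata fields still commented out, the package builds with just a name, a version, and whatever `find_packages()` discovers. An editable install from the directory containing this `setup.py` would presumably be:

```sh
pip install -e .
```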