bowenchen118 committed
Commit
1f529ba
·
1 Parent(s): aed6fa7
app.py CHANGED
@@ -20,289 +20,289 @@ from opentools.models.memory import Memory
 from opentools.models.executor import Executor
 from opentools.models.utlis import make_json_serializable
 
-# solver = None
-
-# class ChatMessage:
-#     def __init__(self, role: str, content: str, metadata: dict = None):
-#         self.role = role
-#         self.content = content
-#         self.metadata = metadata or {}
-
-# class Solver:
-#     def __init__(
-#         self,
-#         planner,
-#         memory,
-#         executor,
-#         task: str,
-#         task_description: str,
-#         output_types: str = "base,final,direct",
-#         index: int = 0,
-#         verbose: bool = True,
-#         max_steps: int = 10,
-#         max_time: int = 60,
-#         output_json_dir: str = "results",
-#         root_cache_dir: str = "cache"
-#     ):
-#         self.planner = planner
-#         self.memory = memory
-#         self.executor = executor
-#         self.task = task
-#         self.task_description = task_description
-#         self.index = index
-#         self.verbose = verbose
-#         self.max_steps = max_steps
-#         self.max_time = max_time
-#         self.output_json_dir = output_json_dir
-#         self.root_cache_dir = root_cache_dir
-
-#         self.output_types = output_types.lower().split(',')
-#         assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."
-
-#         # self.benchmark_data = self.load_benchmark_data()
-
-
-
-#     def stream_solve_user_problem(self, user_query: str, user_image: Image.Image, messages: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
-#         """
-#         Streams intermediate thoughts and final responses for the problem-solving process based on user input.
+solver = None
+
+class ChatMessage:
+    def __init__(self, role: str, content: str, metadata: dict = None):
+        self.role = role
+        self.content = content
+        self.metadata = metadata or {}
+
+class Solver:
+    def __init__(
+        self,
+        planner,
+        memory,
+        executor,
+        task: str,
+        task_description: str,
+        output_types: str = "base,final,direct",
+        index: int = 0,
+        verbose: bool = True,
+        max_steps: int = 10,
+        max_time: int = 60,
+        output_json_dir: str = "results",
+        root_cache_dir: str = "cache"
+    ):
+        self.planner = planner
+        self.memory = memory
+        self.executor = executor
+        self.task = task
+        self.task_description = task_description
+        self.index = index
+        self.verbose = verbose
+        self.max_steps = max_steps
+        self.max_time = max_time
+        self.output_json_dir = output_json_dir
+        self.root_cache_dir = root_cache_dir
+
+        self.output_types = output_types.lower().split(',')
+        assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."
+
+        # self.benchmark_data = self.load_benchmark_data()
+
+
+
+    def stream_solve_user_problem(self, user_query: str, user_image: Image.Image, messages: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
+        """
+        Streams intermediate thoughts and final responses for the problem-solving process based on user input.
 
-#         Args:
-#             user_query (str): The text query input from the user.
-#             user_image (Image.Image): The uploaded image from the user (PIL Image object).
-#             messages (list): A list of ChatMessage objects to store the streamed responses.
-#         """
-
-#         if user_image:
-#             # # Convert PIL Image to bytes (for processing)
-#             # img_bytes_io = io.BytesIO()
-#             # user_image.save(img_bytes_io, format="PNG") # Convert image to PNG bytes
-#             # img_bytes = img_bytes_io.getvalue() # Get bytes
+        Args:
+            user_query (str): The text query input from the user.
+            user_image (Image.Image): The uploaded image from the user (PIL Image object).
+            messages (list): A list of ChatMessage objects to store the streamed responses.
+        """
+
+        if user_image:
+            # # Convert PIL Image to bytes (for processing)
+            # img_bytes_io = io.BytesIO()
+            # user_image.save(img_bytes_io, format="PNG") # Convert image to PNG bytes
+            # img_bytes = img_bytes_io.getvalue() # Get bytes
 
-#             # Use image paths instead of bytes,
-#             os.makedirs(os.path.join(self.root_cache_dir, 'images'), exist_ok=True)
-#             img_path = os.path.join(self.root_cache_dir, 'images', str(uuid.uuid4()) + '.jpg')
-#             user_image.save(img_path)
-#         else:
-#             img_path = None
-
-#         # Set query cache
-#         _cache_dir = os.path.join(self.root_cache_dir)
-#         self.executor.set_query_cache_dir(_cache_dir)
+            # Use image paths instead of bytes,
+            os.makedirs(os.path.join(self.root_cache_dir, 'images'), exist_ok=True)
+            img_path = os.path.join(self.root_cache_dir, 'images', str(uuid.uuid4()) + '.jpg')
+            user_image.save(img_path)
+        else:
+            img_path = None
+
+        # Set query cache
+        _cache_dir = os.path.join(self.root_cache_dir)
+        self.executor.set_query_cache_dir(_cache_dir)
 
-#         # Step 1: Display the received inputs
-#         if user_image:
-#             messages.append(ChatMessage(role="assistant", content=f"📝 Received Query: {user_query}\n🖼️ Image Uploaded"))
-#         else:
-#             messages.append(ChatMessage(role="assistant", content=f"📝 Received Query: {user_query}"))
-#         yield messages
-
-#         # Step 2: Add "thinking" status while processing
-#         messages.append(ChatMessage(
-#             role="assistant",
-#             content="",
-#             metadata={"title": "⏳ Thinking: Processing input..."}
-#         ))
-
-#         # Step 3: Initialize problem-solving state
-#         start_time = time.time()
-#         step_count = 0
-#         json_data = {"query": user_query, "image": "Image received as bytes"}
-
-#         # Step 4: Query Analysis
-#         import pdb; pdb.set_trace()
-#         query_analysis = self.planner.analyze_query(user_query, img_path)
-#         json_data["query_analysis"] = query_analysis
-#         messages.append(ChatMessage(role="assistant", content=f"🔍 Query Analysis:\n{query_analysis}"))
-#         yield messages
-
-#         # Step 5: Execution loop (similar to your step-by-step solver)
-#         while step_count < self.max_steps and (time.time() - start_time) < self.max_time:
-#             step_count += 1
-#             messages.append(ChatMessage(role="assistant", content=f"🔄 Step {step_count}: Generating next step..."))
-#             yield messages
-
-#             # Generate the next step
-#             next_step = self.planner.generate_next_step(
-#                 user_query, img_path, query_analysis, self.memory, step_count, self.max_steps
-#             )
-#             context, sub_goal, tool_name = self.planner.extract_context_subgoal_and_tool(next_step)
-
-#             # Display the step information
-#             messages.append(ChatMessage(
-#                 role="assistant",
-#                 content=f"📌 Step {step_count} Details:\n- Context: {context}\n- Sub-goal: {sub_goal}\n- Tool: {tool_name}"
-#             ))
-#             yield messages
-
-#             # Handle tool execution or errors
-#             if tool_name not in self.planner.available_tools:
-#                 messages.append(ChatMessage(role="assistant", content=f"⚠️ Error: Tool '{tool_name}' is not available."))
-#                 yield messages
-#                 continue
-
-#             # Execute the tool command
-#             tool_command = self.executor.generate_tool_command(
-#                 user_query, img_path, context, sub_goal, tool_name, self.planner.toolbox_metadata[tool_name]
-#             )
-#             explanation, command = self.executor.extract_explanation_and_command(tool_command)
-#             result = self.executor.execute_tool_command(tool_name, command)
-#             result = make_json_serializable(result)
-
-#             messages.append(ChatMessage(role="assistant", content=f"✅ Step {step_count} Result:\n{json.dumps(result, indent=4)}"))
-#             yield messages
-
-#             # Step 6: Memory update and stopping condition
-#             self.memory.add_action(step_count, tool_name, sub_goal, tool_command, result)
-#             stop_verification = self.planner.verificate_memory(user_query, img_path, query_analysis, self.memory)
-#             conclusion = self.planner.extract_conclusion(stop_verification)
-
-#             messages.append(ChatMessage(role="assistant", content=f"🛑 Step {step_count} Conclusion: {conclusion}"))
-#             yield messages
-
-#             if conclusion == 'STOP':
-#                 break
-
-#         # Step 7: Generate Final Output (if needed)
-#         if 'final' in self.output_types:
-#             final_output = self.planner.generate_final_output(user_query, img_path, self.memory)
-#             messages.append(ChatMessage(role="assistant", content=f"🎯 Final Output:\n{final_output}"))
-#             yield messages
-
-#         if 'direct' in self.output_types:
-#             direct_output = self.planner.generate_direct_output(user_query, img_path, self.memory)
-#             messages.append(ChatMessage(role="assistant", content=f"🔹 Direct Output:\n{direct_output}"))
-#             yield messages
-
-#         # Step 8: Completion Message
-#         messages.append(ChatMessage(role="assistant", content="✅ Problem-solving process complete."))
-#         yield messages
+        # Step 1: Display the received inputs
+        if user_image:
+            messages.append(ChatMessage(role="assistant", content=f"📝 Received Query: {user_query}\n🖼️ Image Uploaded"))
+        else:
+            messages.append(ChatMessage(role="assistant", content=f"📝 Received Query: {user_query}"))
+        yield messages
+
+        # Step 2: Add "thinking" status while processing
+        messages.append(ChatMessage(
+            role="assistant",
+            content="",
+            metadata={"title": "⏳ Thinking: Processing input..."}
+        ))
+
+        # Step 3: Initialize problem-solving state
+        start_time = time.time()
+        step_count = 0
+        json_data = {"query": user_query, "image": "Image received as bytes"}
+
+        # Step 4: Query Analysis
+        import pdb; pdb.set_trace()
+        query_analysis = self.planner.analyze_query(user_query, img_path)
+        json_data["query_analysis"] = query_analysis
+        messages.append(ChatMessage(role="assistant", content=f"🔍 Query Analysis:\n{query_analysis}"))
+        yield messages
+
+        # Step 5: Execution loop (similar to your step-by-step solver)
+        while step_count < self.max_steps and (time.time() - start_time) < self.max_time:
+            step_count += 1
+            messages.append(ChatMessage(role="assistant", content=f"🔄 Step {step_count}: Generating next step..."))
+            yield messages
+
+            # Generate the next step
+            next_step = self.planner.generate_next_step(
+                user_query, img_path, query_analysis, self.memory, step_count, self.max_steps
+            )
+            context, sub_goal, tool_name = self.planner.extract_context_subgoal_and_tool(next_step)
+
+            # Display the step information
+            messages.append(ChatMessage(
+                role="assistant",
+                content=f"📌 Step {step_count} Details:\n- Context: {context}\n- Sub-goal: {sub_goal}\n- Tool: {tool_name}"
+            ))
+            yield messages
+
+            # Handle tool execution or errors
+            if tool_name not in self.planner.available_tools:
+                messages.append(ChatMessage(role="assistant", content=f"⚠️ Error: Tool '{tool_name}' is not available."))
+                yield messages
+                continue
+
+            # Execute the tool command
+            tool_command = self.executor.generate_tool_command(
+                user_query, img_path, context, sub_goal, tool_name, self.planner.toolbox_metadata[tool_name]
+            )
+            explanation, command = self.executor.extract_explanation_and_command(tool_command)
+            result = self.executor.execute_tool_command(tool_name, command)
+            result = make_json_serializable(result)
+
+            messages.append(ChatMessage(role="assistant", content=f"✅ Step {step_count} Result:\n{json.dumps(result, indent=4)}"))
+            yield messages
+
+            # Step 6: Memory update and stopping condition
+            self.memory.add_action(step_count, tool_name, sub_goal, tool_command, result)
+            stop_verification = self.planner.verificate_memory(user_query, img_path, query_analysis, self.memory)
+            conclusion = self.planner.extract_conclusion(stop_verification)
+
+            messages.append(ChatMessage(role="assistant", content=f"🛑 Step {step_count} Conclusion: {conclusion}"))
+            yield messages
+
+            if conclusion == 'STOP':
+                break
+
+        # Step 7: Generate Final Output (if needed)
+        if 'final' in self.output_types:
+            final_output = self.planner.generate_final_output(user_query, img_path, self.memory)
+            messages.append(ChatMessage(role="assistant", content=f"🎯 Final Output:\n{final_output}"))
+            yield messages
+
+        if 'direct' in self.output_types:
+            direct_output = self.planner.generate_direct_output(user_query, img_path, self.memory)
+            messages.append(ChatMessage(role="assistant", content=f"🔹 Direct Output:\n{direct_output}"))
+            yield messages
+
+        # Step 8: Completion Message
+        messages.append(ChatMessage(role="assistant", content="✅ Problem-solving process complete."))
+        yield messages
 
-# def parse_arguments():
-#     parser = argparse.ArgumentParser(description="Run the OpenTools demo with specified parameters.")
-#     parser.add_argument("--llm_engine_name", default="gpt-4o", help="LLM engine name.")
-#     parser.add_argument("--max_tokens", type=int, default=2000, help="Maximum tokens for LLM generation.")
-#     parser.add_argument("--run_baseline_only", type=bool, default=False, help="Run only the baseline (no toolbox).")
-#     parser.add_argument("--task", default="minitoolbench", help="Task to run.")
-#     parser.add_argument("--task_description", default="", help="Task description.")
-#     parser.add_argument(
-#         "--output_types",
-#         default="base,final,direct",
-#         help="Comma-separated list of required outputs (base,final,direct)"
-#     )
-#     parser.add_argument("--enabled_tools", default="Generalist_Solution_Generator_Tool", help="List of enabled tools.")
-#     parser.add_argument("--root_cache_dir", default="demo_solver_cache", help="Path to solver cache directory.")
-#     parser.add_argument("--output_json_dir", default="demo_results", help="Path to output JSON directory.")
-#     parser.add_argument("--max_steps", type=int, default=10, help="Maximum number of steps to execute.")
-#     parser.add_argument("--max_time", type=int, default=60, help="Maximum time allowed in seconds.")
-#     parser.add_argument("--verbose", type=bool, default=True, help="Enable verbose output.")
-#     return parser.parse_args()
-
-
-# def solve_problem_gradio(user_query, user_image):
-#     """
-#     Wrapper function to connect the solver to Gradio.
-#     Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
-#     """
-#     global solver # Ensure we're using the globally defined solver
-
-#     if solver is None:
-#         return [["assistant", "⚠️ Error: Solver is not initialized. Please restart the application."]]
-
-#     messages = [] # Initialize message list
-#     for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
-#         yield [[msg.role, msg.content] for msg in message_batch] # Ensure correct format for Gradio Chatbot
-
-
-
-# def main(args):
-#     global solver
-#     # Initialize Tools
-#     enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
-
-
-#     # Instantiate Initializer
-#     initializer = Initializer(
-#         enabled_tools=enabled_tools,
-#         model_string=args.llm_engine_name
-#     )
-
-#     # Instantiate Planner
-#     planner = Planner(
-#         llm_engine_name=args.llm_engine_name,
-#         toolbox_metadata=initializer.toolbox_metadata,
-#         available_tools=initializer.available_tools
-#     )
-
-#     # Instantiate Memory
-#     memory = Memory()
-
-#     # Instantiate Executor
-#     executor = Executor(
-#         llm_engine_name=args.llm_engine_name,
-#         root_cache_dir=args.root_cache_dir,
-#         enable_signal=False
-#     )
-
-#     # Instantiate Solver
-#     solver = Solver(
-#         planner=planner,
-#         memory=memory,
-#         executor=executor,
-#         task=args.task,
-#         task_description=args.task_description,
-#         output_types=args.output_types, # Add new parameter
-#         verbose=args.verbose,
-#         max_steps=args.max_steps,
-#         max_time=args.max_time,
-#         output_json_dir=args.output_json_dir,
-#         root_cache_dir=args.root_cache_dir
-#     )
-
-#     # Test Inputs
-#     # user_query = "How many balls are there in the image?"
-#     # user_image_path = "/home/sheng/toolbox-agent/mathvista_113.png" # Replace with your actual image path
-
-#     # # Load the image as a PIL object
-#     # user_image = Image.open(user_image_path).convert("RGB") # Ensure it's in RGB mode
-
-#     # print("\n=== Starting Problem Solving ===\n")
-#     # messages = []
-#     # for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
-#     #     for message in message_batch:
-#     #         print(f"{message.role}: {message.content}")
-
-#     # messages = []
-#     # solver.stream_solve_user_problem(user_query, user_image, messages)
-
-
-#     # def solve_problem_stream(user_query, user_image):
-#     #     messages = [] # Ensure it's a list of [role, content] pairs
-
-#     #     for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
-#     #         yield message_batch # Stream messages correctly in tuple format
-
-#     # solve_problem_stream(user_query, user_image)
-
-#     # ========== Gradio Interface ==========
-#     with gr.Blocks() as demo:
-#         gr.Markdown("# 🧠 OctoTools AI Solver") # Title
+def parse_arguments():
+    parser = argparse.ArgumentParser(description="Run the OpenTools demo with specified parameters.")
+    parser.add_argument("--llm_engine_name", default="gpt-4o", help="LLM engine name.")
+    parser.add_argument("--max_tokens", type=int, default=2000, help="Maximum tokens for LLM generation.")
+    parser.add_argument("--run_baseline_only", type=bool, default=False, help="Run only the baseline (no toolbox).")
+    parser.add_argument("--task", default="minitoolbench", help="Task to run.")
+    parser.add_argument("--task_description", default="", help="Task description.")
+    parser.add_argument(
+        "--output_types",
+        default="base,final,direct",
+        help="Comma-separated list of required outputs (base,final,direct)"
+    )
+    parser.add_argument("--enabled_tools", default="Generalist_Solution_Generator_Tool", help="List of enabled tools.")
+    parser.add_argument("--root_cache_dir", default="demo_solver_cache", help="Path to solver cache directory.")
+    parser.add_argument("--output_json_dir", default="demo_results", help="Path to output JSON directory.")
+    parser.add_argument("--max_steps", type=int, default=10, help="Maximum number of steps to execute.")
+    parser.add_argument("--max_time", type=int, default=60, help="Maximum time allowed in seconds.")
+    parser.add_argument("--verbose", type=bool, default=True, help="Enable verbose output.")
+    return parser.parse_args()
+
+
+def solve_problem_gradio(user_query, user_image):
+    """
+    Wrapper function to connect the solver to Gradio.
+    Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
+    """
+    global solver # Ensure we're using the globally defined solver
+
+    if solver is None:
+        return [["assistant", "⚠️ Error: Solver is not initialized. Please restart the application."]]
+
+    messages = [] # Initialize message list
+    for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
+        yield [[msg.role, msg.content] for msg in message_batch] # Ensure correct format for Gradio Chatbot
+
+
+
+def main(args):
+    global solver
+    # Initialize Tools
+    enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
+
+
+    # Instantiate Initializer
+    initializer = Initializer(
+        enabled_tools=enabled_tools,
+        model_string=args.llm_engine_name
+    )
+
+    # Instantiate Planner
+    planner = Planner(
+        llm_engine_name=args.llm_engine_name,
+        toolbox_metadata=initializer.toolbox_metadata,
+        available_tools=initializer.available_tools
+    )
+
+    # Instantiate Memory
+    memory = Memory()
+
+    # Instantiate Executor
+    executor = Executor(
+        llm_engine_name=args.llm_engine_name,
+        root_cache_dir=args.root_cache_dir,
+        enable_signal=False
+    )
+
+    # Instantiate Solver
+    solver = Solver(
+        planner=planner,
+        memory=memory,
+        executor=executor,
+        task=args.task,
+        task_description=args.task_description,
+        output_types=args.output_types, # Add new parameter
+        verbose=args.verbose,
+        max_steps=args.max_steps,
+        max_time=args.max_time,
+        output_json_dir=args.output_json_dir,
+        root_cache_dir=args.root_cache_dir
+    )
+
+    # Test Inputs
+    # user_query = "How many balls are there in the image?"
+    # user_image_path = "/home/sheng/toolbox-agent/mathvista_113.png" # Replace with your actual image path
+
+    # # Load the image as a PIL object
+    # user_image = Image.open(user_image_path).convert("RGB") # Ensure it's in RGB mode
+
+    # print("\n=== Starting Problem Solving ===\n")
+    # messages = []
+    # for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
+    #     for message in message_batch:
+    #         print(f"{message.role}: {message.content}")
+
+    # messages = []
+    # solver.stream_solve_user_problem(user_query, user_image, messages)
+
+
+    # def solve_problem_stream(user_query, user_image):
+    #     messages = [] # Ensure it's a list of [role, content] pairs
+
+    #     for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
+    #         yield message_batch # Stream messages correctly in tuple format
+
+    # solve_problem_stream(user_query, user_image)
+
+    # ========== Gradio Interface ==========
+    with gr.Blocks() as demo:
+        gr.Markdown("# 🧠 OctoTools AI Solver") # Title
 
-#         with gr.Row():
-#             user_query = gr.Textbox(label="Enter your query", placeholder="Type your question here...")
-#             user_image = gr.Image(type="pil", label="Upload an image") # Accepts multiple formats
+        with gr.Row():
+            user_query = gr.Textbox(label="Enter your query", placeholder="Type your question here...")
+            user_image = gr.Image(type="pil", label="Upload an image") # Accepts multiple formats
 
-#         run_button = gr.Button("Run") # Run button
-#         chatbot_output = gr.Chatbot(label="Problem-Solving Output")
+        run_button = gr.Button("Run") # Run button
+        chatbot_output = gr.Chatbot(label="Problem-Solving Output")
 
-#         # Link button click to function
-#         run_button.click(fn=solve_problem_gradio, inputs=[user_query, user_image], outputs=chatbot_output)
+        # Link button click to function
+        run_button.click(fn=solve_problem_gradio, inputs=[user_query, user_image], outputs=chatbot_output)
 
-#     # Launch the Gradio app
-#     demo.launch()
+    # Launch the Gradio app
+    demo.launch()
 
 
 
-# if __name__ == "__main__":
-#     args = parse_arguments()
-#     main(args)
+if __name__ == "__main__":
+    args = parse_arguments()
+    main(args)
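The `solve_problem_gradio` wrapper enabled above relies on Gradio's generator-based streaming: each `yield` hands the full message history back to the `Chatbot` component, which redraws on every update. A minimal, self-contained sketch of that pattern (the `stream_steps` name and step strings are illustrative, not part of this commit):

```python
import time
import gradio as gr

def stream_steps(query):
    # Gradio treats a generator handler as a streaming output: every
    # yield re-renders the Chatbot with the history yielded so far.
    history = []
    for step in ["Analyzing query...", "Running tool...", "Done."]:
        time.sleep(0.5)  # stand-in for real planner/executor work
        history.append([query, step])  # one [user, assistant] row per yield
        yield history

with gr.Blocks() as demo:
    query_box = gr.Textbox(label="Enter your query")
    chat = gr.Chatbot(label="Problem-Solving Output")
    gr.Button("Run").click(fn=stream_steps, inputs=query_box, outputs=chat)

if __name__ == "__main__":
    demo.launch()
```

The committed code yields `[msg.role, msg.content]` pairs into the same component, so the role string occupies the slot where a user message normally renders; the streaming mechanics are identical either way.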
opentools/engine/__init__.py ADDED
File without changes
opentools/models/__init__.py ADDED
File without changes
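The two empty `__init__.py` files mark `opentools/engine` and `opentools/models` as regular packages, so imports like `from opentools.models.executor import Executor` at the top of app.py resolve without relying on implicit namespace packages. A quick sanity check, assuming the repo root is on `sys.path`:

```python
import importlib

# Each call raises ModuleNotFoundError if the package is not importable.
for pkg in ("opentools.engine", "opentools.models"):
    importlib.import_module(pkg)
print("opentools packages import cleanly")
```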
opentools/tools/generalist_solution_generator/__pycache__/tool.cpython-310.pyc DELETED
Binary file (4.6 kB)
 
opentools/tools/generalist_solution_generator/examples/mathvista_113.png DELETED
Binary file (88.6 kB)
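Since every flag in `parse_arguments` has a default, the updated app can also be driven without the CLI. A hypothetical driver, with the `Namespace` fields copied from the argparse defaults in the diff above (it assumes `app.py` is importable and that credentials for the `gpt-4o` engine are configured):

```python
from argparse import Namespace

from app import main  # app.py as committed above

# Field values mirror the parse_arguments() defaults in the diff.
args = Namespace(
    llm_engine_name="gpt-4o",
    max_tokens=2000,
    run_baseline_only=False,
    task="minitoolbench",
    task_description="",
    output_types="base,final,direct",
    enabled_tools="Generalist_Solution_Generator_Tool",
    root_cache_dir="demo_solver_cache",
    output_json_dir="demo_results",
    max_steps=10,
    max_time=60,
    verbose=True,
)

main(args)  # builds the Initializer/Planner/Memory/Executor stack and launches Gradio
```

Note that, as committed, `stream_solve_user_problem` still contains an `import pdb; pdb.set_trace()` breakpoint, so the first query submitted will pause in the debugger until that line is removed.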