Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Remove delay in ms
Browse files
- app.py +1 -1
- e2bqwen.py +31 -6
app.py
CHANGED
@@ -465,7 +465,7 @@ def chat_message_to_json(obj):
|
|
465 |
|
466 |
def save_final_status(folder, status: str, summary, error_message = None) -> None:
    """Write the final run status to ``metadata.json`` inside ``folder``.

    Args:
        folder: Directory in which ``metadata.json`` is created or overwritten.
        status: Status label stored under the ``"status"`` key.
        summary: Value stored under ``"summary"``; objects the ``json`` module
            cannot encode natively are handed to ``chat_message_to_json``.
        error_message: Optional value stored under ``"error_message"``.
    """
    metadata_path = os.path.join(folder, "metadata.json")
    # Serialize before opening the file so a failing dump does not truncate
    # an existing metadata.json or leave an empty one behind.
    payload = json.dumps(
        {"status": status, "summary": summary, "error_message": error_message},
        default=chat_message_to_json,
    )
    # The context manager closes the handle even if the write raises.
    with open(metadata_path, "w") as output_file:
        output_file.write(payload)
|
471 |
|
|
|
465 |
|
466 |
def save_final_status(folder, status: str, summary, error_message = None) -> None:
    """Write the final run status to ``metadata.json`` inside ``folder``.

    Args:
        folder: Directory in which ``metadata.json`` is created or overwritten.
        status: Status label stored under the ``"status"`` key.
        summary: Value stored under ``"summary"``; objects the ``json`` module
            cannot encode natively are handed to ``chat_message_to_json``.
        error_message: Optional value stored under ``"error_message"``.
    """
    metadata_path = os.path.join(folder, "metadata.json")
    # Serialize before opening the file so a failing dump does not truncate
    # an existing metadata.json or leave an empty one behind.
    payload = json.dumps(
        {"status": status, "summary": summary, "error_message": error_message},
        default=chat_message_to_json,
    )
    # The context manager closes the handle even if the write raises.
    with open(metadata_path, "w") as output_file:
        output_file.write(payload)
|
471 |
|
e2bqwen.py
CHANGED
@@ -160,6 +160,15 @@ def draw_marker_on_image(image_copy, click_coordinates):
|
|
160 |
draw.ellipse((x - cross_size * 2, y - cross_size * 2, x + cross_size * 2, y + cross_size * 2), outline="red", width=linewidth)
|
161 |
return image_copy
|
162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
class E2BVisionAgent(CodeAgent):
|
164 |
"""Agent for e2b desktop automation with Qwen2.5VL vision capabilities"""
|
165 |
def __init__(
|
@@ -194,7 +203,6 @@ class E2BVisionAgent(CodeAgent):
|
|
194 |
**kwargs
|
195 |
)
|
196 |
self.prompt_templates["system_prompt"] = E2B_SYSTEM_PROMPT_TEMPLATE.replace("<<resolution_x>>", str(self.width)).replace("<<resolution_y>>", str(self.height))
|
197 |
-
print("PROMPT TEMPLATE:", self.prompt_templates["system_prompt"])
|
198 |
|
199 |
# Add screen info to state
|
200 |
self.state["screen_width"] = self.width
|
@@ -206,6 +214,23 @@ class E2BVisionAgent(CodeAgent):
|
|
206 |
self._setup_desktop_tools()
|
207 |
self.step_callbacks.append(self.take_screenshot_callback)
|
208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
def _setup_desktop_tools(self):
|
210 |
"""Register all desktop tools"""
|
211 |
@tool
|
@@ -266,15 +291,14 @@ class E2BVisionAgent(CodeAgent):
|
|
266 |
return ''.join(c for c in unicodedata.normalize('NFD', text) if not unicodedata.combining(c))
|
267 |
|
268 |
@tool
|
269 |
-
def type_text(text: str
|
270 |
"""
|
271 |
Types the specified text at the current cursor position.
|
272 |
Args:
|
273 |
text: The text to type
|
274 |
-
delay_in_ms: Delay between keystrokes in milliseconds
|
275 |
"""
|
276 |
clean_text = normalize_text(text)
|
277 |
-
self.desktop.write(clean_text, delay_in_ms=
|
278 |
self.logger.log(f"Typed text: '{clean_text}'")
|
279 |
return f"Typed text: '{clean_text}'"
|
280 |
|
@@ -426,8 +450,9 @@ class E2BVisionAgent(CodeAgent):
|
|
426 |
isinstance(previous_memory_step, ActionStep)
|
427 |
and previous_memory_step.step_number == current_step - 1
|
428 |
):
|
429 |
-
if previous_memory_step.tool_calls[0]
|
430 |
-
|
|
|
431 |
|
432 |
# Add the marker-edited image to the current memory step
|
433 |
memory_step.observations_images = [image_copy]
|
|
|
160 |
draw.ellipse((x - cross_size * 2, y - cross_size * 2, x + cross_size * 2, y + cross_size * 2), outline="red", width=linewidth)
|
161 |
return image_copy
|
162 |
|
163 |
+
from jinja2 import StrictUndefined, Template
|
164 |
+
|
165 |
+
|
166 |
+
def populate_template(template: str, variables: Dict[str, Any]) -> str:
    """Render a Jinja2 template string with the supplied variables.

    The template is compiled with ``StrictUndefined``; per Jinja2's
    documentation, using a variable that is absent from ``variables`` then
    raises an error instead of rendering as an empty string.

    Args:
        template: Jinja2 template source text.
        variables: Mapping of template variable names to their values.

    Returns:
        The rendered template text.
    """
    return Template(template, undefined=StrictUndefined).render(**variables)
|
169 |
+
|
170 |
+
|
171 |
+
|
172 |
class E2BVisionAgent(CodeAgent):
|
173 |
"""Agent for e2b desktop automation with Qwen2.5VL vision capabilities"""
|
174 |
def __init__(
|
|
|
203 |
**kwargs
|
204 |
)
|
205 |
self.prompt_templates["system_prompt"] = E2B_SYSTEM_PROMPT_TEMPLATE.replace("<<resolution_x>>", str(self.width)).replace("<<resolution_y>>", str(self.height))
|
|
|
206 |
|
207 |
# Add screen info to state
|
208 |
self.state["screen_width"] = self.width
|
|
|
214 |
self._setup_desktop_tools()
|
215 |
self.step_callbacks.append(self.take_screenshot_callback)
|
216 |
|
217 |
+
def initialize_system_prompt(self) -> str:
    """Render the agent's system prompt from its stored template.

    Reads the ``"system_prompt"`` entry of ``self.prompt_templates`` and fills
    it through ``populate_template`` with the agent's tools, managed agents
    and authorized imports. The raw template is printed before rendering and
    the rendered prompt afterwards, as debugging output.

    Returns:
        The rendered system prompt.
    """
    template = self.prompt_templates["system_prompt"]
    print("v2 PROMPT TEMPLATE:", template)

    # A "*" entry means every import is allowed, so the prompt states that
    # instead of listing the authorized packages.
    if "*" in self.authorized_imports:
        authorized_imports = "You can import from any package you want."
    else:
        authorized_imports = str(self.authorized_imports)

    system_prompt = populate_template(
        template,
        variables={
            "tools": self.tools,
            "managed_agents": self.managed_agents,
            "authorized_imports": authorized_imports,
        },
    )
    # Print the rendered prompt: the raw template was already shown above,
    # so repeating it here would not reveal the result of the rendering.
    print("v3 PROMPT TEMPLATE:", system_prompt)
    return system_prompt
|
233 |
+
|
234 |
def _setup_desktop_tools(self):
|
235 |
"""Register all desktop tools"""
|
236 |
@tool
|
|
|
291 |
return ''.join(c for c in unicodedata.normalize('NFD', text) if not unicodedata.combining(c))
|
292 |
|
293 |
@tool
|
294 |
+
def type_text(text: str) -> str:
    """
    Types the specified text at the current cursor position.
    Args:
        text: The text to type
    """
    # NOTE(review): the docstring is kept verbatim because the @tool decorator
    # applied to this function presumably reads it - confirm before rewording.
    normalized = normalize_text(text)
    # Keystroke delay is fixed at 75 ms rather than exposed as a tool argument.
    self.desktop.write(normalized, delay_in_ms=75)
    result = f"Typed text: '{normalized}'"
    self.logger.log(result)
    return result
|
304 |
|
|
|
450 |
isinstance(previous_memory_step, ActionStep)
|
451 |
and previous_memory_step.step_number == current_step - 1
|
452 |
):
|
453 |
+
if previous_memory_step.tool_calls and getattr(previous_memory_step.tool_calls[0], "arguments", None) and memory_step.tool_calls and getattr(memory_step.tool_calls[0], "arguments", None):
|
454 |
+
if previous_memory_step.tool_calls[0].arguments == memory_step.tool_calls[0].arguments:
|
455 |
+
memory_step.observations += "\nWARNING: You've executed the same action several times in a row. MAKE SURE TO NOT UNNECESSARILY REPEAT ACTIONS."
|
456 |
|
457 |
# Add the marker-edited image to the current memory step
|
458 |
memory_step.observations_images = [image_copy]
|