Spaces: Running on CPU Upgrade
Remove delay in ms
Browse files
- app.py +1 -1
- e2bqwen.py +31 -6
app.py
CHANGED
|
@@ -465,7 +465,7 @@ def chat_message_to_json(obj):
|
|
| 465 |
|
| 466 |
def save_final_status(folder, status: str, summary, error_message = None) -> None:
|
| 467 |
metadata_path = os.path.join(folder, "metadata.json")
|
| 468 |
-
output_file = open(metadata_path,"w")
|
| 469 |
output_file.write(json.dumps({"status":status, "summary":summary, "error_message": error_message}, default=chat_message_to_json))
|
| 470 |
output_file.close()
|
| 471 |
|
|
|
|
| 465 |
|
| 466 |
def save_final_status(folder, status: str, summary, error_message = None) -> None:
|
| 467 |
metadata_path = os.path.join(folder, "metadata.json")
|
| 468 |
+
output_file = open(metadata_path, "w")
|
| 469 |
output_file.write(json.dumps({"status":status, "summary":summary, "error_message": error_message}, default=chat_message_to_json))
|
| 470 |
output_file.close()
|
| 471 |
|
e2bqwen.py
CHANGED
|
@@ -160,6 +160,15 @@ def draw_marker_on_image(image_copy, click_coordinates):
|
|
| 160 |
draw.ellipse((x - cross_size * 2, y - cross_size * 2, x + cross_size * 2, y + cross_size * 2), outline="red", width=linewidth)
|
| 161 |
return image_copy
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
class E2BVisionAgent(CodeAgent):
|
| 164 |
"""Agent for e2b desktop automation with Qwen2.5VL vision capabilities"""
|
| 165 |
def __init__(
|
|
@@ -194,7 +203,6 @@ class E2BVisionAgent(CodeAgent):
|
|
| 194 |
**kwargs
|
| 195 |
)
|
| 196 |
self.prompt_templates["system_prompt"] = E2B_SYSTEM_PROMPT_TEMPLATE.replace("<<resolution_x>>", str(self.width)).replace("<<resolution_y>>", str(self.height))
|
| 197 |
-
print("PROMPT TEMPLATE:", self.prompt_templates["system_prompt"])
|
| 198 |
|
| 199 |
# Add screen info to state
|
| 200 |
self.state["screen_width"] = self.width
|
|
@@ -206,6 +214,23 @@ class E2BVisionAgent(CodeAgent):
|
|
| 206 |
self._setup_desktop_tools()
|
| 207 |
self.step_callbacks.append(self.take_screenshot_callback)
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
def _setup_desktop_tools(self):
|
| 210 |
"""Register all desktop tools"""
|
| 211 |
@tool
|
|
@@ -266,15 +291,14 @@ class E2BVisionAgent(CodeAgent):
|
|
| 266 |
return ''.join(c for c in unicodedata.normalize('NFD', text) if not unicodedata.combining(c))
|
| 267 |
|
| 268 |
@tool
|
| 269 |
-
def type_text(text: str
|
| 270 |
"""
|
| 271 |
Types the specified text at the current cursor position.
|
| 272 |
Args:
|
| 273 |
text: The text to type
|
| 274 |
-
delay_in_ms: Delay between keystrokes in milliseconds
|
| 275 |
"""
|
| 276 |
clean_text = normalize_text(text)
|
| 277 |
-
self.desktop.write(clean_text, delay_in_ms=
|
| 278 |
self.logger.log(f"Typed text: '{clean_text}'")
|
| 279 |
return f"Typed text: '{clean_text}'"
|
| 280 |
|
|
@@ -426,8 +450,9 @@ class E2BVisionAgent(CodeAgent):
|
|
| 426 |
isinstance(previous_memory_step, ActionStep)
|
| 427 |
and previous_memory_step.step_number == current_step - 1
|
| 428 |
):
|
| 429 |
-
if previous_memory_step.tool_calls[0]
|
| 430 |
-
|
|
|
|
| 431 |
|
| 432 |
# Add the marker-edited image to the current memory step
|
| 433 |
memory_step.observations_images = [image_copy]
|
|
|
|
| 160 |
draw.ellipse((x - cross_size * 2, y - cross_size * 2, x + cross_size * 2, y + cross_size * 2), outline="red", width=linewidth)
|
| 161 |
return image_copy
|
| 162 |
|
| 163 |
+
from jinja2 import StrictUndefined, Template
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def populate_template(template: str, variables: Dict[str, Any]) -> str:
|
| 167 |
+
compiled_template = Template(template, undefined=StrictUndefined)
|
| 168 |
+
return compiled_template.render(**variables)
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
|
| 172 |
class E2BVisionAgent(CodeAgent):
|
| 173 |
"""Agent for e2b desktop automation with Qwen2.5VL vision capabilities"""
|
| 174 |
def __init__(
|
|
|
|
| 203 |
**kwargs
|
| 204 |
)
|
| 205 |
self.prompt_templates["system_prompt"] = E2B_SYSTEM_PROMPT_TEMPLATE.replace("<<resolution_x>>", str(self.width)).replace("<<resolution_y>>", str(self.height))
|
|
|
|
| 206 |
|
| 207 |
# Add screen info to state
|
| 208 |
self.state["screen_width"] = self.width
|
|
|
|
| 214 |
self._setup_desktop_tools()
|
| 215 |
self.step_callbacks.append(self.take_screenshot_callback)
|
| 216 |
|
| 217 |
+
def initialize_system_prompt(self) -> str:
|
| 218 |
+
print("v2 PROMPT TEMPLATE:", self.prompt_templates["system_prompt"])
|
| 219 |
+
system_prompt = populate_template(
|
| 220 |
+
self.prompt_templates["system_prompt"],
|
| 221 |
+
variables={
|
| 222 |
+
"tools": self.tools,
|
| 223 |
+
"managed_agents": self.managed_agents,
|
| 224 |
+
"authorized_imports": (
|
| 225 |
+
"You can import from any package you want."
|
| 226 |
+
if "*" in self.authorized_imports
|
| 227 |
+
else str(self.authorized_imports)
|
| 228 |
+
),
|
| 229 |
+
},
|
| 230 |
+
)
|
| 231 |
+
print("v3 PROMPT TEMPLATE:", self.prompt_templates["system_prompt"])
|
| 232 |
+
return system_prompt
|
| 233 |
+
|
| 234 |
def _setup_desktop_tools(self):
|
| 235 |
"""Register all desktop tools"""
|
| 236 |
@tool
|
|
|
|
| 291 |
return ''.join(c for c in unicodedata.normalize('NFD', text) if not unicodedata.combining(c))
|
| 292 |
|
| 293 |
@tool
|
| 294 |
+
def type_text(text: str) -> str:
|
| 295 |
"""
|
| 296 |
Types the specified text at the current cursor position.
|
| 297 |
Args:
|
| 298 |
text: The text to type
|
|
|
|
| 299 |
"""
|
| 300 |
clean_text = normalize_text(text)
|
| 301 |
+
self.desktop.write(clean_text, delay_in_ms=75)
|
| 302 |
self.logger.log(f"Typed text: '{clean_text}'")
|
| 303 |
return f"Typed text: '{clean_text}'"
|
| 304 |
|
|
|
|
| 450 |
isinstance(previous_memory_step, ActionStep)
|
| 451 |
and previous_memory_step.step_number == current_step - 1
|
| 452 |
):
|
| 453 |
+
if previous_memory_step.tool_calls and getattr(previous_memory_step.tool_calls[0], "arguments", None) and memory_step.tool_calls and getattr(memory_step.tool_calls[0], "arguments", None):
|
| 454 |
+
if previous_memory_step.tool_calls[0].arguments == memory_step.tool_calls[0].arguments:
|
| 455 |
+
memory_step.observations += "\nWARNING: You've executed the same action several times in a row. MAKE SURE TO NOT UNNECESSARILY REPEAT ACTIONS."
|
| 456 |
|
| 457 |
# Add the marker-edited image to the current memory step
|
| 458 |
memory_step.observations_images = [image_copy]
|