m-ric HF Staff committed on
Commit
3c21bb9
·
1 Parent(s): bf50fd5

Remove delay in ms

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. e2bqwen.py +31 -6
app.py CHANGED
@@ -465,7 +465,7 @@ def chat_message_to_json(obj):
465
 
466
  def save_final_status(folder, status: str, summary, error_message = None) -> None:
467
  metadata_path = os.path.join(folder, "metadata.json")
468
- output_file = open(metadata_path,"w")
469
  output_file.write(json.dumps({"status":status, "summary":summary, "error_message": error_message}, default=chat_message_to_json))
470
  output_file.close()
471
 
 
465
 
466
  def save_final_status(folder, status: str, summary, error_message = None) -> None:
467
  metadata_path = os.path.join(folder, "metadata.json")
468
+ output_file = open(metadata_path, "w")
469
  output_file.write(json.dumps({"status":status, "summary":summary, "error_message": error_message}, default=chat_message_to_json))
470
  output_file.close()
471
 
e2bqwen.py CHANGED
@@ -160,6 +160,15 @@ def draw_marker_on_image(image_copy, click_coordinates):
160
  draw.ellipse((x - cross_size * 2, y - cross_size * 2, x + cross_size * 2, y + cross_size * 2), outline="red", width=linewidth)
161
  return image_copy
162
 
 
 
 
 
 
 
 
 
 
163
  class E2BVisionAgent(CodeAgent):
164
  """Agent for e2b desktop automation with Qwen2.5VL vision capabilities"""
165
  def __init__(
@@ -194,7 +203,6 @@ class E2BVisionAgent(CodeAgent):
194
  **kwargs
195
  )
196
  self.prompt_templates["system_prompt"] = E2B_SYSTEM_PROMPT_TEMPLATE.replace("<<resolution_x>>", str(self.width)).replace("<<resolution_y>>", str(self.height))
197
- print("PROMPT TEMPLATE:", self.prompt_templates["system_prompt"])
198
 
199
  # Add screen info to state
200
  self.state["screen_width"] = self.width
@@ -206,6 +214,23 @@ class E2BVisionAgent(CodeAgent):
206
  self._setup_desktop_tools()
207
  self.step_callbacks.append(self.take_screenshot_callback)
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  def _setup_desktop_tools(self):
210
  """Register all desktop tools"""
211
  @tool
@@ -266,15 +291,14 @@ class E2BVisionAgent(CodeAgent):
266
  return ''.join(c for c in unicodedata.normalize('NFD', text) if not unicodedata.combining(c))
267
 
268
  @tool
269
- def type_text(text: str, delay_in_ms: int = 75) -> str:
270
  """
271
  Types the specified text at the current cursor position.
272
  Args:
273
  text: The text to type
274
- delay_in_ms: Delay between keystrokes in milliseconds
275
  """
276
  clean_text = normalize_text(text)
277
- self.desktop.write(clean_text, delay_in_ms=delay_in_ms)
278
  self.logger.log(f"Typed text: '{clean_text}'")
279
  return f"Typed text: '{clean_text}'"
280
 
@@ -426,8 +450,9 @@ class E2BVisionAgent(CodeAgent):
426
  isinstance(previous_memory_step, ActionStep)
427
  and previous_memory_step.step_number == current_step - 1
428
  ):
429
- if previous_memory_step.tool_calls[0].arguments == memory_step.tool_calls[0].arguments:
430
- memory_step.observations += "\nWARNING: You've executed the same action several times in a row. MAKE SURE TO NOT UNNECESSARILY REPEAT ACTIONS."
 
431
 
432
  # Add the marker-edited image to the current memory step
433
  memory_step.observations_images = [image_copy]
 
160
  draw.ellipse((x - cross_size * 2, y - cross_size * 2, x + cross_size * 2, y + cross_size * 2), outline="red", width=linewidth)
161
  return image_copy
162
 
163
+ from jinja2 import StrictUndefined, Template
164
+
165
+
166
+ def populate_template(template: str, variables: Dict[str, Any]) -> str:
167
+ compiled_template = Template(template, undefined=StrictUndefined)
168
+ return compiled_template.render(**variables)
169
+
170
+
171
+
172
  class E2BVisionAgent(CodeAgent):
173
  """Agent for e2b desktop automation with Qwen2.5VL vision capabilities"""
174
  def __init__(
 
203
  **kwargs
204
  )
205
  self.prompt_templates["system_prompt"] = E2B_SYSTEM_PROMPT_TEMPLATE.replace("<<resolution_x>>", str(self.width)).replace("<<resolution_y>>", str(self.height))
 
206
 
207
  # Add screen info to state
208
  self.state["screen_width"] = self.width
 
214
  self._setup_desktop_tools()
215
  self.step_callbacks.append(self.take_screenshot_callback)
216
 
217
+ def initialize_system_prompt(self) -> str:
218
+ print("v2 PROMPT TEMPLATE:", self.prompt_templates["system_prompt"])
219
+ system_prompt = populate_template(
220
+ self.prompt_templates["system_prompt"],
221
+ variables={
222
+ "tools": self.tools,
223
+ "managed_agents": self.managed_agents,
224
+ "authorized_imports": (
225
+ "You can import from any package you want."
226
+ if "*" in self.authorized_imports
227
+ else str(self.authorized_imports)
228
+ ),
229
+ },
230
+ )
231
+ print("v3 PROMPT TEMPLATE:", self.prompt_templates["system_prompt"])
232
+ return system_prompt
233
+
234
  def _setup_desktop_tools(self):
235
  """Register all desktop tools"""
236
  @tool
 
291
  return ''.join(c for c in unicodedata.normalize('NFD', text) if not unicodedata.combining(c))
292
 
293
  @tool
294
+ def type_text(text: str) -> str:
295
  """
296
  Types the specified text at the current cursor position.
297
  Args:
298
  text: The text to type
 
299
  """
300
  clean_text = normalize_text(text)
301
+ self.desktop.write(clean_text, delay_in_ms=75)
302
  self.logger.log(f"Typed text: '{clean_text}'")
303
  return f"Typed text: '{clean_text}'"
304
 
 
450
  isinstance(previous_memory_step, ActionStep)
451
  and previous_memory_step.step_number == current_step - 1
452
  ):
453
+ if previous_memory_step.tool_calls and getattr(previous_memory_step.tool_calls[0], "arguments", None) and memory_step.tool_calls and getattr(memory_step.tool_calls[0], "arguments", None):
454
+ if previous_memory_step.tool_calls[0].arguments == memory_step.tool_calls[0].arguments:
455
+ memory_step.observations += "\nWARNING: You've executed the same action several times in a row. MAKE SURE TO NOT UNNECESSARILY REPEAT ACTIONS."
456
 
457
  # Add the marker-edited image to the current memory step
458
  memory_step.observations_images = [image_copy]