Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Simplify agent code
Browse files- e2bqwen.py +3 -74
e2bqwen.py
CHANGED
@@ -320,77 +320,6 @@ REMEMBER TO ALWAYS CLICK IN THE MIDDLE OF THE TEXT, NOT ON THE SIDE, NOT UNDER.
|
|
320 |
a.write(json.dumps(output_memory))
|
321 |
a.close()
|
322 |
|
323 |
-
def write_memory_to_messages(self, summary_mode: Optional[bool] = False) -> List[Dict[str, Any]]:
|
324 |
-
"""Convert memory to messages for the model"""
|
325 |
-
messages = [{"role": MessageRole.SYSTEM, "content": [{"type": "text", "text": self.system_prompt}]}]
|
326 |
-
# Get the last memory step
|
327 |
-
last_step = self.memory.steps[-1] if self.memory.steps else None
|
328 |
-
for memory_step in self.memory.steps:
|
329 |
-
if hasattr(memory_step, "task") and memory_step.task:
|
330 |
-
# Add task message if it exists
|
331 |
-
messages.append({
|
332 |
-
"role": MessageRole.USER,
|
333 |
-
"content": [{"type": "text", "text": memory_step.task}]
|
334 |
-
})
|
335 |
-
continue # Skip to next step after adding task
|
336 |
-
if hasattr(memory_step, "model_output_message_plan") and memory_step.model_output_message_plan:
|
337 |
-
messages.append({
|
338 |
-
"role": MessageRole.ASSISTANT,
|
339 |
-
"content": [{"type": "text", "text": memory_step.model_output_message_plan.content, "agent_state": "plan"}]
|
340 |
-
})
|
341 |
-
# Process model output message if it exists
|
342 |
-
if hasattr(memory_step, "model_output") and memory_step.model_output:
|
343 |
-
messages.append({
|
344 |
-
"role": MessageRole.ASSISTANT,
|
345 |
-
"content": [{"type": "text", "text": memory_step.model_output}]
|
346 |
-
})
|
347 |
-
|
348 |
-
# Process observations and images
|
349 |
-
observation_content = []
|
350 |
-
|
351 |
-
# Add screenshot image paths if present
|
352 |
-
if memory_step is last_step and hasattr(memory_step, "observations_images") and memory_step.observations_images:
|
353 |
-
self.logger.log(f"Found {len(memory_step.observations_images)} image paths in step", level=LogLevel.DEBUG)
|
354 |
-
for img_path in memory_step.observations_images:
|
355 |
-
if isinstance(img_path, str) and os.path.exists(img_path):
|
356 |
-
observation_content.append({"type": "image", "image": img_path})
|
357 |
-
elif isinstance(img_path, Image.Image):
|
358 |
-
screenshot_path = f"screenshot_{int(time.time() * 1000)}.png"
|
359 |
-
img_path.save(screenshot_path)
|
360 |
-
observation_content.append({"type": "image", "image": screenshot_path})
|
361 |
-
else:
|
362 |
-
self.logger.log(f" - Skipping invalid image: {type(img_path)}", level=LogLevel.ERROR)
|
363 |
-
|
364 |
-
# Add text observations if any
|
365 |
-
if hasattr(memory_step, "observations") and memory_step.observations:
|
366 |
-
self.logger.log(f" - Adding text observation", level=LogLevel.DEBUG)
|
367 |
-
observation_content.append({"type": "text", "text": f"Observation: {memory_step.observations}"})
|
368 |
-
|
369 |
-
# Add error if present and didn't already add observations
|
370 |
-
if hasattr(memory_step, "error") and memory_step.error:
|
371 |
-
self.logger.log(f" - Adding error message", level=LogLevel.DEBUG)
|
372 |
-
observation_content.append({"type": "text", "text": f"Error: {memory_step.error}"})
|
373 |
-
|
374 |
-
# Add user message with content if we have any
|
375 |
-
if observation_content:
|
376 |
-
self.logger.log(f" - Adding user message with {len(observation_content)} content items", level=LogLevel.DEBUG)
|
377 |
-
messages.append({
|
378 |
-
"role": MessageRole.USER,
|
379 |
-
"content": observation_content
|
380 |
-
})
|
381 |
-
|
382 |
-
# # Check for images in final message list
|
383 |
-
# image_count = 0
|
384 |
-
# for msg in messages:
|
385 |
-
# if isinstance(msg.get("content"), list):
|
386 |
-
# for item in msg["content"]:
|
387 |
-
# if isinstance(item, dict) and item.get("type") == "image":
|
388 |
-
# image_count += 1
|
389 |
-
|
390 |
-
# print(f"Created {len(messages)} messages with {image_count} image paths")
|
391 |
-
|
392 |
-
return messages
|
393 |
-
|
394 |
|
395 |
def take_snapshot_callback(self, memory_step: ActionStep, agent=None) -> None:
|
396 |
"""Callback that takes a screenshot + memory snapshot after a step completes"""
|
@@ -529,7 +458,7 @@ class QwenVLAPIModel(Model):
|
|
529 |
img_byte_arr = io.BytesIO()
|
530 |
item["image"].save(img_byte_arr, format="PNG")
|
531 |
base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")
|
532 |
-
|
533 |
content.append({
|
534 |
"type": "image_url",
|
535 |
"image_url": {
|
@@ -543,10 +472,10 @@ class QwenVLAPIModel(Model):
|
|
543 |
formatted_messages.append({"role": role, "content": content})
|
544 |
|
545 |
return formatted_messages
|
546 |
-
|
547 |
def _call_hf_endpoint(self, formatted_messages, stop_sequences=None, **kwargs):
|
548 |
"""Call the Hugging Face OpenAI-compatible endpoint"""
|
549 |
-
|
550 |
# Extract parameters with defaults
|
551 |
max_tokens = kwargs.get("max_new_tokens", 512)
|
552 |
temperature = kwargs.get("temperature", 0.7)
|
|
|
320 |
a.write(json.dumps(output_memory))
|
321 |
a.close()
|
322 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
|
324 |
def take_snapshot_callback(self, memory_step: ActionStep, agent=None) -> None:
|
325 |
"""Callback that takes a screenshot + memory snapshot after a step completes"""
|
|
|
458 |
img_byte_arr = io.BytesIO()
|
459 |
item["image"].save(img_byte_arr, format="PNG")
|
460 |
base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")
|
461 |
+
|
462 |
content.append({
|
463 |
"type": "image_url",
|
464 |
"image_url": {
|
|
|
472 |
formatted_messages.append({"role": role, "content": content})
|
473 |
|
474 |
return formatted_messages
|
475 |
+
|
476 |
def _call_hf_endpoint(self, formatted_messages, stop_sequences=None, **kwargs):
|
477 |
"""Call the Hugging Face OpenAI-compatible endpoint"""
|
478 |
+
|
479 |
# Extract parameters with defaults
|
480 |
max_tokens = kwargs.get("max_new_tokens", 512)
|
481 |
temperature = kwargs.get("temperature", 0.7)
|