Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Repair save + new prompts
Browse files
- app.py +9 -47
- e2bqwen.py +14 -5
- eval.py +11 -19
app.py
CHANGED
@@ -15,7 +15,7 @@ from dotenv import load_dotenv
|
|
15 |
from smolagents import CodeAgent
|
16 |
from smolagents.gradio_ui import GradioUI, stream_to_gradio
|
17 |
|
18 |
-
from e2bqwen import QwenVLAPIModel, E2BVisionAgent
|
19 |
|
20 |
load_dotenv(override=True)
|
21 |
|
@@ -420,44 +420,13 @@ def generate_interaction_id(session_uuid):
|
|
420 |
return f"{session_uuid}_{int(time.time())}"
|
421 |
|
422 |
|
423 |
-
def chat_message_to_json(obj):
|
424 |
-
"""Custom JSON serializer for ChatMessage and related objects"""
|
425 |
-
if hasattr(obj, "__dict__"):
|
426 |
-
# Create a copy of the object's __dict__ to avoid modifying the original
|
427 |
-
result = obj.__dict__.copy()
|
428 |
-
|
429 |
-
# Remove the 'raw' field which may contain non-serializable data
|
430 |
-
if "raw" in result:
|
431 |
-
del result["raw"]
|
432 |
-
|
433 |
-
# Process the content or tool_calls if they exist
|
434 |
-
if "content" in result and result["content"] is not None:
|
435 |
-
if hasattr(result["content"], "__dict__"):
|
436 |
-
result["content"] = chat_message_to_json(result["content"])
|
437 |
-
|
438 |
-
if "tool_calls" in result and result["tool_calls"] is not None:
|
439 |
-
result["tool_calls"] = [
|
440 |
-
chat_message_to_json(tc) for tc in result["tool_calls"]
|
441 |
-
]
|
442 |
-
|
443 |
-
return result
|
444 |
-
elif isinstance(obj, (list, tuple)):
|
445 |
-
return [chat_message_to_json(item) for item in obj]
|
446 |
-
else:
|
447 |
-
return obj
|
448 |
-
|
449 |
-
|
450 |
def save_final_status(folder, status: str, summary, error_message=None) -> None:
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
default=chat_message_to_json,
|
457 |
)
|
458 |
-
)
|
459 |
-
output_file.close()
|
460 |
-
|
461 |
|
462 |
def extract_browser_uuid(js_uuid):
|
463 |
print(f"[BROWSER] Got browser UUID from JS: {js_uuid}")
|
@@ -494,13 +463,6 @@ def create_agent(data_dir, desktop):
|
|
494 |
)
|
495 |
|
496 |
|
497 |
-
def get_agent_summary_erase_images(agent):
|
498 |
-
for memory_step in agent.memory.steps:
|
499 |
-
if getattr(memory_step, "observations_images", None):
|
500 |
-
memory_step.observations_images = None
|
501 |
-
return agent.memory.get_succinct_steps()
|
502 |
-
|
503 |
-
|
504 |
class EnrichedGradioUI(GradioUI):
|
505 |
def log_user_message(self, text_input):
|
506 |
import gradio as gr
|
@@ -563,9 +525,9 @@ class EnrichedGradioUI(GradioUI):
|
|
563 |
yield stored_messages
|
564 |
|
565 |
# THIS ERASES IMAGES FROM AGENT MEMORY, USE WITH CAUTION
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
yield stored_messages
|
570 |
|
571 |
except Exception as e:
|
|
|
15 |
from smolagents import CodeAgent
|
16 |
from smolagents.gradio_ui import GradioUI, stream_to_gradio
|
17 |
|
18 |
+
from e2bqwen import QwenVLAPIModel, E2BVisionAgent, get_agent_summary_erase_images
|
19 |
|
20 |
load_dotenv(override=True)
|
21 |
|
|
|
420 |
return f"{session_uuid}_{int(time.time())}"
|
421 |
|
422 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
423 |
def save_final_status(folder, status: str, summary, error_message=None) -> None:
|
424 |
+
with open(os.path.join(folder, "metadata.json"), "w") as output_file:
|
425 |
+
output_file.write(
|
426 |
+
json.dumps(
|
427 |
+
{"status": status, "summary": summary, "error_message": error_message},
|
428 |
+
)
|
|
|
429 |
)
|
|
|
|
|
|
|
430 |
|
431 |
def extract_browser_uuid(js_uuid):
|
432 |
print(f"[BROWSER] Got browser UUID from JS: {js_uuid}")
|
|
|
463 |
)
|
464 |
|
465 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
466 |
class EnrichedGradioUI(GradioUI):
|
467 |
def log_user_message(self, text_input):
|
468 |
import gradio as gr
|
|
|
525 |
yield stored_messages
|
526 |
|
527 |
# THIS ERASES IMAGES FROM AGENT MEMORY, USE WITH CAUTION
|
528 |
+
if consent_storage:
|
529 |
+
summary = get_agent_summary_erase_images(session_state["agent"])
|
530 |
+
save_final_status(data_dir, "completed", summary = summary)
|
531 |
yield stored_messages
|
532 |
|
533 |
except Exception as e:
|
e2bqwen.py
CHANGED
@@ -170,6 +170,15 @@ def draw_marker_on_image(image_copy, click_coordinates):
|
|
170 |
return image_copy
|
171 |
|
172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
class E2BVisionAgent(CodeAgent):
|
174 |
"""Agent for e2b desktop automation with Qwen2.5VL vision capabilities"""
|
175 |
|
@@ -220,7 +229,7 @@ class E2BVisionAgent(CodeAgent):
|
|
220 |
self.step_callbacks.append(self.take_screenshot_callback)
|
221 |
|
222 |
def initialize_system_prompt(self) -> str:
|
223 |
-
if
|
224 |
return """You are a desktop automation assistant that can control a remote desktop environment.
|
225 |
You only have access to the following tools to interact with the desktop, no additional ones:
|
226 |
- click(x, y): Performs a left-click at the specified coordinates
|
@@ -509,9 +518,9 @@ REMEMBER TO ALWAYS CLICK IN THE MIDDLE OF THE TEXT, NOT ON THE SIDE, NOT UNDER.
|
|
509 |
|
510 |
image_copy = image.copy()
|
511 |
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
|
516 |
self.last_marked_screenshot = AgentImage(screenshot_path)
|
517 |
print(f"Saved screenshot for step {current_step} to {screenshot_path}")
|
@@ -570,7 +579,7 @@ class QwenVLAPIModel(Model):
|
|
570 |
super().__init__()
|
571 |
self.model_id = model_id
|
572 |
self.base_model = HfApiModel(
|
573 |
-
model_id="https://
|
574 |
token=hf_token,
|
575 |
max_tokens=4096,
|
576 |
)
|
|
|
170 |
return image_copy
|
171 |
|
172 |
|
173 |
+
def get_agent_summary_erase_images(agent):
|
174 |
+
for memory_step in agent.memory.steps:
|
175 |
+
if hasattr(memory_step, "observations_images"):
|
176 |
+
memory_step.observations_images = None
|
177 |
+
if hasattr(memory_step, "task_images"):
|
178 |
+
memory_step.task_images = None
|
179 |
+
return agent.write_memory_to_messages()
|
180 |
+
|
181 |
+
|
182 |
class E2BVisionAgent(CodeAgent):
|
183 |
"""Agent for e2b desktop automation with Qwen2.5VL vision capabilities"""
|
184 |
|
|
|
229 |
self.step_callbacks.append(self.take_screenshot_callback)
|
230 |
|
231 |
def initialize_system_prompt(self) -> str:
|
232 |
+
if False:
|
233 |
return """You are a desktop automation assistant that can control a remote desktop environment.
|
234 |
You only have access to the following tools to interact with the desktop, no additional ones:
|
235 |
- click(x, y): Performs a left-click at the specified coordinates
|
|
|
518 |
|
519 |
image_copy = image.copy()
|
520 |
|
521 |
+
if getattr(self, "click_coordinates", None):
|
522 |
+
print("DRAWING MARKER")
|
523 |
+
image_copy = draw_marker_on_image(image_copy, self.click_coordinates)
|
524 |
|
525 |
self.last_marked_screenshot = AgentImage(screenshot_path)
|
526 |
print(f"Saved screenshot for step {current_step} to {screenshot_path}")
|
|
|
579 |
super().__init__()
|
580 |
self.model_id = model_id
|
581 |
self.base_model = HfApiModel(
|
582 |
+
model_id="https://ahbeihft09ulicbf.us-east-1.aws.endpoints.huggingface.cloud",
|
583 |
token=hf_token,
|
584 |
max_tokens=4096,
|
585 |
)
|
eval.py
CHANGED
@@ -9,7 +9,7 @@ from e2b_desktop import Sandbox
|
|
9 |
from huggingface_hub import get_token
|
10 |
from io import BytesIO
|
11 |
from PIL import Image
|
12 |
-
from e2bqwen import QwenVLAPIModel, E2BVisionAgent
|
13 |
|
14 |
from dotenv import load_dotenv
|
15 |
|
@@ -78,14 +78,6 @@ def create_agent(data_dir, desktop, max_steps: int):
|
|
78 |
)
|
79 |
|
80 |
|
81 |
-
def get_agent_summary_erase_images(agent):
|
82 |
-
"""Get agent summary and erase images to save space"""
|
83 |
-
for memory_step in agent.memory.steps:
|
84 |
-
if getattr(memory_step, "observations_images", None):
|
85 |
-
memory_step.observations_images = None
|
86 |
-
return agent.memory.get_succinct_steps()
|
87 |
-
|
88 |
-
|
89 |
def chat_message_to_json(obj):
|
90 |
"""Custom JSON serializer for ChatMessage and related objects"""
|
91 |
if hasattr(obj, "__dict__"):
|
@@ -179,6 +171,7 @@ def run_example_once(example_name, example_text, run_index, example_dir, max_ste
|
|
179 |
)
|
180 |
result = {"status": "failed", "run_dir": run_dir, "error": error_message}
|
181 |
except Exception as e:
|
|
|
182 |
error_message = f"Error setting up sandbox: {str(e)}"
|
183 |
thread_safe_print(
|
184 |
f" ✗ Example '{example_name}' run {run_index} failed: {error_message}"
|
@@ -195,6 +188,7 @@ def run_example_once(example_name, example_text, run_index, example_dir, max_ste
|
|
195 |
|
196 |
return result
|
197 |
|
|
|
198 |
|
199 |
def run_example(example_name, example_text, num_runs, example_dir, max_steps):
|
200 |
"""Run a single example multiple times using threads for each run"""
|
@@ -217,8 +211,9 @@ def run_example(example_name, example_text, num_runs, example_dir, max_steps):
|
|
217 |
result = future.result()
|
218 |
results.append(result)
|
219 |
except Exception as exc:
|
|
|
220 |
thread_safe_print(
|
221 |
-
f" ✗ Run {run_index} for '{example_name}' generated an exception
|
222 |
)
|
223 |
results.append(
|
224 |
{"status": "error", "run_index": run_index, "error": str(exc)}
|
@@ -347,15 +342,12 @@ def main():
|
|
347 |
|
348 |
# Examples from the original code
|
349 |
examples = {
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
"
|
355 |
-
|
356 |
-
# "pond": "What's the name of the pond just south of Château de Fontainebleau in Google maps?",
|
357 |
-
# "flux": "Go on the Hugging Face Hub, find a Space for FLUX1.dev, and generate a picture of the Golden Gate bridge.",
|
358 |
-
# "hf": "Download me a picture of a puppy from Google, then head to Hugging Face, find a Space dedicated to background removal, and use it to remove the puppy picture's background",
|
359 |
}
|
360 |
|
361 |
# Create output directory if it doesn't exist
|
|
|
9 |
from huggingface_hub import get_token
|
10 |
from io import BytesIO
|
11 |
from PIL import Image
|
12 |
+
from e2bqwen import QwenVLAPIModel, E2BVisionAgent, get_agent_summary_erase_images
|
13 |
|
14 |
from dotenv import load_dotenv
|
15 |
|
|
|
78 |
)
|
79 |
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
def chat_message_to_json(obj):
|
82 |
"""Custom JSON serializer for ChatMessage and related objects"""
|
83 |
if hasattr(obj, "__dict__"):
|
|
|
171 |
)
|
172 |
result = {"status": "failed", "run_dir": run_dir, "error": error_message}
|
173 |
except Exception as e:
|
174 |
+
raise e
|
175 |
error_message = f"Error setting up sandbox: {str(e)}"
|
176 |
thread_safe_print(
|
177 |
f" ✗ Example '{example_name}' run {run_index} failed: {error_message}"
|
|
|
188 |
|
189 |
return result
|
190 |
|
191 |
+
import traceback
|
192 |
|
193 |
def run_example(example_name, example_text, num_runs, example_dir, max_steps):
|
194 |
"""Run a single example multiple times using threads for each run"""
|
|
|
211 |
result = future.result()
|
212 |
results.append(result)
|
213 |
except Exception as exc:
|
214 |
+
error_traceback = traceback.format_exc()
|
215 |
thread_safe_print(
|
216 |
+
f" ✗ Run {run_index} for '{example_name}' generated an exception:\n{error_traceback}"
|
217 |
)
|
218 |
results.append(
|
219 |
{"status": "error", "run_index": run_index, "error": str(exc)}
|
|
|
342 |
|
343 |
# Examples from the original code
|
344 |
examples = {
|
345 |
+
"puppies": "Find me pictures of cute puppies",
|
346 |
+
"gmaps": "Use Google Maps to find the Hugging Face HQ in Paris",
|
347 |
+
"wiki": "Go to Wikipedia and find what happend on April 4th",
|
348 |
+
"hello": "Write 'Hello World' in a text editor",
|
349 |
+
"commute": "Find out how long it takes to travel by train from Bern and Basel",
|
350 |
+
"hf_space": "Go to Hugging Face Spaces and then find the Space flux.1 schnell. Use the space to generate an image of a GPU",
|
|
|
|
|
|
|
351 |
}
|
352 |
|
353 |
# Create output directory if it doesn't exist
|