M-Rique committed on
Commit
ee08a04
·
1 Parent(s): e73b85c

Repair save + new prompts

Browse files
Files changed (3) hide show
  1. app.py +9 -47
  2. e2bqwen.py +14 -5
  3. eval.py +11 -19
app.py CHANGED
@@ -15,7 +15,7 @@ from dotenv import load_dotenv
15
  from smolagents import CodeAgent
16
  from smolagents.gradio_ui import GradioUI, stream_to_gradio
17
 
18
- from e2bqwen import QwenVLAPIModel, E2BVisionAgent
19
 
20
  load_dotenv(override=True)
21
 
@@ -420,44 +420,13 @@ def generate_interaction_id(session_uuid):
420
  return f"{session_uuid}_{int(time.time())}"
421
 
422
 
423
- def chat_message_to_json(obj):
424
- """Custom JSON serializer for ChatMessage and related objects"""
425
- if hasattr(obj, "__dict__"):
426
- # Create a copy of the object's __dict__ to avoid modifying the original
427
- result = obj.__dict__.copy()
428
-
429
- # Remove the 'raw' field which may contain non-serializable data
430
- if "raw" in result:
431
- del result["raw"]
432
-
433
- # Process the content or tool_calls if they exist
434
- if "content" in result and result["content"] is not None:
435
- if hasattr(result["content"], "__dict__"):
436
- result["content"] = chat_message_to_json(result["content"])
437
-
438
- if "tool_calls" in result and result["tool_calls"] is not None:
439
- result["tool_calls"] = [
440
- chat_message_to_json(tc) for tc in result["tool_calls"]
441
- ]
442
-
443
- return result
444
- elif isinstance(obj, (list, tuple)):
445
- return [chat_message_to_json(item) for item in obj]
446
- else:
447
- return obj
448
-
449
-
450
  def save_final_status(folder, status: str, summary, error_message=None) -> None:
451
- metadata_path = os.path.join(folder, "metadata.json")
452
- output_file = open(metadata_path, "w")
453
- output_file.write(
454
- json.dumps(
455
- {"status": status, "summary": summary, "error_message": error_message},
456
- default=chat_message_to_json,
457
  )
458
- )
459
- output_file.close()
460
-
461
 
462
  def extract_browser_uuid(js_uuid):
463
  print(f"[BROWSER] Got browser UUID from JS: {js_uuid}")
@@ -494,13 +463,6 @@ def create_agent(data_dir, desktop):
494
  )
495
 
496
 
497
- def get_agent_summary_erase_images(agent):
498
- for memory_step in agent.memory.steps:
499
- if getattr(memory_step, "observations_images", None):
500
- memory_step.observations_images = None
501
- return agent.memory.get_succinct_steps()
502
-
503
-
504
  class EnrichedGradioUI(GradioUI):
505
  def log_user_message(self, text_input):
506
  import gradio as gr
@@ -563,9 +525,9 @@ class EnrichedGradioUI(GradioUI):
563
  yield stored_messages
564
 
565
  # THIS ERASES IMAGES FROM AGENT MEMORY, USE WITH CAUTION
566
- # if consent_storage:
567
- # summary = get_agent_summary_erase_images(session_state["agent"])
568
- # save_final_status(data_dir, "completed", summary = summary)
569
  yield stored_messages
570
 
571
  except Exception as e:
 
15
  from smolagents import CodeAgent
16
  from smolagents.gradio_ui import GradioUI, stream_to_gradio
17
 
18
+ from e2bqwen import QwenVLAPIModel, E2BVisionAgent, get_agent_summary_erase_images
19
 
20
  load_dotenv(override=True)
21
 
 
420
  return f"{session_uuid}_{int(time.time())}"
421
 
422
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
  def save_final_status(folder, status: str, summary, error_message=None) -> None:
424
+ with open(os.path.join(folder, "metadata.json"), "w") as output_file:
425
+ output_file.write(
426
+ json.dumps(
427
+ {"status": status, "summary": summary, "error_message": error_message},
428
+ )
 
429
  )
 
 
 
430
 
431
  def extract_browser_uuid(js_uuid):
432
  print(f"[BROWSER] Got browser UUID from JS: {js_uuid}")
 
463
  )
464
 
465
 
 
 
 
 
 
 
 
466
  class EnrichedGradioUI(GradioUI):
467
  def log_user_message(self, text_input):
468
  import gradio as gr
 
525
  yield stored_messages
526
 
527
  # THIS ERASES IMAGES FROM AGENT MEMORY, USE WITH CAUTION
528
+ if consent_storage:
529
+ summary = get_agent_summary_erase_images(session_state["agent"])
530
+ save_final_status(data_dir, "completed", summary = summary)
531
  yield stored_messages
532
 
533
  except Exception as e:
e2bqwen.py CHANGED
@@ -170,6 +170,15 @@ def draw_marker_on_image(image_copy, click_coordinates):
170
  return image_copy
171
 
172
 
 
 
 
 
 
 
 
 
 
173
  class E2BVisionAgent(CodeAgent):
174
  """Agent for e2b desktop automation with Qwen2.5VL vision capabilities"""
175
 
@@ -220,7 +229,7 @@ class E2BVisionAgent(CodeAgent):
220
  self.step_callbacks.append(self.take_screenshot_callback)
221
 
222
  def initialize_system_prompt(self) -> str:
223
- if True:
224
  return """You are a desktop automation assistant that can control a remote desktop environment.
225
  You only have access to the following tools to interact with the desktop, no additional ones:
226
  - click(x, y): Performs a left-click at the specified coordinates
@@ -509,9 +518,9 @@ REMEMBER TO ALWAYS CLICK IN THE MIDDLE OF THE TEXT, NOT ON THE SIDE, NOT UNDER.
509
 
510
  image_copy = image.copy()
511
 
512
- # if getattr(self, "click_coordinates", None):
513
- # print("DRAWING MARKER")
514
- # image_copy = draw_marker_on_image(image_copy, self.click_coordinates)
515
 
516
  self.last_marked_screenshot = AgentImage(screenshot_path)
517
  print(f"Saved screenshot for step {current_step} to {screenshot_path}")
@@ -570,7 +579,7 @@ class QwenVLAPIModel(Model):
570
  super().__init__()
571
  self.model_id = model_id
572
  self.base_model = HfApiModel(
573
- model_id="https://n5wr7lfx6wp94tvl.us-east-1.aws.endpoints.huggingface.cloud",
574
  token=hf_token,
575
  max_tokens=4096,
576
  )
 
170
  return image_copy
171
 
172
 
173
+ def get_agent_summary_erase_images(agent):
174
+ for memory_step in agent.memory.steps:
175
+ if hasattr(memory_step, "observations_images"):
176
+ memory_step.observations_images = None
177
+ if hasattr(memory_step, "task_images"):
178
+ memory_step.task_images = None
179
+ return agent.write_memory_to_messages()
180
+
181
+
182
  class E2BVisionAgent(CodeAgent):
183
  """Agent for e2b desktop automation with Qwen2.5VL vision capabilities"""
184
 
 
229
  self.step_callbacks.append(self.take_screenshot_callback)
230
 
231
  def initialize_system_prompt(self) -> str:
232
+ if False:
233
  return """You are a desktop automation assistant that can control a remote desktop environment.
234
  You only have access to the following tools to interact with the desktop, no additional ones:
235
  - click(x, y): Performs a left-click at the specified coordinates
 
518
 
519
  image_copy = image.copy()
520
 
521
+ if getattr(self, "click_coordinates", None):
522
+ print("DRAWING MARKER")
523
+ image_copy = draw_marker_on_image(image_copy, self.click_coordinates)
524
 
525
  self.last_marked_screenshot = AgentImage(screenshot_path)
526
  print(f"Saved screenshot for step {current_step} to {screenshot_path}")
 
579
  super().__init__()
580
  self.model_id = model_id
581
  self.base_model = HfApiModel(
582
+ model_id="https://ahbeihft09ulicbf.us-east-1.aws.endpoints.huggingface.cloud",
583
  token=hf_token,
584
  max_tokens=4096,
585
  )
eval.py CHANGED
@@ -9,7 +9,7 @@ from e2b_desktop import Sandbox
9
  from huggingface_hub import get_token
10
  from io import BytesIO
11
  from PIL import Image
12
- from e2bqwen import QwenVLAPIModel, E2BVisionAgent
13
 
14
  from dotenv import load_dotenv
15
 
@@ -78,14 +78,6 @@ def create_agent(data_dir, desktop, max_steps: int):
78
  )
79
 
80
 
81
- def get_agent_summary_erase_images(agent):
82
- """Get agent summary and erase images to save space"""
83
- for memory_step in agent.memory.steps:
84
- if getattr(memory_step, "observations_images", None):
85
- memory_step.observations_images = None
86
- return agent.memory.get_succinct_steps()
87
-
88
-
89
  def chat_message_to_json(obj):
90
  """Custom JSON serializer for ChatMessage and related objects"""
91
  if hasattr(obj, "__dict__"):
@@ -179,6 +171,7 @@ def run_example_once(example_name, example_text, run_index, example_dir, max_ste
179
  )
180
  result = {"status": "failed", "run_dir": run_dir, "error": error_message}
181
  except Exception as e:
 
182
  error_message = f"Error setting up sandbox: {str(e)}"
183
  thread_safe_print(
184
  f" ✗ Example '{example_name}' run {run_index} failed: {error_message}"
@@ -195,6 +188,7 @@ def run_example_once(example_name, example_text, run_index, example_dir, max_ste
195
 
196
  return result
197
 
 
198
 
199
  def run_example(example_name, example_text, num_runs, example_dir, max_steps):
200
  """Run a single example multiple times using threads for each run"""
@@ -217,8 +211,9 @@ def run_example(example_name, example_text, num_runs, example_dir, max_steps):
217
  result = future.result()
218
  results.append(result)
219
  except Exception as exc:
 
220
  thread_safe_print(
221
- f" ✗ Run {run_index} for '{example_name}' generated an exception: {exc}"
222
  )
223
  results.append(
224
  {"status": "error", "run_index": run_index, "error": str(exc)}
@@ -347,15 +342,12 @@ def main():
347
 
348
  # Examples from the original code
349
  examples = {
350
- # "puppies": "Find me pictures of cute puppies",
351
- # "commute": "Check the commuting time between Bern and Zurich on Google maps",
352
- # "hello": "Write 'Hello World' in a text editor",
353
- # "wiki": "When was Temple Grandin introduced to the American Academy of Arts and Sciences, according to Wikipedia?",
354
- "quote": "Can you give me Bertrand Russel's 'Teapot analogy' as stated in his entry on Stanford Encyclopedia of Philosophy?",
355
- # "flight": "Search a flight from Rome to Berlin for May 3rd, 2025.",
356
- # "pond": "What's the name of the pond just south of Château de Fontainebleau in Google maps?",
357
- # "flux": "Go on the Hugging Face Hub, find a Space for FLUX1.dev, and generate a picture of the Golden Gate bridge.",
358
- # "hf": "Download me a picture of a puppy from Google, then head to Hugging Face, find a Space dedicated to background removal, and use it to remove the puppy picture's background",
359
  }
360
 
361
  # Create output directory if it doesn't exist
 
9
  from huggingface_hub import get_token
10
  from io import BytesIO
11
  from PIL import Image
12
+ from e2bqwen import QwenVLAPIModel, E2BVisionAgent, get_agent_summary_erase_images
13
 
14
  from dotenv import load_dotenv
15
 
 
78
  )
79
 
80
 
 
 
 
 
 
 
 
 
81
  def chat_message_to_json(obj):
82
  """Custom JSON serializer for ChatMessage and related objects"""
83
  if hasattr(obj, "__dict__"):
 
171
  )
172
  result = {"status": "failed", "run_dir": run_dir, "error": error_message}
173
  except Exception as e:
174
+ raise e
175
  error_message = f"Error setting up sandbox: {str(e)}"
176
  thread_safe_print(
177
  f" ✗ Example '{example_name}' run {run_index} failed: {error_message}"
 
188
 
189
  return result
190
 
191
+ import traceback
192
 
193
  def run_example(example_name, example_text, num_runs, example_dir, max_steps):
194
  """Run a single example multiple times using threads for each run"""
 
211
  result = future.result()
212
  results.append(result)
213
  except Exception as exc:
214
+ error_traceback = traceback.format_exc()
215
  thread_safe_print(
216
+ f" ✗ Run {run_index} for '{example_name}' generated an exception:\n{error_traceback}"
217
  )
218
  results.append(
219
  {"status": "error", "run_index": run_index, "error": str(exc)}
 
342
 
343
  # Examples from the original code
344
  examples = {
345
+ "puppies": "Find me pictures of cute puppies",
346
+ "gmaps": "Use Google Maps to find the Hugging Face HQ in Paris",
347
+ "wiki": "Go to Wikipedia and find what happend on April 4th",
348
+ "hello": "Write 'Hello World' in a text editor",
349
+ "commute": "Find out how long it takes to travel by train from Bern and Basel",
350
+ "hf_space": "Go to Hugging Face Spaces and then find the Space flux.1 schnell. Use the space to generate an image of a GPU",
 
 
 
351
  }
352
 
353
  # Create output directory if it doesn't exist