Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Add duration counter
Browse files
- app.py +1 -1
- e2bqwen.py +9 -7
- eval.py +13 -8
app.py
CHANGED
@@ -627,7 +627,7 @@ _Please note that we store the task logs by default so **do not write any person
|
|
627 |
examples=[
|
628 |
"Check the commuting time between Bern and Zurich on Google maps",
|
629 |
"Write 'Hello World' in a text editor",
|
630 |
-
"
|
631 |
"Search a flight from Rome to Berlin for tomorrow on Skyscanner",
|
632 |
"What' s the name of the pond just south of Château de Fontainebleau in Google maps?",
|
633 |
"Go on the Hugging Face Hub, find the space for FLUX1.dev, then generate a picture of the Golden Gate bridge",
|
|
|
627 |
examples=[
|
628 |
"Check the commuting time between Bern and Zurich on Google maps",
|
629 |
"Write 'Hello World' in a text editor",
|
630 |
+
"Can you give me Bertrand Russell's 'Teapot analogy' as stated in his entry on Stanford Encyclopedia of Philosophy?",
|
631 |
"Search a flight from Rome to Berlin for tomorrow on Skyscanner",
|
632 |
"What' s the name of the pond just south of Château de Fontainebleau in Google maps?",
|
633 |
"Go on the Hugging Face Hub, find the space for FLUX1.dev, then generate a picture of the Golden Gate bridge",
|
e2bqwen.py
CHANGED
@@ -128,9 +128,10 @@ final_answer("Done")
|
|
128 |
|
129 |
<click_guidelines>
|
130 |
Look at elements on the screen to determine what to click or interact with.
|
131 |
-
Use precise coordinates based on the current screenshot for mouse movements and clicks.
|
132 |
-
Whenever you click, MAKE SURE to click in the middle of the button, text, link or any other clickable element. Not under, not on the side. IN THE MIDDLE
|
133 |
-
|
|
|
134 |
Sometimes you may have missed a click, so never assume that you're on the right page, always make sure that your previous action worked. In the screenshot you can see if the mouse is out of the clickable area. Pay special attention to this.
|
135 |
</click_guidelines>
|
136 |
|
@@ -142,7 +143,8 @@ On each step, look at the last screenshot and action to validate if previous ste
|
|
142 |
Use click to move through menus on the desktop and scroll for web and specific applications.
|
143 |
Always analyze the latest screenshot carefully before performing actions.
|
144 |
Desktop menus usually expand with more options, the tiny triangle next to some text in a menu means that menu expands. For example in Office in the Applications menu expands showing presentation or writing applications.
|
145 |
-
NEVER CLICK THE WEB BROWSER ICON TO OPEN THE WEB BROWSER: use open_url
|
|
|
146 |
</general_guidelines>
|
147 |
""".replace("<<current_date>>", datetime.now().strftime("%A, %d-%B-%Y"))
|
148 |
|
@@ -218,7 +220,7 @@ class E2BVisionAgent(CodeAgent):
|
|
218 |
self.step_callbacks.append(self.take_screenshot_callback)
|
219 |
|
220 |
def initialize_system_prompt(self) -> str:
|
221 |
-
if
|
222 |
return """You are a desktop automation assistant that can control a remote desktop environment.
|
223 |
You only have access to the following tools to interact with the desktop, no additional ones:
|
224 |
- click(x, y): Performs a left-click at the specified coordinates
|
@@ -415,7 +417,7 @@ REMEMBER TO ALWAYS CLICK IN THE MIDDLE OF THE TEXT, NOT ON THE SIDE, NOT UNDER.
|
|
415 |
return message
|
416 |
|
417 |
@tool
|
418 |
-
def scroll(x: int, y: int, direction: str = "down", amount: int =
|
419 |
"""
|
420 |
Moves the mouse to selected coordinates, then uses the scroll button: this could scroll the page or zoom, depending on the app. DO NOT use scroll to move through linux desktop menus.
|
421 |
Args:
|
@@ -461,7 +463,7 @@ REMEMBER TO ALWAYS CLICK IN THE MIDDLE OF THE TEXT, NOT ON THE SIDE, NOT UNDER.
|
|
461 |
@tool
|
462 |
def find_on_page_ctrl_f(search_string: str) -> str:
|
463 |
"""
|
464 |
-
Scroll the viewport to the first occurrence of the search string. This is equivalent to Ctrl+F.
|
465 |
Args:
|
466 |
search_string: The string to search for on the page.
|
467 |
"""
|
|
|
128 |
|
129 |
<click_guidelines>
|
130 |
Look at elements on the screen to determine what to click or interact with.
|
131 |
+
Use precise coordinates based on the current screenshot for mouse movements and clicks.
|
132 |
+
Whenever you click, MAKE SURE to click in the middle of the button, text, link or any other clickable element. Not under, not on the side. IN THE MIDDLE, otherwise you risk missing it.
|
133 |
+
In menus it is always better to click in the middle of the text rather than on the tiny icon. Calculate the coordinates extremely carefully. A mistake here can make the whole task fail.
|
134 |
+
The desktop has a resolution of <<resolution_x>>x<<resolution_y>> pixels: NEVER USE HYPOTHETICAL OR ASSUMED COORDINATES, USE TRUE COORDINATES that you can see from the screenshot.
|
135 |
Sometimes you may have missed a click, so never assume that you're on the right page, always make sure that your previous action worked. In the screenshot you can see if the mouse is out of the clickable area. Pay special attention to this.
|
136 |
</click_guidelines>
|
137 |
|
|
|
143 |
Use click to move through menus on the desktop and scroll for web and specific applications.
|
144 |
Always analyze the latest screenshot carefully before performing actions.
|
145 |
Desktop menus usually expand with more options, the tiny triangle next to some text in a menu means that menu expands. For example in Office in the Applications menu expands showing presentation or writing applications.
|
146 |
+
NEVER CLICK THE WEB BROWSER ICON TO OPEN THE WEB BROWSER: use open_url directly.
|
147 |
+
In the browser, ignore any sign-in popups as long as they don't interfere with your usage of the browser.
|
148 |
</general_guidelines>
|
149 |
""".replace("<<current_date>>", datetime.now().strftime("%A, %d-%B-%Y"))
|
150 |
|
|
|
220 |
self.step_callbacks.append(self.take_screenshot_callback)
|
221 |
|
222 |
def initialize_system_prompt(self) -> str:
|
223 |
+
if True:
|
224 |
return """You are a desktop automation assistant that can control a remote desktop environment.
|
225 |
You only have access to the following tools to interact with the desktop, no additional ones:
|
226 |
- click(x, y): Performs a left-click at the specified coordinates
|
|
|
417 |
return message
|
418 |
|
419 |
@tool
|
420 |
+
def scroll(x: int, y: int, direction: str = "down", amount: int = 2) -> str:
|
421 |
"""
|
422 |
Moves the mouse to selected coordinates, then uses the scroll button: this could scroll the page or zoom, depending on the app. DO NOT use scroll to move through linux desktop menus.
|
423 |
Args:
|
|
|
463 |
@tool
|
464 |
def find_on_page_ctrl_f(search_string: str) -> str:
|
465 |
"""
|
466 |
+
Scroll the browser viewport to the first occurrence of the search string. This is equivalent to Ctrl+F. Use this to search within a PDF, for instance.
|
467 |
Args:
|
468 |
search_string: The string to search for on the page.
|
469 |
"""
|
eval.py
CHANGED
@@ -234,6 +234,8 @@ def run_evaluation(examples, num_runs, output_dir, max_parallel, max_steps):
|
|
234 |
eval_dir = os.path.join(output_dir, f"eval_{timestamp}_{git_hash}")
|
235 |
os.makedirs(eval_dir, exist_ok=True)
|
236 |
|
|
|
|
|
237 |
thread_safe_print(f"Starting evaluation. Results will be saved to: {eval_dir}")
|
238 |
thread_safe_print(
|
239 |
f"Will run {len(examples)} examples, {num_runs} times each, with {max_parallel} parallel examples"
|
@@ -316,6 +318,8 @@ def run_evaluation(examples, num_runs, output_dir, max_parallel, max_steps):
|
|
316 |
success_rate = summary["example_success_rates"][example_name] * 100
|
317 |
thread_safe_print(f"Example '{example_name}': {success_rate:.1f}% success")
|
318 |
|
|
|
|
|
319 |
return eval_dir
|
320 |
|
321 |
|
@@ -343,14 +347,15 @@ def main():
|
|
343 |
|
344 |
# Examples from the original code
|
345 |
examples = {
|
346 |
-
"puppies": "Find me pictures of cute puppies",
|
347 |
-
"commute": "Check the commuting time between Bern and Zurich on Google maps",
|
348 |
-
"hello": "Write 'Hello World' in a text editor",
|
349 |
-
"wiki": "When was Temple Grandin introduced to the American Academy of Arts and Sciences, according to Wikipedia?",
|
350 |
-
"
|
351 |
-
"
|
352 |
-
"
|
353 |
-
"
|
|
|
354 |
}
|
355 |
|
356 |
# Create output directory if it doesn't exist
|
|
|
234 |
eval_dir = os.path.join(output_dir, f"eval_{timestamp}_{git_hash}")
|
235 |
os.makedirs(eval_dir, exist_ok=True)
|
236 |
|
237 |
+
start_time = datetime.now()
|
238 |
+
|
239 |
thread_safe_print(f"Starting evaluation. Results will be saved to: {eval_dir}")
|
240 |
thread_safe_print(
|
241 |
f"Will run {len(examples)} examples, {num_runs} times each, with {max_parallel} parallel examples"
|
|
|
318 |
success_rate = summary["example_success_rates"][example_name] * 100
|
319 |
thread_safe_print(f"Example '{example_name}': {success_rate:.1f}% success")
|
320 |
|
321 |
+
print("Total duration:", datetime.now() - start_time)
|
322 |
+
|
323 |
return eval_dir
|
324 |
|
325 |
|
|
|
347 |
|
348 |
# Examples from the original code
|
349 |
examples = {
|
350 |
+
# "puppies": "Find me pictures of cute puppies",
|
351 |
+
# "commute": "Check the commuting time between Bern and Zurich on Google maps",
|
352 |
+
# "hello": "Write 'Hello World' in a text editor",
|
353 |
+
# "wiki": "When was Temple Grandin introduced to the American Academy of Arts and Sciences, according to Wikipedia?",
|
354 |
+
"quote": "Can you give me Bertrand Russell's 'Teapot analogy' as stated in his entry on Stanford Encyclopedia of Philosophy?",
|
355 |
+
# "flight": "Search a flight from Rome to Berlin for May 3rd, 2025.",
|
356 |
+
# "pond": "What's the name of the pond just south of Château de Fontainebleau in Google maps?",
|
357 |
+
# "flux": "Go on the Hugging Face Hub, find a Space for FLUX1.dev, and generate a picture of the Golden Gate bridge.",
|
358 |
+
# "hf": "Download me a picture of a puppy from Google, then head to Hugging Face, find a Space dedicated to background removal, and use it to remove the puppy picture's background",
|
359 |
}
|
360 |
|
361 |
# Create output directory if it doesn't exist
|