Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -423,19 +423,9 @@ def update_terminal_from_session(session_hash):
|
|
423 |
log_path = get_log_file_path(session_hash)
|
424 |
return read_log_content(log_path)
|
425 |
|
426 |
-
|
427 |
-
def
|
428 |
-
|
429 |
-
desktop = get_or_create_sandbox(session_hash)
|
430 |
-
|
431 |
-
# Create data directory for this session
|
432 |
-
data_dir = os.path.join(TMP_DIR, interaction_id)
|
433 |
-
if not os.path.exists(data_dir):
|
434 |
-
os.makedirs(data_dir)
|
435 |
-
|
436 |
-
log_file = get_log_file_path(session_hash)
|
437 |
-
# Create the agent
|
438 |
-
agent = E2BVisionAgent(
|
439 |
model=model,
|
440 |
data_dir=data_dir,
|
441 |
desktop=desktop,
|
@@ -444,38 +434,60 @@ def run_agent_task(task_input, session_hash, request: gr.Request):
|
|
444 |
planning_interval=5,
|
445 |
log_file = log_file
|
446 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
Always analyze the latest screenshot carefully before performing actions. Make sure to:
|
454 |
-
1. Look at elements on the screen to determine what to click or interact with
|
455 |
-
2. Use precise coordinates for mouse movements and clicks
|
456 |
-
3. Wait for page loads or animations to complete using the wait() tool
|
457 |
-
4. Sometimes you may have missed a click, so never assume that you're on the right page, always make sure that your previous action worked In the screenshot you can see if the mouse is out of the clickable area. Pay special attention to this.
|
458 |
-
|
459 |
-
When you receive a task, break it down into step-by-step actions. On each step, look at the current screenshot to validate if previous steps worked and decide the next action.
|
460 |
-
We can only execute one action at a time. On each step, answer only a python blob with the action to perform
|
461 |
-
""")
|
462 |
-
|
463 |
-
try:
|
464 |
-
# Run the agent
|
465 |
-
result = agent.run(full_task)
|
466 |
-
save_final_status(data_dir, "completed", details = agent.memory.get_succinct_steps())
|
467 |
-
return f"Task completed: {result}", gr.update(visible=True), gr.update(visible=False)
|
468 |
-
|
469 |
-
except Exception as e:
|
470 |
-
error_message = f"Error running agent: {str(e)} Details {traceback.format_exc()}"
|
471 |
-
save_final_status(data_dir, "failed", details = error_message)
|
472 |
-
print(error_message)
|
473 |
-
error_result = "Error running agent - Model inference endpoints not ready. Try again later." if 'Both endpoints failed' in error_message else "Error running agent"
|
474 |
-
return error_result, gr.update(visible=True), gr.update(visible=False)
|
475 |
|
476 |
-
|
477 |
-
|
|
|
478 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
479 |
|
480 |
|
481 |
# Create a Gradio app with Blocks
|
@@ -508,18 +520,23 @@ with gr.Blocks(css=custom_css, js=custom_js) as demo:
|
|
508 |
examples_per_page=4
|
509 |
)
|
510 |
|
511 |
-
with gr.Group(visible=True) as terminal_container:
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
|
|
|
|
519 |
|
520 |
# Hidden refresh button
|
521 |
-
|
522 |
-
|
|
|
|
|
|
|
523 |
with gr.Group(visible=False) as results_container:
|
524 |
results_output = gr.Textbox(
|
525 |
label="Results",
|
@@ -528,7 +545,18 @@ with gr.Blocks(css=custom_css, js=custom_js) as demo:
|
|
528 |
)
|
529 |
|
530 |
update_btn = gr.Button("Let's go!")
|
531 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
532 |
|
533 |
def read_log_content(log_file, tail=4):
|
534 |
"""Read the contents of a log file for a specific session"""
|
@@ -572,17 +600,20 @@ with gr.Blocks(css=custom_css, js=custom_js) as demo:
|
|
572 |
fn=clear_and_set_view_only,
|
573 |
inputs=[task_input],
|
574 |
outputs=[results_output, html_output, results_container, terminal_container]
|
575 |
-
)
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
|
|
|
|
|
|
586 |
fn=check_and_set_interactive,
|
587 |
inputs=[results_output],
|
588 |
outputs=html_output
|
|
|
423 |
log_path = get_log_file_path(session_hash)
|
424 |
return read_log_content(log_path)
|
425 |
|
426 |
+
|
427 |
+
def create_agent():
|
428 |
+
return E2BVisionAgent(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
429 |
model=model,
|
430 |
data_dir=data_dir,
|
431 |
desktop=desktop,
|
|
|
434 |
planning_interval=5,
|
435 |
log_file = log_file
|
436 |
)
|
437 |
+
|
438 |
+
class EnrichedGradioUI(GradioUI):
|
439 |
+
def interact_with_agent(self, task_input, messages, session_state, session_hash):
|
440 |
+
import gradio as gr
|
441 |
+
|
442 |
+
interaction_id = generate_interaction_id(request)
|
443 |
+
desktop = get_or_create_sandbox(session_hash)
|
444 |
+
|
445 |
+
# Create data directory for this session
|
446 |
+
data_dir = os.path.join(TMP_DIR, interaction_id)
|
447 |
+
if not os.path.exists(data_dir):
|
448 |
+
os.makedirs(data_dir)
|
449 |
+
|
450 |
+
log_file = get_log_file_path(session_hash)
|
451 |
+
|
452 |
+
# Construct the full task with instructions
|
453 |
+
full_task = task_input + dedent(f"""
|
454 |
+
The desktop has a resolution of {WIDTH}x{HEIGHT}, take it into account to decide clicking coordinates.
|
455 |
+
When clicking an element, always make sure to click THE MIDDLE of that element! Else you risk to miss it.
|
456 |
|
457 |
+
Always analyze the latest screenshot carefully before performing actions. Make sure to:
|
458 |
+
1. Look at elements on the screen to determine what to click or interact with
|
459 |
+
2. Use precise coordinates for mouse movements and clicks
|
460 |
+
3. Wait for page loads or animations to complete using the wait() tool
|
461 |
+
4. Sometimes you may have missed a click, so never assume that you're on the right page, always make sure that your previous action worked In the screenshot you can see if the mouse is out of the clickable area. Pay special attention to this.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
462 |
|
463 |
+
When you receive a task, break it down into step-by-step actions. On each step, look at the current screenshot to validate if previous steps worked and decide the next action.
|
464 |
+
We can only execute one action at a time. On each step, answer only a python blob with the action to perform
|
465 |
+
""")
|
466 |
|
467 |
+
# Get the agent type from the template agent
|
468 |
+
if "agent" not in session_state:
|
469 |
+
session_state["agent"] = create_agent()
|
470 |
+
|
471 |
+
try:
|
472 |
+
messages.append(gr.ChatMessage(role="user", content=prompt))
|
473 |
+
yield messages
|
474 |
+
|
475 |
+
for msg in stream_to_gradio(session_state["agent"], task=full_task, reset_agent_memory=False):
|
476 |
+
messages.append(msg)
|
477 |
+
yield messages
|
478 |
+
|
479 |
+
yield messages
|
480 |
+
save_final_status(data_dir, "completed", details = agent.memory.get_succinct_steps())
|
481 |
+
except Exception as e:
|
482 |
+
error_message=f"Error in interaction: {str(e)}"
|
483 |
+
messages.append(gr.ChatMessage(role="assistant", content=error_message))
|
484 |
+
yield messages
|
485 |
+
save_final_status(data_dir, "failed", details = error_message)
|
486 |
+
error_result = "Error running agent - Model inference endpoints not ready. Try again later." if 'Both endpoints failed' in error_message else "Error running agent"
|
487 |
+
yield gr.ChatMessage(role="assistant", content=error_result)
|
488 |
+
|
489 |
+
finally:
|
490 |
+
upload_to_hf_and_remove(data_dir)
|
491 |
|
492 |
|
493 |
# Create a Gradio app with Blocks
|
|
|
520 |
examples_per_page=4
|
521 |
)
|
522 |
|
523 |
+
# with gr.Group(visible=True) as terminal_container:
|
524 |
+
|
525 |
+
#terminal = gr.Textbox(
|
526 |
+
# value="Initializing...",
|
527 |
+
# label='Console',
|
528 |
+
# lines=5,
|
529 |
+
# max_lines=10,
|
530 |
+
# interactive=False
|
531 |
+
#)
|
532 |
+
|
533 |
|
534 |
# Hidden refresh button
|
535 |
+
refresh_btn = gr.Button("Refresh", visible=False, elem_id="refresh-log-btn")
|
536 |
+
|
537 |
+
session_state = gr.State({})
|
538 |
+
stored_messages = gr.State([])
|
539 |
+
|
540 |
with gr.Group(visible=False) as results_container:
|
541 |
results_output = gr.Textbox(
|
542 |
label="Results",
|
|
|
545 |
)
|
546 |
|
547 |
update_btn = gr.Button("Let's go!")
|
548 |
+
|
549 |
+
chatbot = gr.Chatbot(
|
550 |
+
label="Agent",
|
551 |
+
type="messages",
|
552 |
+
avatar_images=(
|
553 |
+
None,
|
554 |
+
"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
|
555 |
+
),
|
556 |
+
resizeable=True,
|
557 |
+
scale=1,
|
558 |
+
)
|
559 |
+
agent_ui = GradioUI(session_state["agent"])
|
560 |
|
561 |
def read_log_content(log_file, tail=4):
|
562 |
"""Read the contents of a log file for a specific session"""
|
|
|
600 |
fn=clear_and_set_view_only,
|
601 |
inputs=[task_input],
|
602 |
outputs=[results_output, html_output, results_container, terminal_container]
|
603 |
+
).then(
|
604 |
+
agent_ui.log_user_message,
|
605 |
+
[task_input, task_input],
|
606 |
+
[stored_messages, text_input, submit_btn],
|
607 |
+
).then(agent_ui.interact_with_agent, [stored_messages, chatbot, session_state, session_hash_state], [chatbot]).then(
|
608 |
+
lambda: (
|
609 |
+
gr.Textbox(
|
610 |
+
interactive=True, placeholder="Enter your prompt here and press Shift+Enter or the button"
|
611 |
+
),
|
612 |
+
gr.Button(interactive=True),
|
613 |
+
),
|
614 |
+
None,
|
615 |
+
[text_input, submit_btn],
|
616 |
+
).then(
|
617 |
fn=check_and_set_interactive,
|
618 |
inputs=[results_output],
|
619 |
outputs=html_output
|