m-ric HF Staff committed on
Commit
50a6a2d
·
verified ·
1 Parent(s): 6f39a1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -64
app.py CHANGED
@@ -423,19 +423,9 @@ def update_terminal_from_session(session_hash):
423
  log_path = get_log_file_path(session_hash)
424
  return read_log_content(log_path)
425
 
426
-
427
- def run_agent_task(task_input, session_hash, request: gr.Request):
428
- interaction_id = generate_interaction_id(request)
429
- desktop = get_or_create_sandbox(session_hash)
430
-
431
- # Create data directory for this session
432
- data_dir = os.path.join(TMP_DIR, interaction_id)
433
- if not os.path.exists(data_dir):
434
- os.makedirs(data_dir)
435
-
436
- log_file = get_log_file_path(session_hash)
437
- # Create the agent
438
- agent = E2BVisionAgent(
439
  model=model,
440
  data_dir=data_dir,
441
  desktop=desktop,
@@ -444,38 +434,60 @@ def run_agent_task(task_input, session_hash, request: gr.Request):
444
  planning_interval=5,
445
  log_file = log_file
446
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
 
448
- # Construct the full task with instructions
449
- full_task = task_input + dedent(f"""
450
- The desktop has a resolution of {WIDTH}x{HEIGHT}, take it into account to decide clicking coordinates.
451
- When clicking an element, always make sure to click THE MIDDLE of that element! Else you risk to miss it.
452
-
453
- Always analyze the latest screenshot carefully before performing actions. Make sure to:
454
- 1. Look at elements on the screen to determine what to click or interact with
455
- 2. Use precise coordinates for mouse movements and clicks
456
- 3. Wait for page loads or animations to complete using the wait() tool
457
- 4. Sometimes you may have missed a click, so never assume that you're on the right page, always make sure that your previous action worked In the screenshot you can see if the mouse is out of the clickable area. Pay special attention to this.
458
-
459
- When you receive a task, break it down into step-by-step actions. On each step, look at the current screenshot to validate if previous steps worked and decide the next action.
460
- We can only execute one action at a time. On each step, answer only a python blob with the action to perform
461
- """)
462
-
463
- try:
464
- # Run the agent
465
- result = agent.run(full_task)
466
- save_final_status(data_dir, "completed", details = agent.memory.get_succinct_steps())
467
- return f"Task completed: {result}", gr.update(visible=True), gr.update(visible=False)
468
-
469
- except Exception as e:
470
- error_message = f"Error running agent: {str(e)} Details {traceback.format_exc()}"
471
- save_final_status(data_dir, "failed", details = error_message)
472
- print(error_message)
473
- error_result = "Error running agent - Model inference endpoints not ready. Try again later." if 'Both endpoints failed' in error_message else "Error running agent"
474
- return error_result, gr.update(visible=True), gr.update(visible=False)
475
 
476
- finally:
477
- upload_to_hf_and_remove(data_dir)
 
478
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
 
480
 
481
  # Create a Gradio app with Blocks
@@ -508,18 +520,23 @@ with gr.Blocks(css=custom_css, js=custom_js) as demo:
508
  examples_per_page=4
509
  )
510
 
511
- with gr.Group(visible=True) as terminal_container:
512
- terminal = gr.Textbox(
513
- value="Initializing...",
514
- label='Console',
515
- lines=5,
516
- max_lines=10,
517
- interactive=False
518
- )
 
 
519
 
520
  # Hidden refresh button
521
- refresh_btn = gr.Button("Refresh", visible=False, elem_id="refresh-log-btn")
522
-
 
 
 
523
  with gr.Group(visible=False) as results_container:
524
  results_output = gr.Textbox(
525
  label="Results",
@@ -528,7 +545,18 @@ with gr.Blocks(css=custom_css, js=custom_js) as demo:
528
  )
529
 
530
  update_btn = gr.Button("Let's go!")
531
-
 
 
 
 
 
 
 
 
 
 
 
532
 
533
  def read_log_content(log_file, tail=4):
534
  """Read the contents of a log file for a specific session"""
@@ -572,17 +600,20 @@ with gr.Blocks(css=custom_css, js=custom_js) as demo:
572
  fn=clear_and_set_view_only,
573
  inputs=[task_input],
574
  outputs=[results_output, html_output, results_container, terminal_container]
575
- )
576
-
577
- # 2. Then run the agent task and update visibility
578
- task_result = view_only_event.then(
579
- fn=run_agent_task,
580
- inputs=[task_input,session_hash_state],
581
- outputs=[results_output, results_container, terminal_container]
582
- )
583
-
584
- # 3. Set interactive mode when task completes successfully
585
- task_result.then(
 
 
 
586
  fn=check_and_set_interactive,
587
  inputs=[results_output],
588
  outputs=html_output
 
423
  log_path = get_log_file_path(session_hash)
424
  return read_log_content(log_path)
425
 
426
+
427
+ def create_agent():
428
+ return E2BVisionAgent(
 
 
 
 
 
 
 
 
 
 
429
  model=model,
430
  data_dir=data_dir,
431
  desktop=desktop,
 
434
  planning_interval=5,
435
  log_file = log_file
436
  )
437
+
438
+ class EnrichedGradioUI(GradioUI):
439
+ def interact_with_agent(self, task_input, messages, session_state, session_hash):
440
+ import gradio as gr
441
+
442
+ interaction_id = generate_interaction_id(request)
443
+ desktop = get_or_create_sandbox(session_hash)
444
+
445
+ # Create data directory for this session
446
+ data_dir = os.path.join(TMP_DIR, interaction_id)
447
+ if not os.path.exists(data_dir):
448
+ os.makedirs(data_dir)
449
+
450
+ log_file = get_log_file_path(session_hash)
451
+
452
+ # Construct the full task with instructions
453
+ full_task = task_input + dedent(f"""
454
+ The desktop has a resolution of {WIDTH}x{HEIGHT}, take it into account to decide clicking coordinates.
455
+ When clicking an element, always make sure to click THE MIDDLE of that element! Else you risk to miss it.
456
 
457
+ Always analyze the latest screenshot carefully before performing actions. Make sure to:
458
+ 1. Look at elements on the screen to determine what to click or interact with
459
+ 2. Use precise coordinates for mouse movements and clicks
460
+ 3. Wait for page loads or animations to complete using the wait() tool
461
+ 4. Sometimes you may have missed a click, so never assume that you're on the right page, always make sure that your previous action worked In the screenshot you can see if the mouse is out of the clickable area. Pay special attention to this.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
 
463
+ When you receive a task, break it down into step-by-step actions. On each step, look at the current screenshot to validate if previous steps worked and decide the next action.
464
+ We can only execute one action at a time. On each step, answer only a python blob with the action to perform
465
+ """)
466
 
467
+ # Create this session's agent on first use and cache it in session_state
468
+ if "agent" not in session_state:
469
+ session_state["agent"] = create_agent()
470
+
471
+ try:
472
+ messages.append(gr.ChatMessage(role="user", content=prompt))
473
+ yield messages
474
+
475
+ for msg in stream_to_gradio(session_state["agent"], task=full_task, reset_agent_memory=False):
476
+ messages.append(msg)
477
+ yield messages
478
+
479
+ yield messages
480
+ save_final_status(data_dir, "completed", details = agent.memory.get_succinct_steps())
481
+ except Exception as e:
482
+ error_message=f"Error in interaction: {str(e)}"
483
+ messages.append(gr.ChatMessage(role="assistant", content=error_message))
484
+ yield messages
485
+ save_final_status(data_dir, "failed", details = error_message)
486
+ error_result = "Error running agent - Model inference endpoints not ready. Try again later." if 'Both endpoints failed' in error_message else "Error running agent"
487
+ yield gr.ChatMessage(role="assistant", content=error_result)
488
+
489
+ finally:
490
+ upload_to_hf_and_remove(data_dir)
491
 
492
 
493
  # Create a Gradio app with Blocks
 
520
  examples_per_page=4
521
  )
522
 
523
+ # with gr.Group(visible=True) as terminal_container:
524
+
525
+ #terminal = gr.Textbox(
526
+ # value="Initializing...",
527
+ # label='Console',
528
+ # lines=5,
529
+ # max_lines=10,
530
+ # interactive=False
531
+ #)
532
+
533
 
534
  # Hidden refresh button
535
+ refresh_btn = gr.Button("Refresh", visible=False, elem_id="refresh-log-btn")
536
+
537
+ session_state = gr.State({})
538
+ stored_messages = gr.State([])
539
+
540
  with gr.Group(visible=False) as results_container:
541
  results_output = gr.Textbox(
542
  label="Results",
 
545
  )
546
 
547
  update_btn = gr.Button("Let's go!")
548
+
549
+ chatbot = gr.Chatbot(
550
+ label="Agent",
551
+ type="messages",
552
+ avatar_images=(
553
+ None,
554
+ "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
555
+ ),
556
+ resizeable=True,
557
+ scale=1,
558
+ )
559
+ agent_ui = GradioUI(session_state["agent"])
560
 
561
  def read_log_content(log_file, tail=4):
562
  """Read the contents of a log file for a specific session"""
 
600
  fn=clear_and_set_view_only,
601
  inputs=[task_input],
602
  outputs=[results_output, html_output, results_container, terminal_container]
603
+ ).then(
604
+ agent_ui.log_user_message,
605
+ [task_input, task_input],
606
+ [stored_messages, text_input, submit_btn],
607
+ ).then(agent_ui.interact_with_agent, [stored_messages, chatbot, session_state, session_hash_state], [chatbot]).then(
608
+ lambda: (
609
+ gr.Textbox(
610
+ interactive=True, placeholder="Enter your prompt here and press Shift+Enter or the button"
611
+ ),
612
+ gr.Button(interactive=True),
613
+ ),
614
+ None,
615
+ [text_input, submit_btn],
616
+ ).then(
617
  fn=check_and_set_interactive,
618
  inputs=[results_output],
619
  outputs=html_output