Spaces:
Build error
Build error
File size: 8,749 Bytes
51ff9e5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
"""Browsing-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
import os
import pytest
from conftest import _close_test_runtime, _load_runtime
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import (
BrowseInteractiveAction,
BrowseURLAction,
CmdRunAction,
)
from openhands.events.observation import (
BrowserOutputObservation,
CmdOutputObservation,
)
# ============================================================================================================================
# Browsing tests, without evaluation (poetry install --without evaluation)
# For eval environments, tests need to run with poetry install
# ============================================================================================================================
@pytest.mark.skipif(
    os.environ.get('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not support browsing actions',
)
def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
    """Browse a directory listing served by ``http.server`` from the runtime.

    The test starts ``python3 -m http.server 8000`` in the background, opens
    ``http://localhost:8000`` with a ``BrowseURLAction``, checks the fields of
    the resulting ``BrowserOutputObservation`` and finally removes the server
    log file.
    """
    runtime, _ = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    # Close the runtime in ``finally`` so that a failing assertion does not
    # leak it; the other browsing tests in this module do the same.
    try:
        # Start the HTTP server in the background, logging to server.log.
        action_cmd = CmdRunAction(
            command='python3 -m http.server 8000 > server.log 2>&1 &'
        )
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(obs, CmdOutputObservation)
        assert obs.exit_code == 0
        # Presumably the shell's job-control line for the background job.
        assert '[1]' in obs.content

        # Wait briefly (presumably to let the server come up), then dump its log.
        action_cmd = CmdRunAction(command='sleep 3 && cat server.log')
        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0

        # Browse to the server root and verify the observation.
        action_browse = BrowseURLAction(url='http://localhost:8000')
        logger.info(action_browse, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action_browse)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(obs, BrowserOutputObservation)
        assert 'http://localhost:8000' in obs.url
        assert not obs.error
        assert obs.open_pages_urls == ['http://localhost:8000/']
        assert obs.active_page_index == 0
        assert obs.last_browser_action == 'goto("http://localhost:8000")'
        assert obs.last_browser_action_error == ''
        assert 'Directory listing for /' in obs.content
        assert 'server.log' in obs.content

        # Clean up the log file created by the server.
        action = CmdRunAction(command='rm -rf server.log')
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0
    finally:
        _close_test_runtime(runtime)
@pytest.mark.skipif(
    os.environ.get('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not support browsing actions',
)
def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands):
    """Open a PDF through the ``/view`` endpoint and check a screenshot is saved.

    A three-line PDF is written on the host with reportlab, copied to the
    sandbox workspace and opened in the browser using the server URL read
    from ``/tmp/oh-server-url``.
    """
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # reportlab is only needed by this test, hence the local import.
        from reportlab.lib.pagesizes import letter
        from reportlab.pdfgen import canvas

        # Build the PDF on the host side.
        host_pdf = os.path.join(temp_dir, 'test_document.pdf')
        first_line = 'This is test content for PDF reading test'
        pdf = canvas.Canvas(host_pdf, pagesize=letter)
        # Several lines of text give the document some body.
        pdf.drawString(100, 750, first_line)
        pdf.drawString(100, 700, 'Additional line for PDF structure')
        pdf.drawString(100, 650, 'Third line to ensure valid PDF')
        # Page compression is switched off to keep the file structure simple.
        pdf.setPageCompression(0)
        pdf.save()

        # Ship the PDF into the sandbox workspace.
        runtime.copy_to(host_pdf, config.workspace_mount_path_in_sandbox)

        # List the working directory to confirm the PDF arrived.
        list_files = CmdRunAction(command='ls -alh')
        logger.info(list_files, extra={'msg_type': 'ACTION'})
        listing = runtime.run_action(list_files)
        logger.info(listing, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(listing, CmdOutputObservation)
        assert listing.exit_code == 0
        assert 'test_document.pdf' in listing.content

        # Read the server URL from the file inside the sandbox.
        read_url = CmdRunAction(command='cat /tmp/oh-server-url')
        logger.info(read_url, extra={'msg_type': 'ACTION'})
        url_obs = runtime.run_action(read_url)
        logger.info(url_obs, extra={'msg_type': 'OBSERVATION'})
        assert url_obs.exit_code == 0
        server_url = url_obs.content.strip()

        # Point the browser at the PDF via the /view endpoint.
        pdf_url = f'{server_url}/view?path=/workspace/test_document.pdf'
        open_pdf = BrowseInteractiveAction(browser_actions=f'goto("{pdf_url}")')
        logger.info(open_pdf, extra={'msg_type': 'ACTION'})
        page = runtime.run_action(open_pdf)
        logger.info(page, extra={'msg_type': 'OBSERVATION'})

        # The rendered observation must report success and a saved screenshot.
        assert isinstance(page, BrowserOutputObservation)
        rendered = str(page)
        screenshot_marker = (
            'Screenshot saved to: /workspace/.browser_screenshots/screenshot_'
        )
        assert '[Action executed successfully.]' in rendered
        assert 'Canvas' in rendered
        assert screenshot_marker in rendered

        # The screenshots folder must now contain a screenshot PNG.
        list_shots = CmdRunAction(command='ls /workspace/.browser_screenshots')
        logger.info(list_shots, extra={'msg_type': 'ACTION'})
        shots = runtime.run_action(list_shots)
        logger.info(shots, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(shots, CmdOutputObservation)
        assert shots.exit_code == 0
        for fragment in ('screenshot_', '.png'):
            assert fragment in shots.content
    finally:
        _close_test_runtime(runtime)
@pytest.mark.skipif(
    os.environ.get('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not support browsing actions',
)
def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands):
    """Open a PNG through the ``/view`` endpoint and check a screenshot is saved.

    A white 400x200 image with one line of black text is drawn on the host
    with PIL, copied to the sandbox workspace and opened in the browser using
    the server URL read from ``/tmp/oh-server-url``.
    """
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # PIL is only needed by this test, hence the local import.
        from PIL import Image, ImageDraw

        # Draw the test image on the host side.
        host_png = os.path.join(temp_dir, 'test_image.png')
        picture = Image.new('RGB', (400, 200), color=(255, 255, 255))
        caption = 'This is a test PNG image'
        ImageDraw.Draw(picture).text((20, 80), caption, fill=(0, 0, 0))
        picture.save(host_png)

        # Ship the PNG into the sandbox workspace.
        runtime.copy_to(host_png, config.workspace_mount_path_in_sandbox)

        # List the working directory to confirm the PNG arrived.
        list_files = CmdRunAction(command='ls -alh')
        logger.info(list_files, extra={'msg_type': 'ACTION'})
        listing = runtime.run_action(list_files)
        logger.info(listing, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(listing, CmdOutputObservation)
        assert listing.exit_code == 0
        assert 'test_image.png' in listing.content

        # Read the server URL from the file inside the sandbox.
        read_url = CmdRunAction(command='cat /tmp/oh-server-url')
        logger.info(read_url, extra={'msg_type': 'ACTION'})
        url_obs = runtime.run_action(read_url)
        logger.info(url_obs, extra={'msg_type': 'OBSERVATION'})
        assert url_obs.exit_code == 0
        server_url = url_obs.content.strip()

        # Point the browser at the PNG via the /view endpoint.
        png_url = f'{server_url}/view?path=/workspace/test_image.png'
        open_png = BrowseInteractiveAction(browser_actions=f'goto("{png_url}")')
        logger.info(open_png, extra={'msg_type': 'ACTION'})
        page = runtime.run_action(open_png)
        logger.info(page, extra={'msg_type': 'OBSERVATION'})

        # The rendered observation must report success and a saved screenshot.
        assert isinstance(page, BrowserOutputObservation)
        rendered = str(page)
        screenshot_marker = (
            'Screenshot saved to: /workspace/.browser_screenshots/screenshot_'
        )
        assert '[Action executed successfully.]' in rendered
        assert 'File Viewer - test_image.png' in rendered
        assert screenshot_marker in rendered

        # The screenshots folder must now contain a screenshot PNG.
        list_shots = CmdRunAction(command='ls /workspace/.browser_screenshots')
        logger.info(list_shots, extra={'msg_type': 'ACTION'})
        shots = runtime.run_action(list_shots)
        logger.info(shots, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(shots, CmdOutputObservation)
        assert shots.exit_code == 0
        for fragment in ('screenshot_', '.png'):
            assert fragment in shots.content
    finally:
        _close_test_runtime(runtime)
|