Spaces:

Backup-bdg
/

OpenHands

Build error

App Files Files Community

OpenHands / tests /runtime /test_browsing.py

Backup-bdg

Upload 964 files

51ff9e5 verified 7 days ago

raw

history blame

8.75 kB

	"""Browsing-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""

	import os

	import pytest
	from conftest import _close_test_runtime, _load_runtime

	from openhands.core.logger import openhands_logger as logger
	from openhands.events.action import (
	BrowseInteractiveAction,
	BrowseURLAction,
	CmdRunAction,
	)
	from openhands.events.observation import (
	BrowserOutputObservation,
	CmdOutputObservation,
	)

	# ============================================================================================================================
	# Browsing tests, without evaluation (poetry install --without evaluation)
	# For eval environments, tests need to run with poetry install
	# ============================================================================================================================


	@pytest.mark.skipif(
	os.environ.get('TEST_RUNTIME') == 'cli',
	reason='CLIRuntime does not support browsing actions',
	)
	def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
	runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)

	# Test browse
	action_cmd = CmdRunAction(command='python3 -m http.server 8000 > server.log 2>&1 &')
	logger.info(action_cmd, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action_cmd)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})

	assert isinstance(obs, CmdOutputObservation)
	assert obs.exit_code == 0
	assert '[1]' in obs.content

	action_cmd = CmdRunAction(command='sleep 3 && cat server.log')
	logger.info(action_cmd, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action_cmd)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})
	assert obs.exit_code == 0

	action_browse = BrowseURLAction(url='http://localhost:8000')
	logger.info(action_browse, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action_browse)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})

	assert isinstance(obs, BrowserOutputObservation)
	assert 'http://localhost:8000' in obs.url
	assert not obs.error
	assert obs.open_pages_urls == ['http://localhost:8000/']
	assert obs.active_page_index == 0
	assert obs.last_browser_action == 'goto("http://localhost:8000")'
	assert obs.last_browser_action_error == ''
	assert 'Directory listing for /' in obs.content
	assert 'server.log' in obs.content

	# clean up
	action = CmdRunAction(command='rm -rf server.log')
	logger.info(action, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})
	assert obs.exit_code == 0

	_close_test_runtime(runtime)


	@pytest.mark.skipif(
	os.environ.get('TEST_RUNTIME') == 'cli',
	reason='CLIRuntime does not support browsing actions',
	)
	def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands):
	runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
	try:
	# Create a PDF file using reportlab in the host environment
	from reportlab.lib.pagesizes import letter
	from reportlab.pdfgen import canvas

	pdf_path = os.path.join(temp_dir, 'test_document.pdf')
	pdf_content = 'This is test content for PDF reading test'

	c = canvas.Canvas(pdf_path, pagesize=letter)
	# Add more content to make the PDF more robust
	c.drawString(100, 750, pdf_content)
	c.drawString(100, 700, 'Additional line for PDF structure')
	c.drawString(100, 650, 'Third line to ensure valid PDF')
	# Explicitly set PDF version and ensure proper structure
	c.setPageCompression(0) # Disable compression for simpler structure
	c.save()

	# Copy the PDF to the sandbox
	sandbox_dir = config.workspace_mount_path_in_sandbox
	runtime.copy_to(pdf_path, sandbox_dir)

	# Start HTTP server
	action_cmd = CmdRunAction(command='ls -alh')
	logger.info(action_cmd, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action_cmd)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})
	assert isinstance(obs, CmdOutputObservation)
	assert obs.exit_code == 0
	assert 'test_document.pdf' in obs.content

	# Get server url
	action_cmd = CmdRunAction(command='cat /tmp/oh-server-url')
	logger.info(action_cmd, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action_cmd)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})
	assert obs.exit_code == 0
	server_url = obs.content.strip()

	# Browse to the PDF file
	pdf_url = f'{server_url}/view?path=/workspace/test_document.pdf'
	action_browse = BrowseInteractiveAction(browser_actions=f'goto("{pdf_url}")')
	logger.info(action_browse, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action_browse)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})

	# Verify the browser observation
	assert isinstance(obs, BrowserOutputObservation)
	observation_text = str(obs)
	assert '[Action executed successfully.]' in observation_text
	assert 'Canvas' in observation_text
	assert (
	'Screenshot saved to: /workspace/.browser_screenshots/screenshot_'
	in observation_text
	)

	# Check the /workspace/.browser_screenshots folder
	action_cmd = CmdRunAction(command='ls /workspace/.browser_screenshots')
	logger.info(action_cmd, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action_cmd)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})
	assert isinstance(obs, CmdOutputObservation)
	assert obs.exit_code == 0
	assert 'screenshot_' in obs.content
	assert '.png' in obs.content
	finally:
	_close_test_runtime(runtime)


	@pytest.mark.skipif(
	os.environ.get('TEST_RUNTIME') == 'cli',
	reason='CLIRuntime does not support browsing actions',
	)
	def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands):
	runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
	try:
	# Create a PNG file using PIL in the host environment
	from PIL import Image, ImageDraw

	png_path = os.path.join(temp_dir, 'test_image.png')
	# Create a simple image with text
	img = Image.new('RGB', (400, 200), color=(255, 255, 255))
	d = ImageDraw.Draw(img)
	text = 'This is a test PNG image'
	d.text((20, 80), text, fill=(0, 0, 0))
	img.save(png_path)

	# Copy the PNG to the sandbox
	sandbox_dir = config.workspace_mount_path_in_sandbox
	runtime.copy_to(png_path, sandbox_dir)

	# Verify the file exists in the sandbox
	action_cmd = CmdRunAction(command='ls -alh')
	logger.info(action_cmd, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action_cmd)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})
	assert isinstance(obs, CmdOutputObservation)
	assert obs.exit_code == 0
	assert 'test_image.png' in obs.content

	# Get server url
	action_cmd = CmdRunAction(command='cat /tmp/oh-server-url')
	logger.info(action_cmd, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action_cmd)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})
	assert obs.exit_code == 0
	server_url = obs.content.strip()

	# Browse to the PNG file
	png_url = f'{server_url}/view?path=/workspace/test_image.png'
	action_browse = BrowseInteractiveAction(browser_actions=f'goto("{png_url}")')
	logger.info(action_browse, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action_browse)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})

	# Verify the browser observation
	assert isinstance(obs, BrowserOutputObservation)
	observation_text = str(obs)
	assert '[Action executed successfully.]' in observation_text
	assert 'File Viewer - test_image.png' in observation_text
	assert (
	'Screenshot saved to: /workspace/.browser_screenshots/screenshot_'
	in observation_text
	)

	# Check the /workspace/.browser_screenshots folder
	action_cmd = CmdRunAction(command='ls /workspace/.browser_screenshots')
	logger.info(action_cmd, extra={'msg_type': 'ACTION'})
	obs = runtime.run_action(action_cmd)
	logger.info(obs, extra={'msg_type': 'OBSERVATION'})
	assert isinstance(obs, CmdOutputObservation)
	assert obs.exit_code == 0
	assert 'screenshot_' in obs.content
	assert '.png' in obs.content
	finally:
	_close_test_runtime(runtime)