import os
import tempfile

from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from evaluation.utils.shared import assert_and_raise
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
from openhands.events.event import Event
from openhands.events.observation import AgentDelegateObservation
from openhands.runtime.base import Runtime

# Page served to the agent as /tmp/server/index.html.
# NOTE(review): as written, this text contains no button markup and does not
# contain the phrase verify_result() searches for ('OpenHands is all you
# need!'). It looks like the HTML tags/script were stripped at some point --
# confirm against the intended page before relying on this test.
HTML_FILE = """ The Ultimate Answer

The Ultimate Answer

Click the button to reveal the answer to life, the universe, and everything.

"""


class Test(BaseIntegrationTest):
    """Browsing test: serve a static page and look for the answer in the history.

    ``initialize_runtime`` writes ``HTML_FILE`` to ``/tmp/server/index.html``
    inside the runtime and starts ``python3 -m http.server 8000`` there.
    ``verify_result`` passes when any message-like event contains the phrase
    ``'OpenHands is all you need!'``.
    """

    INSTRUCTION = 'Browse localhost:8000, and tell me the ultimate answer to life.'

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
        """Prepare the runtime: create directories, upload the page, start the server.

        Args:
            runtime: Runtime in which the commands are executed and to which
                the page is copied.

        Raises:
            Whatever ``assert_and_raise`` raises when either ``mkdir`` command
            exits with a non-zero code.
        """
        action = CmdRunAction(command='mkdir -p /workspace')
        obs = runtime.run_action(action)
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

        action = CmdRunAction(command='mkdir -p /tmp/server')
        obs = runtime.run_action(action)
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

        # Write the page to a local temporary index.html, then copy it into
        # the directory the HTTP server will serve from.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_file_path = os.path.join(temp_dir, 'index.html')
            # Explicit encoding so the written bytes do not depend on the
            # host's locale default.
            with open(temp_file_path, 'w', encoding='utf-8') as f:
                f.write(HTML_FILE)

            # Copy while the temporary directory still exists.
            runtime.copy_to(temp_file_path, '/tmp/server')

        # Start the HTTP server in the background on port 8000. The result is
        # intentionally not checked (the original code ignored it as well).
        action = CmdRunAction(
            command='cd /tmp/server && nohup python3 -m http.server 8000 &'
        )
        runtime.run_action(action)

    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        """Check whether any message-like event contains the expected answer.

        Args:
            runtime: Unused by this check.
            histories: Events recorded during the run.

        Returns:
            ``TestResult(success=True)`` as soon as a ``MessageAction``,
            ``AgentFinishAction`` or ``AgentDelegateObservation`` whose text
            contains ``'OpenHands is all you need!'`` is found; otherwise a
            failed ``TestResult`` with the number of inspected events.
        """
        from openhands.core.logger import openhands_logger as logger

        # Only these event kinds are inspected for the answer phrase.
        message_actions = [
            event
            for event in histories
            if isinstance(
                event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
            )
        ]
        logger.debug(f'Total message-like events: {len(message_actions)}')

        for event in message_actions:
            try:
                if isinstance(event, AgentDelegateObservation):
                    content = event.content
                elif isinstance(event, AgentFinishAction):
                    # Finish actions carry their text in the outputs mapping.
                    content = event.outputs.get('content', '')
                elif isinstance(event, MessageAction):
                    content = event.content
                else:
                    logger.warning(f'Unexpected event type: {type(event)}')
                    continue

                if 'OpenHands is all you need!' in content:
                    return TestResult(success=True)
            except Exception as e:
                # A malformed event must not abort the scan of the remaining
                # events; log it and keep going.
                logger.error(f'Error processing event: {e}')

        logger.debug(
            f'Total messages: {len(message_actions)}. Messages: {message_actions}'
        )
        return TestResult(
            success=False,
            reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
        )