|
import hashlib
|
|
|
|
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
|
|
from openhands.events.action import (
|
|
AgentFinishAction,
|
|
FileWriteAction,
|
|
MessageAction,
|
|
)
|
|
from openhands.events.event import Event
|
|
from openhands.events.observation import AgentDelegateObservation
|
|
from openhands.runtime.base import Runtime
|
|
|
|
|
|
class Test(BaseIntegrationTest):
|
|
INSTRUCTION = 'Execute the python script /workspace/python_script.py with input "John" and "25" and tell me the secret number.'
|
|
SECRET_NUMBER = int(hashlib.sha256(str(25).encode()).hexdigest()[:8], 16) % 1000
|
|
|
|
@classmethod
|
|
def initialize_runtime(cls, runtime: Runtime) -> None:
|
|
from openhands.core.logger import openhands_logger as logger
|
|
|
|
action = FileWriteAction(
|
|
path='/workspace/python_script.py',
|
|
content=(
|
|
'name = input("Enter your name: "); age = input("Enter your age: "); '
|
|
'import hashlib; secret = int(hashlib.sha256(str(age).encode()).hexdigest()[:8], 16) % 1000; '
|
|
'print(f"Hello {name}, you are {age} years old. Tell you a secret number: {secret}")'
|
|
),
|
|
)
|
|
logger.info(action, extra={'msg_type': 'ACTION'})
|
|
observation = runtime.run_action(action)
|
|
logger.info(observation, extra={'msg_type': 'OBSERVATION'})
|
|
|
|
@classmethod
|
|
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
|
|
from openhands.core.logger import openhands_logger as logger
|
|
|
|
|
|
message_actions = [
|
|
event
|
|
for event in histories
|
|
if isinstance(
|
|
event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
|
|
)
|
|
]
|
|
logger.info(f'Total message-like events: {len(message_actions)}')
|
|
|
|
for event in message_actions:
|
|
try:
|
|
if isinstance(event, AgentDelegateObservation):
|
|
content = event.content
|
|
elif isinstance(event, AgentFinishAction):
|
|
content = event.outputs.get('content', '')
|
|
if event.thought:
|
|
content += f'\n\n{event.thought}'
|
|
elif isinstance(event, MessageAction):
|
|
content = event.content
|
|
else:
|
|
logger.warning(f'Unexpected event type: {type(event)}')
|
|
continue
|
|
|
|
if str(cls.SECRET_NUMBER) in content:
|
|
return TestResult(success=True)
|
|
except Exception as e:
|
|
logger.error(f'Error processing event: {e}')
|
|
|
|
logger.debug(
|
|
f'Total messages: {len(message_actions)}. Messages: {message_actions}'
|
|
)
|
|
return TestResult(
|
|
success=False,
|
|
reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
|
|
)
|
|
|