Spaces:
Build error
Build error
"""Replay tests""" | |
import asyncio | |
from pathlib import Path | |
from conftest import _close_test_runtime, _load_runtime | |
from openhands.controller.state.state import State | |
from openhands.core.config.config_utils import OH_DEFAULT_AGENT | |
from openhands.core.config.openhands_config import OpenHandsConfig | |
from openhands.core.main import run_controller | |
from openhands.core.schema.agent import AgentState | |
from openhands.events.action.empty import NullAction | |
from openhands.events.action.message import MessageAction | |
from openhands.events.event import EventSource | |
from openhands.events.observation.commands import CmdOutputObservation | |
def _get_config(trajectory_name: str, agent: str = OH_DEFAULT_AGENT):
    """
    Build an OpenHandsConfig that replays the named trajectory.

    The trajectory is looked up as ``trajs/<trajectory_name>.json`` next to
    this test module; its resolved path is passed as ``replay_trajectory_path``.
    ``agent`` is passed through as ``default_agent``.
    """
    trajectory_file = Path(__file__).parent / 'trajs' / f'{trajectory_name}.json'
    replay_path = str(trajectory_file.resolve())
    return OpenHandsConfig(
        default_agent=agent,
        run_as_openhands=False,
        # do not mount workspace
        workspace_base=None,
        workspace_mount_path=None,
        replay_trajectory_path=replay_path,
    )
def test_simple_replay(temp_dir, runtime_cls, run_as_openhands):
    """
    A simple replay test that involves simple terminal operations and edits
    (creating a simple 2048 game), using the default agent
    """
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        trajectory_file = Path(__file__).parent / 'trajs' / 'basic.json'
        config.replay_trajectory_path = str(trajectory_file.resolve())
        config.security.confirmation_mode = False

        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=NullAction(),
                runtime=runtime,
            )
        )

        # fail with a clear message rather than an AttributeError on None
        assert state is not None
        assert state.agent_state == AgentState.FINISHED
    finally:
        # always release the runtime, even when the replay or an assertion fails
        _close_test_runtime(runtime)
def test_simple_gui_replay(temp_dir, runtime_cls, run_as_openhands):
    """
    A simple replay test that involves simple terminal operations and edits
    (writing a Vue.js App), using the default agent

    Note:
    1. This trajectory is exported from GUI mode, meaning it has extra
    environmental actions that don't appear in headless mode's trajectories
    2. In GUI mode, agents typically don't finish; rather, they wait for the next
    task from the user, so this exported trajectory ends with awaiting_user_input
    """
    # The config returned by _load_runtime is not used here: the controller is
    # driven by a fresh replay config built by _get_config instead.
    runtime, _ = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        config = _get_config('basic_gui_mode')
        config.security.confirmation_mode = False

        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=NullAction(),
                runtime=runtime,
                # exit on message, otherwise this would be stuck on waiting for user input
                exit_on_message=True,
            )
        )

        # fail with a clear message rather than an AttributeError on None
        assert state is not None
        assert state.agent_state == AgentState.FINISHED
    finally:
        # always release the runtime, even when the replay or an assertion fails
        _close_test_runtime(runtime)
def test_replay_wrong_initial_state(temp_dir, runtime_cls, run_as_openhands):
    """
    Replay requires a consistent initial state to start with, otherwise it might
    be producing garbage. The trajectory used in this test assumes existence of
    a file named 'game_2048.py', which doesn't exist when we replay the trajectory
    (so called inconsistent initial states). This test demonstrates what this
    looks like: the following events would still be replayed even though they are
    meaningless.
    """
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        trajectory_file = Path(__file__).parent / 'trajs' / 'wrong_initial_state.json'
        config.replay_trajectory_path = str(trajectory_file.resolve())
        config.security.confirmation_mode = False

        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=NullAction(),
                runtime=runtime,
            )
        )

        # fail with a clear message rather than an AttributeError on None
        assert state is not None
        assert state.agent_state == AgentState.FINISHED

        # at least one replayed command must have exited with a non-zero code
        has_error_in_action = any(
            isinstance(event, CmdOutputObservation) and event.exit_code != 0
            for event in state.history
        )
        assert has_error_in_action
    finally:
        # always release the runtime, even when the replay or an assertion fails
        _close_test_runtime(runtime)
def test_replay_basic_interactions(temp_dir, runtime_cls, run_as_openhands):
    """
    Replay a trajectory that involves interactions, i.e. with user messages
    in the middle. This tests two things:
    1) The controller should be able to replay all actions without human
    interference (no asking for user input).
    2) The user messages in the trajectory should appear in the history.
    """
    # The config returned by _load_runtime is not used here: the controller is
    # driven by a fresh replay config built by _get_config instead.
    runtime, _ = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        config = _get_config('basic_interactions')
        config.security.confirmation_mode = False

        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=NullAction(),
                runtime=runtime,
            )
        )

        # fail with a clear message rather than an AttributeError on None
        assert state is not None
        assert state.agent_state == AgentState.FINISHED

        # all user messages appear in the history, so that after a replay (assuming
        # the trajectory doesn't end with `finish` action), LLM knows about all the
        # context and can continue
        user_messages = [
            "what's 1+1?",
            "No, I mean by Goldbach's conjecture!",
            'Finish please',
        ]
        replayed_user_messages = [
            event.message
            for event in state.history
            if isinstance(event, MessageAction) and event._source == EventSource.USER
        ]
        # comparing whole lists checks content, order and count in one step, and
        # reports missing or extra messages as an assertion failure
        assert replayed_user_messages == user_messages
    finally:
        # always release the runtime, even when the replay or an assertion fails
        _close_test_runtime(runtime)