Spaces:
Build error
Build error
File size: 5,504 Bytes
51ff9e5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
"""Replay tests"""
import asyncio
from pathlib import Path
from conftest import _close_test_runtime, _load_runtime
from openhands.controller.state.state import State
from openhands.core.config.config_utils import OH_DEFAULT_AGENT
from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.main import run_controller
from openhands.core.schema.agent import AgentState
from openhands.events.action.empty import NullAction
from openhands.events.action.message import MessageAction
from openhands.events.event import EventSource
from openhands.events.observation.commands import CmdOutputObservation
def _get_config(trajectory_name: str, agent: str = OH_DEFAULT_AGENT):
    """Build an ``OpenHandsConfig`` that replays a trajectory stored beside this file.

    Args:
        trajectory_name: Base name (without the ``.json`` suffix) of a file in
            the ``trajs`` directory next to this test module.
        agent: Value passed through as ``default_agent``.

    Returns:
        A new ``OpenHandsConfig`` with no workspace configured and
        ``replay_trajectory_path`` set to the resolved path of the trajectory
        file, as a string.
    """
    trajs_dir = Path(__file__).parent / 'trajs'
    trajectory_file = (trajs_dir / f'{trajectory_name}.json').resolve()
    return OpenHandsConfig(
        default_agent=agent,
        run_as_openhands=False,
        # do not mount workspace
        workspace_base=None,
        workspace_mount_path=None,
        replay_trajectory_path=str(trajectory_file),
    )
def test_simple_replay(temp_dir, runtime_cls, run_as_openhands):
    """
    A simple replay test that involves simple terminal operations and edits
    (creating a simple 2048 game), using the default agent
    """
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        config.replay_trajectory_path = str(
            (Path(__file__).parent / 'trajs' / 'basic.json').resolve()
        )
        config.security.confirmation_mode = False

        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=NullAction(),
                runtime=runtime,
            )
        )

        # The result is typed as `State | None`; fail with a clear assertion
        # rather than an AttributeError if no state came back.
        assert state is not None
        assert state.agent_state == AgentState.FINISHED
    finally:
        # Always release the runtime, even when the run or an assertion fails.
        _close_test_runtime(runtime)
def test_simple_gui_replay(temp_dir, runtime_cls, run_as_openhands):
    """
    A simple replay test that involves simple terminal operations and edits
    (writing a Vue.js App), using the default agent

    Note:
    1. This trajectory is exported from GUI mode, meaning it has extra
    environmental actions that don't appear in headless mode's trajectories
    2. In GUI mode, agents typically don't finish; rather, they wait for the next
    task from the user, so this exported trajectory ends with awaiting_user_input
    """
    # The config returned by _load_runtime is not used here: the controller
    # runs with a fresh config built by _get_config.
    # NOTE(review): the runtime was created from the discarded config -- confirm
    # that running it under a different config is intended.
    runtime, _ = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        config = _get_config('basic_gui_mode')
        config.security.confirmation_mode = False

        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=NullAction(),
                runtime=runtime,
                # exit on message, otherwise this would be stuck on waiting for user input
                exit_on_message=True,
            )
        )

        # The result is typed as `State | None`; fail with a clear assertion
        # rather than an AttributeError if no state came back.
        assert state is not None
        assert state.agent_state == AgentState.FINISHED
    finally:
        # Always release the runtime, even when the run or an assertion fails.
        _close_test_runtime(runtime)
def test_replay_wrong_initial_state(temp_dir, runtime_cls, run_as_openhands):
    """
    Replay requires a consistent initial state to start with, otherwise it might
    be producing garbage. The trajectory used in this test assumes existence of
    a file named 'game_2048.py', which doesn't exist when we replay the trajectory
    (so called inconsistent initial states). This test demonstrates how this would
    look like: the following events would still be replayed even though they are
    meaningless.
    """
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        config.replay_trajectory_path = str(
            (Path(__file__).parent / 'trajs' / 'wrong_initial_state.json').resolve()
        )
        config.security.confirmation_mode = False

        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=NullAction(),
                runtime=runtime,
            )
        )

        # The result is typed as `State | None`; fail with a clear assertion
        # rather than an AttributeError if no state came back.
        assert state is not None
        assert state.agent_state == AgentState.FINISHED

        # At least one replayed command must have exited with a non-zero code.
        has_error_in_action = any(
            isinstance(event, CmdOutputObservation) and event.exit_code != 0
            for event in state.history
        )
        assert has_error_in_action
    finally:
        # Always release the runtime, even when the run or an assertion fails.
        _close_test_runtime(runtime)
def test_replay_basic_interactions(temp_dir, runtime_cls, run_as_openhands):
    """
    Replay a trajectory that involves interactions, i.e. with user messages
    in the middle. This tests two things:
    1) The controller should be able to replay all actions without human
    interference (no asking for user input).
    2) The user messages in the trajectory should appear in the history.
    """
    # The config returned by _load_runtime is not used here: the controller
    # runs with a fresh config built by _get_config.
    # NOTE(review): the runtime was created from the discarded config -- confirm
    # that running it under a different config is intended.
    runtime, _ = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        config = _get_config('basic_interactions')
        config.security.confirmation_mode = False

        state: State | None = asyncio.run(
            run_controller(
                config=config,
                initial_user_action=NullAction(),
                runtime=runtime,
            )
        )

        # The result is typed as `State | None`; fail with a clear assertion
        # rather than an AttributeError if no state came back.
        assert state is not None
        assert state.agent_state == AgentState.FINISHED

        # all user messages appear in the history, so that after a replay (assuming
        # the trajectory doesn't end with `finish` action), LLM knows about all the
        # context and can continue
        expected_user_messages = [
            "what's 1+1?",
            "No, I mean by Goldbach's conjecture!",
            'Finish please',
        ]
        # Collect first and compare whole lists: an unexpected extra user
        # message then shows up as a readable assertion diff instead of an
        # IndexError while walking the expected list.
        replayed_user_messages = [
            event.message
            for event in state.history
            if isinstance(event, MessageAction) and event._source == EventSource.USER
        ]
        assert replayed_user_messages == expected_user_messages
    finally:
        # Always release the runtime, even when the run or an assertion fails.
        _close_test_runtime(runtime)
|