Backup-bdg committed on
Commit 680c11c · verified · 1 parent: 9678537

Delete tests

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. tests/runtime/README.md +0 -60
  2. tests/runtime/conftest.py +0 -300
  3. tests/runtime/test_aci_edit.py +0 -733
  4. tests/runtime/test_bash.py +0 -1462
  5. tests/runtime/test_browsergym_envs.py +0 -73
  6. tests/runtime/test_browsing.py +0 -213
  7. tests/runtime/test_docker_images.py +0 -96
  8. tests/runtime/test_env_vars.py +0 -120
  9. tests/runtime/test_glob_and_grep.py +0 -303
  10. tests/runtime/test_ipython.py +0 -382
  11. tests/runtime/test_llm_based_edit.py +0 -413
  12. tests/runtime/test_mcp_action.py +0 -362
  13. tests/runtime/test_microagent.py +0 -443
  14. tests/runtime/test_replay.py +0 -161
  15. tests/runtime/test_runtime_resource.py +0 -115
  16. tests/runtime/test_setup.py +0 -84
  17. tests/runtime/test_stress_remote_runtime.py +0 -483
  18. tests/runtime/trajs/basic.json +0 -202
  19. tests/runtime/trajs/basic_gui_mode.json +0 -631
  20. tests/runtime/trajs/basic_interactions.json +0 -128
  21. tests/runtime/trajs/wrong_initial_state.json +0 -454
  22. tests/runtime/utils/test_system_stats.py +0 -60
  23. tests/test_fileops.py +0 -66
  24. tests/unit/README.md +0 -29
  25. tests/unit/core/config/test_config_utils.py +0 -168
  26. tests/unit/frontend/test_translation_completeness.py +0 -33
  27. tests/unit/resolver/github/test_guess_success.py +0 -202
  28. tests/unit/resolver/github/test_issue_handler.py +0 -645
  29. tests/unit/resolver/github/test_issue_handler_error_handling.py +0 -281
  30. tests/unit/resolver/github/test_pr_handler_guess_success.py +0 -672
  31. tests/unit/resolver/github/test_pr_title_escaping.py +0 -166
  32. tests/unit/resolver/github/test_resolve_issues.py +0 -1035
  33. tests/unit/resolver/github/test_send_pull_request.py +0 -1304
  34. tests/unit/resolver/gitlab/test_gitlab_guess_success.py +0 -202
  35. tests/unit/resolver/gitlab/test_gitlab_issue_handler.py +0 -683
  36. tests/unit/resolver/gitlab/test_gitlab_issue_handler_error_handling.py +0 -283
  37. tests/unit/resolver/gitlab/test_gitlab_pr_handler_guess_success.py +0 -672
  38. tests/unit/resolver/gitlab/test_gitlab_pr_title_escaping.py +0 -166
  39. tests/unit/resolver/gitlab/test_gitlab_resolve_issues.py +0 -1000
  40. tests/unit/resolver/gitlab/test_gitlab_send_pull_request.py +0 -1206
  41. tests/unit/resolver/mock_output/output.jsonl +0 -0
  42. tests/unit/resolver/mock_output/repo/src/App.css +0 -42
  43. tests/unit/resolver/mock_output/repo/src/App.tsx +0 -14
  44. tests/unit/resolver/mock_output/repo/src/PullRequestViewer.test.tsx +0 -19
  45. tests/unit/resolver/mock_output/repo/src/PullRequestViewer.tsx +0 -112
  46. tests/unit/resolver/test_issue_handler_factory.py +0 -77
  47. tests/unit/resolver/test_issue_references.py +0 -56
  48. tests/unit/resolver/test_patch_apply.py +0 -47
  49. tests/unit/resolver/test_resolve_issue.py +0 -171
  50. tests/unit/test_acompletion.py +0 -196
tests/runtime/README.md DELETED
@@ -1,60 +0,0 @@
- ## Runtime Tests
-
- This folder contains integration tests that verify the functionality of OpenHands' runtime environments and their interactions with various tools and features.
-
- ### What are Runtime Tests?
-
- Runtime tests focus on testing:
- - Tool interactions within a runtime environment (bash commands, browsing, file operations)
- - Environment setup and configuration
- - Resource management and cleanup
- - Browser-based operations and file viewing capabilities
- - IPython/Jupyter integration
- - Environment variables and configuration handling
-
- The tests can be run against different runtime environments (Docker, Local, Remote, Runloop, or Daytona) by setting the TEST_RUNTIME environment variable. By default, tests run using the Docker runtime.
-
- ### How are they different from Unit Tests?
-
- While unit tests in `tests/unit/` focus on testing individual components in isolation, runtime tests verify:
- 1. Integration between components
- 2. Actual execution of commands in different runtime environments
- 3. System-level interactions (file system, network, browser)
- 4. Environment setup and teardown
- 5. Tool functionality in real runtime contexts
-
- ### Running the Tests
-
- Run all runtime tests:
-
- ```bash
- poetry run pytest ./tests/runtime
- ```
-
- Run specific test file:
-
- ```bash
- poetry run pytest ./tests/runtime/test_bash.py
- ```
-
- Run specific test:
-
- ```bash
- poetry run pytest ./tests/runtime/test_bash.py::test_bash_command_env
- ```
-
- For verbose output, add the `-v` flag (more verbose: `-vv` and `-vvv`):
-
- ```bash
- poetry run pytest -v ./tests/runtime/test_bash.py
- ```
-
- ### Environment Variables
-
- The runtime tests can be configured using environment variables:
- - `TEST_IN_CI`: Set to 'True' when running in CI environment
- - `TEST_RUNTIME`: Specify the runtime to test ('docker', 'local', 'remote', 'runloop', 'daytona')
- - `RUN_AS_OPENHANDS`: Set to 'True' to run tests as openhands user (default), 'False' for root
- - `SANDBOX_BASE_CONTAINER_IMAGE`: Specify a custom base container image for Docker runtime
-
- For more details on pytest usage, see the [pytest documentation](https://docs.pytest.org/en/latest/contents.html).
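
For reference, these variables combine with the pytest commands above in the usual way. A hypothetical invocation (using values that the deleted `tests/runtime/conftest.py` below actually accepts) might look like:

```bash
# Run one runtime test file against the local runtime, as root, with verbose output.
# TEST_RUNTIME and RUN_AS_OPENHANDS are read by tests/runtime/conftest.py.
TEST_RUNTIME=local RUN_AS_OPENHANDS=false poetry run pytest -v ./tests/runtime/test_bash.py
```

The same pattern applies to `SANDBOX_BASE_CONTAINER_IMAGE` when the Docker runtime should be exercised against a custom base image.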
tests/runtime/conftest.py DELETED
@@ -1,300 +0,0 @@
1
- import os
2
- import random
3
- import shutil
4
- import stat
5
- import time
6
-
7
- import pytest
8
- from pytest import TempPathFactory
9
-
10
- from openhands.core.config import MCPConfig, OpenHandsConfig, load_openhands_config
11
- from openhands.core.logger import openhands_logger as logger
12
- from openhands.events import EventStream
13
- from openhands.runtime.base import Runtime
14
- from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
15
- from openhands.runtime.impl.daytona.daytona_runtime import DaytonaRuntime
16
- from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
17
- from openhands.runtime.impl.local.local_runtime import LocalRuntime
18
- from openhands.runtime.impl.remote.remote_runtime import RemoteRuntime
19
- from openhands.runtime.impl.runloop.runloop_runtime import RunloopRuntime
20
- from openhands.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
21
- from openhands.storage import get_file_store
22
- from openhands.utils.async_utils import call_async_from_sync
23
-
24
- TEST_IN_CI = os.getenv('TEST_IN_CI', 'False').lower() in ['true', '1', 'yes']
25
- TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'docker').lower()
26
- RUN_AS_OPENHANDS = os.getenv('RUN_AS_OPENHANDS', 'True').lower() in ['true', '1', 'yes']
27
- test_mount_path = ''
28
- project_dir = os.path.dirname(
29
- os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
30
- )
31
- sandbox_test_folder = '/workspace'
32
-
33
-
34
- def _get_runtime_sid(runtime: Runtime) -> str:
35
- logger.debug(f'\nruntime.sid: {runtime.sid}')
36
- return runtime.sid
37
-
38
-
39
- def _get_host_folder(runtime: Runtime) -> str:
40
- return runtime.config.workspace_mount_path
41
-
42
-
43
- def _remove_folder(folder: str) -> bool:
44
- success = False
45
- if folder and os.path.isdir(folder):
46
- try:
47
- os.rmdir(folder)
48
- success = True
49
- except OSError:
50
- try:
51
- shutil.rmtree(folder)
52
- success = True
53
- except OSError:
54
- pass
55
- logger.debug(f'\nCleanup: `{folder}`: ' + ('[OK]' if success else '[FAILED]'))
56
- return success
57
-
58
-
59
- def _close_test_runtime(runtime: Runtime) -> None:
60
- if isinstance(runtime, DockerRuntime):
61
- runtime.close(rm_all_containers=False)
62
- else:
63
- runtime.close()
64
- time.sleep(1)
65
-
66
-
67
- def _reset_cwd() -> None:
68
- global project_dir
69
- # Try to change back to project directory
70
- try:
71
- os.chdir(project_dir)
72
- logger.info(f'Changed back to project directory `{project_dir}')
73
- except Exception as e:
74
- logger.error(f'Failed to change back to project directory: {e}')
75
-
76
-
77
- # *****************************************************************************
78
- # *****************************************************************************
79
-
80
-
81
- @pytest.fixture(autouse=True)
82
- def print_method_name(request):
83
- print(
84
- '\n\n########################################################################'
85
- )
86
- print(f'Running test: {request.node.name}')
87
- print(
88
- '########################################################################\n\n'
89
- )
90
-
91
-
92
- @pytest.fixture
93
- def temp_dir(tmp_path_factory: TempPathFactory, request) -> str:
94
- """Creates a unique temporary directory.
95
-
96
- Upon finalization, the temporary directory and its content is removed.
97
- The cleanup function is also called upon KeyboardInterrupt.
98
-
99
- Parameters:
100
- - tmp_path_factory (TempPathFactory): A TempPathFactory class
101
-
102
- Returns:
103
- - str: The temporary directory path that was created
104
- """
105
- temp_dir = tmp_path_factory.mktemp(
106
- 'rt_' + str(random.randint(100000, 999999)), numbered=False
107
- )
108
-
109
- logger.info(f'\n*** {request.node.name}\n>> temp folder: {temp_dir}\n')
110
-
111
- # Set permissions to ensure the directory is writable and deletable
112
- os.chmod(temp_dir, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) # 0777 permissions
113
-
114
- def cleanup():
115
- global project_dir
116
- os.chdir(project_dir)
117
- _remove_folder(temp_dir)
118
-
119
- request.addfinalizer(cleanup)
120
-
121
- return str(temp_dir)
122
-
123
-
124
- # Depending on TEST_RUNTIME, feed the appropriate box class(es) to the test.
125
- def get_runtime_classes() -> list[type[Runtime]]:
126
- runtime = TEST_RUNTIME
127
- if runtime.lower() == 'docker' or runtime.lower() == 'eventstream':
128
- return [DockerRuntime]
129
- elif runtime.lower() == 'local':
130
- return [LocalRuntime]
131
- elif runtime.lower() == 'remote':
132
- return [RemoteRuntime]
133
- elif runtime.lower() == 'runloop':
134
- return [RunloopRuntime]
135
- elif runtime.lower() == 'daytona':
136
- return [DaytonaRuntime]
137
- elif runtime.lower() == 'cli':
138
- return [CLIRuntime]
139
- else:
140
- raise ValueError(f'Invalid runtime: {runtime}')
141
-
142
-
143
- def get_run_as_openhands() -> list[bool]:
144
- print(
145
- '\n\n########################################################################'
146
- )
147
- print('USER: ' + 'openhands' if RUN_AS_OPENHANDS else 'root')
148
- print(
149
- '########################################################################\n\n'
150
- )
151
- return [RUN_AS_OPENHANDS]
152
-
153
-
154
- @pytest.fixture(scope='module') # for xdist
155
- def runtime_setup_module():
156
- _reset_cwd()
157
- yield
158
- _reset_cwd()
159
-
160
-
161
- @pytest.fixture(scope='session') # not for xdist
162
- def runtime_setup_session():
163
- _reset_cwd()
164
- yield
165
- _reset_cwd()
166
-
167
-
168
- # This assures that all tests run together per runtime, not alternating between them,
169
- # which cause errors (especially outside GitHub actions).
170
- @pytest.fixture(scope='module', params=get_runtime_classes())
171
- def runtime_cls(request):
172
- time.sleep(1)
173
- return request.param
174
-
175
-
176
- # TODO: We will change this to `run_as_user` when `ServerRuntime` is deprecated.
177
- # since `DockerRuntime` supports running as an arbitrary user.
178
- @pytest.fixture(scope='module', params=get_run_as_openhands())
179
- def run_as_openhands(request):
180
- time.sleep(1)
181
- return request.param
182
-
183
-
184
- @pytest.fixture(scope='module', params=None)
185
- def base_container_image(request):
186
- time.sleep(1)
187
- env_image = os.environ.get('SANDBOX_BASE_CONTAINER_IMAGE')
188
- if env_image:
189
- request.param = env_image
190
- else:
191
- if not hasattr(request, 'param'): # prevent runtime AttributeError
192
- request.param = None
193
- if request.param is None and hasattr(request.config, 'sandbox'):
194
- try:
195
- request.param = request.config.sandbox.getoption(
196
- '--base_container_image'
197
- )
198
- except ValueError:
199
- request.param = None
200
- if request.param is None:
201
- request.param = pytest.param(
202
- 'nikolaik/python-nodejs:python3.12-nodejs22',
203
- 'golang:1.23-bookworm',
204
- )
205
- print(f'Container image: {request.param}')
206
- return request.param
207
-
208
-
209
- def _load_runtime(
210
- temp_dir,
211
- runtime_cls,
212
- run_as_openhands: bool = True,
213
- enable_auto_lint: bool = False,
214
- base_container_image: str | None = None,
215
- browsergym_eval_env: str | None = None,
216
- use_workspace: bool | None = None,
217
- force_rebuild_runtime: bool = False,
218
- runtime_startup_env_vars: dict[str, str] | None = None,
219
- docker_runtime_kwargs: dict[str, str] | None = None,
220
- override_mcp_config: MCPConfig | None = None,
221
- ) -> tuple[Runtime, OpenHandsConfig]:
222
- sid = 'rt_' + str(random.randint(100000, 999999))
223
-
224
- # AgentSkills need to be initialized **before** Jupyter
225
- # otherwise Jupyter will not access the proper dependencies installed by AgentSkills
226
- plugins = [AgentSkillsRequirement(), JupyterRequirement()]
227
-
228
- config = load_openhands_config()
229
- config.run_as_openhands = run_as_openhands
230
- config.sandbox.force_rebuild_runtime = force_rebuild_runtime
231
- config.sandbox.keep_runtime_alive = False
232
- config.sandbox.docker_runtime_kwargs = docker_runtime_kwargs
233
- # Folder where all tests create their own folder
234
- global test_mount_path
235
- if use_workspace:
236
- test_mount_path = os.path.join(config.workspace_base, 'rt')
237
- elif temp_dir is not None:
238
- test_mount_path = temp_dir
239
- else:
240
- test_mount_path = None
241
- config.workspace_base = test_mount_path
242
- config.workspace_mount_path = test_mount_path
243
-
244
- # Mounting folder specific for this test inside the sandbox
245
- config.workspace_mount_path_in_sandbox = f'{sandbox_test_folder}'
246
- print('\nPaths used:')
247
- print(f'use_host_network: {config.sandbox.use_host_network}')
248
- print(f'workspace_base: {config.workspace_base}')
249
- print(f'workspace_mount_path: {config.workspace_mount_path}')
250
- print(
251
- f'workspace_mount_path_in_sandbox: {config.workspace_mount_path_in_sandbox}\n'
252
- )
253
-
254
- config.sandbox.browsergym_eval_env = browsergym_eval_env
255
- config.sandbox.enable_auto_lint = enable_auto_lint
256
- if runtime_startup_env_vars is not None:
257
- config.sandbox.runtime_startup_env_vars = runtime_startup_env_vars
258
-
259
- if base_container_image is not None:
260
- config.sandbox.base_container_image = base_container_image
261
- config.sandbox.runtime_container_image = None
262
-
263
- if override_mcp_config is not None:
264
- config.mcp = override_mcp_config
265
-
266
- file_store = file_store = get_file_store(
267
- config.file_store,
268
- config.file_store_path,
269
- config.file_store_web_hook_url,
270
- config.file_store_web_hook_headers,
271
- )
272
- event_stream = EventStream(sid, file_store)
273
-
274
- runtime = runtime_cls(
275
- config=config,
276
- event_stream=event_stream,
277
- sid=sid,
278
- plugins=plugins,
279
- )
280
-
281
- # For CLIRuntime, the tests' assertions should be based on the physical workspace path,
282
- # not the logical "/workspace". So, we adjust config.workspace_mount_path_in_sandbox
283
- # to reflect the actual physical path used by CLIRuntime's OHEditor.
284
- if isinstance(runtime, CLIRuntime):
285
- config.workspace_mount_path_in_sandbox = str(runtime.workspace_root)
286
- logger.info(
287
- f'Adjusted workspace_mount_path_in_sandbox for CLIRuntime to: {config.workspace_mount_path_in_sandbox}'
288
- )
289
-
290
- call_async_from_sync(runtime.connect)
291
- time.sleep(2)
292
- return runtime, runtime.config
293
-
294
-
295
- # Export necessary function
296
- __all__ = [
297
- '_load_runtime',
298
- '_get_host_folder',
299
- '_remove_folder',
300
- ]
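
For context, the test modules removed in this commit consume these helpers through the `temp_dir`, `runtime_cls`, and `run_as_openhands` fixtures. A minimal sketch of that pattern (a hypothetical test, shown only to illustrate how `_load_runtime` and `_close_test_runtime` are used; `test_echo_hello` is not one of the deleted tests) would be:

```python
from conftest import _close_test_runtime, _load_runtime

from openhands.events.action import CmdRunAction


def test_echo_hello(temp_dir, runtime_cls, run_as_openhands):
    # _load_runtime builds an OpenHandsConfig, mounts temp_dir into the sandbox,
    # connects the runtime, and returns (runtime, runtime.config).
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        obs = runtime.run_action(CmdRunAction(command='echo hello'))
        assert obs.exit_code == 0
        assert 'hello' in obs.content
    finally:
        # Always close the runtime so containers and temp folders are cleaned up.
        _close_test_runtime(runtime)
```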
tests/runtime/test_aci_edit.py DELETED
@@ -1,733 +0,0 @@
1
- """Editor-related tests for the DockerRuntime."""
2
-
3
- import os
4
- from unittest.mock import MagicMock
5
-
6
- from conftest import _close_test_runtime, _load_runtime
7
-
8
- from openhands.core.logger import openhands_logger as logger
9
- from openhands.events.action import FileEditAction, FileWriteAction
10
- from openhands.runtime.action_execution_server import _execute_file_editor
11
- from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
12
-
13
-
14
- def test_view_file(temp_dir, runtime_cls, run_as_openhands):
15
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
16
- try:
17
- # Create test file
18
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
19
- action = FileWriteAction(
20
- content='This is a test file.\nThis file is for testing purposes.',
21
- path=test_file,
22
- )
23
- obs = runtime.run_action(action)
24
-
25
- # Test view command
26
- action = FileEditAction(
27
- command='view',
28
- path=test_file,
29
- )
30
- obs = runtime.run_action(action)
31
-
32
- assert f"Here's the result of running `cat -n` on {test_file}:" in obs.content
33
- assert '1\tThis is a test file.' in obs.content
34
- assert '2\tThis file is for testing purposes.' in obs.content
35
-
36
- finally:
37
- _close_test_runtime(runtime)
38
-
39
-
40
- def test_view_directory(temp_dir, runtime_cls, run_as_openhands):
41
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
42
- try:
43
- # Create test file
44
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
45
- action = FileWriteAction(
46
- content='This is a test file.\nThis file is for testing purposes.',
47
- path=test_file,
48
- )
49
- obs = runtime.run_action(action)
50
-
51
- # Test view command
52
- action = FileEditAction(
53
- command='view',
54
- path=config.workspace_mount_path_in_sandbox,
55
- )
56
- obs = runtime.run_action(action)
57
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
58
- assert (
59
- obs.content
60
- == f"""Here's the files and directories up to 2 levels deep in {config.workspace_mount_path_in_sandbox}, excluding hidden items:
61
- {config.workspace_mount_path_in_sandbox}/
62
- {config.workspace_mount_path_in_sandbox}/test.txt"""
63
- )
64
-
65
- finally:
66
- _close_test_runtime(runtime)
67
-
68
-
69
- def test_create_file(temp_dir, runtime_cls, run_as_openhands):
70
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
71
- try:
72
- new_file = os.path.join(config.workspace_mount_path_in_sandbox, 'new_file.txt')
73
- action = FileEditAction(
74
- command='create',
75
- path=new_file,
76
- file_text='New file content',
77
- )
78
- obs = runtime.run_action(action)
79
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
80
- assert 'File created successfully' in obs.content
81
-
82
- # Verify file content
83
- action = FileEditAction(
84
- command='view',
85
- path=new_file,
86
- )
87
- obs = runtime.run_action(action)
88
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
89
- assert 'New file content' in obs.content
90
-
91
- finally:
92
- _close_test_runtime(runtime)
93
-
94
-
95
- def test_create_file_with_empty_content(temp_dir, runtime_cls, run_as_openhands):
96
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
97
- try:
98
- new_file = os.path.join(config.workspace_mount_path_in_sandbox, 'new_file.txt')
99
- action = FileEditAction(
100
- command='create',
101
- path=new_file,
102
- file_text='',
103
- )
104
- obs = runtime.run_action(action)
105
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
106
- assert 'File created successfully' in obs.content
107
-
108
- # Verify file content
109
- action = FileEditAction(
110
- command='view',
111
- path=new_file,
112
- )
113
- obs = runtime.run_action(action)
114
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
115
- assert '1\t' in obs.content
116
-
117
- finally:
118
- _close_test_runtime(runtime)
119
-
120
-
121
- def test_create_with_none_file_text(temp_dir, runtime_cls, run_as_openhands):
122
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
123
- try:
124
- new_file = os.path.join(
125
- config.workspace_mount_path_in_sandbox, 'none_content.txt'
126
- )
127
- action = FileEditAction(
128
- command='create',
129
- path=new_file,
130
- file_text=None,
131
- )
132
- obs = runtime.run_action(action)
133
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
134
- assert (
135
- obs.content
136
- == 'ERROR:\nParameter `file_text` is required for command: create.'
137
- )
138
- finally:
139
- _close_test_runtime(runtime)
140
-
141
-
142
- def test_str_replace(temp_dir, runtime_cls, run_as_openhands):
143
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
144
- try:
145
- # Create test file
146
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
147
- action = FileWriteAction(
148
- content='This is a test file.\nThis file is for testing purposes.',
149
- path=test_file,
150
- )
151
- runtime.run_action(action)
152
-
153
- # Test str_replace command
154
- action = FileEditAction(
155
- command='str_replace',
156
- path=test_file,
157
- old_str='test file',
158
- new_str='sample file',
159
- )
160
- obs = runtime.run_action(action)
161
- assert f'The file {test_file} has been edited' in obs.content
162
-
163
- # Verify file content
164
- action = FileEditAction(
165
- command='view',
166
- path=test_file,
167
- )
168
- obs = runtime.run_action(action)
169
- assert 'This is a sample file.' in obs.content
170
-
171
- finally:
172
- _close_test_runtime(runtime)
173
-
174
-
175
- def test_str_replace_multi_line(temp_dir, runtime_cls, run_as_openhands):
176
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
177
- try:
178
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
179
- action = FileWriteAction(
180
- content='This is a test file.\nThis file is for testing purposes.',
181
- path=test_file,
182
- )
183
- runtime.run_action(action)
184
-
185
- # Test str_replace command
186
- action = FileEditAction(
187
- command='str_replace',
188
- path=test_file,
189
- old_str='This is a test file.\nThis file is for testing purposes.',
190
- new_str='This is a sample file.\nThis file is for testing purposes.',
191
- )
192
- obs = runtime.run_action(action)
193
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
194
- assert f'The file {test_file} has been edited.' in obs.content
195
- assert 'This is a sample file.' in obs.content
196
- assert 'This file is for testing purposes.' in obs.content
197
-
198
- finally:
199
- _close_test_runtime(runtime)
200
-
201
-
202
- def test_str_replace_multi_line_with_tabs(temp_dir, runtime_cls, run_as_openhands):
203
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
204
- try:
205
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
206
- action = FileEditAction(
207
- command='create',
208
- path=test_file,
209
- file_text='def test():\n\tprint("Hello, World!")',
210
- )
211
- runtime.run_action(action)
212
-
213
- # Test str_replace command
214
- action = FileEditAction(
215
- command='str_replace',
216
- path=test_file,
217
- old_str='def test():\n\tprint("Hello, World!")',
218
- new_str='def test():\n\tprint("Hello, Universe!")',
219
- )
220
- obs = runtime.run_action(action)
221
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
222
- assert (
223
- obs.content
224
- == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of {test_file}:
225
- 1\tdef test():
226
- 2\t\tprint("Hello, Universe!")
227
- Review the changes and make sure they are as expected. Edit the file again if necessary."""
228
- )
229
-
230
- finally:
231
- _close_test_runtime(runtime)
232
-
233
-
234
- def test_str_replace_error_multiple_occurrences(
235
- temp_dir, runtime_cls, run_as_openhands
236
- ):
237
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
238
- try:
239
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
240
- action = FileWriteAction(
241
- content='This is a test file.\nThis file is for testing purposes.',
242
- path=test_file,
243
- )
244
- runtime.run_action(action)
245
-
246
- action = FileEditAction(
247
- command='str_replace', path=test_file, old_str='test', new_str='sample'
248
- )
249
- obs = runtime.run_action(action)
250
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
251
- assert 'Multiple occurrences of old_str `test`' in obs.content
252
- assert '[1, 2]' in obs.content # Should show both line numbers
253
- finally:
254
- _close_test_runtime(runtime)
255
-
256
-
257
- def test_str_replace_error_multiple_multiline_occurrences(
258
- temp_dir, runtime_cls, run_as_openhands
259
- ):
260
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
261
- try:
262
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
263
- # Create a file with two identical multi-line blocks
264
- multi_block = """def example():
265
- print("Hello")
266
- return True"""
267
- content = f"{multi_block}\n\nprint('separator')\n\n{multi_block}"
268
- action = FileWriteAction(
269
- content=content,
270
- path=test_file,
271
- )
272
- runtime.run_action(action)
273
-
274
- # Test str_replace command
275
- action = FileEditAction(
276
- command='str_replace',
277
- path=test_file,
278
- old_str=multi_block,
279
- new_str='def new():\n print("World")',
280
- )
281
- obs = runtime.run_action(action)
282
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
283
- assert 'Multiple occurrences of old_str' in obs.content
284
- assert '[1, 7]' in obs.content # Should show correct starting line numbers
285
-
286
- finally:
287
- _close_test_runtime(runtime)
288
-
289
-
290
- def test_str_replace_nonexistent_string(temp_dir, runtime_cls, run_as_openhands):
291
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
292
- try:
293
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
294
- action = FileWriteAction(
295
- content='Line 1\nLine 2',
296
- path=test_file,
297
- )
298
- runtime.run_action(action)
299
- action = FileEditAction(
300
- command='str_replace',
301
- path=test_file,
302
- old_str='Non-existent Line',
303
- new_str='New Line',
304
- )
305
- obs = runtime.run_action(action)
306
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
307
- assert 'No replacement was performed' in obs.content
308
- assert (
309
- f'old_str `Non-existent Line` did not appear verbatim in {test_file}'
310
- in obs.content
311
- )
312
- finally:
313
- _close_test_runtime(runtime)
314
-
315
-
316
- def test_str_replace_with_empty_new_str(temp_dir, runtime_cls, run_as_openhands):
317
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
318
- try:
319
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
320
- action = FileWriteAction(
321
- content='Line 1\nLine to remove\nLine 3',
322
- path=test_file,
323
- )
324
- runtime.run_action(action)
325
- action = FileEditAction(
326
- command='str_replace',
327
- path=test_file,
328
- old_str='Line to remove\n',
329
- new_str='',
330
- )
331
- obs = runtime.run_action(action)
332
- assert 'Line to remove' not in obs.content
333
- assert 'Line 1' in obs.content
334
- assert 'Line 3' in obs.content
335
-
336
- finally:
337
- _close_test_runtime(runtime)
338
-
339
-
340
- def test_str_replace_with_empty_old_str(temp_dir, runtime_cls, run_as_openhands):
341
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
342
- try:
343
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
344
- action = FileWriteAction(
345
- content='Line 1\nLine 2\nLine 3',
346
- path=test_file,
347
- )
348
- runtime.run_action(action)
349
- action = FileEditAction(
350
- command='str_replace',
351
- path=test_file,
352
- old_str='',
353
- new_str='New string',
354
- )
355
- obs = runtime.run_action(action)
356
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
357
- if isinstance(runtime, CLIRuntime):
358
- # CLIRuntime with a 3-line file without a trailing newline reports 3 occurrences for an empty old_str
359
- assert (
360
- 'No replacement was performed. Multiple occurrences of old_str `` in lines [1, 2, 3]. Please ensure it is unique.'
361
- in obs.content
362
- )
363
- else:
364
- # Other runtimes might behave differently (e.g., implicitly add a newline, leading to 4 matches)
365
- # TODO: Why do they have 4 lines?
366
- assert (
367
- 'No replacement was performed. Multiple occurrences of old_str `` in lines [1, 2, 3, 4]. Please ensure it is unique.'
368
- in obs.content
369
- )
370
- finally:
371
- _close_test_runtime(runtime)
372
-
373
-
374
- def test_str_replace_with_none_old_str(temp_dir, runtime_cls, run_as_openhands):
375
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
376
- try:
377
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
378
- action = FileWriteAction(
379
- content='Line 1\nLine 2\nLine 3',
380
- path=test_file,
381
- )
382
- runtime.run_action(action)
383
-
384
- action = FileEditAction(
385
- command='str_replace',
386
- path=test_file,
387
- old_str=None,
388
- new_str='new content',
389
- )
390
- obs = runtime.run_action(action)
391
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
392
- assert 'old_str' in obs.content
393
- finally:
394
- _close_test_runtime(runtime)
395
-
396
-
397
- def test_insert(temp_dir, runtime_cls, run_as_openhands):
398
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
399
- try:
400
- # Create test file
401
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
402
- action = FileWriteAction(
403
- content='Line 1\nLine 2',
404
- path=test_file,
405
- )
406
- runtime.run_action(action)
407
-
408
- # Test insert command
409
- action = FileEditAction(
410
- command='insert',
411
- path=test_file,
412
- insert_line=1,
413
- new_str='Inserted line',
414
- )
415
- obs = runtime.run_action(action)
416
- assert f'The file {test_file} has been edited' in obs.content
417
-
418
- # Verify file content
419
- action = FileEditAction(
420
- command='view',
421
- path=test_file,
422
- )
423
- obs = runtime.run_action(action)
424
- assert 'Line 1' in obs.content
425
- assert 'Inserted line' in obs.content
426
- assert 'Line 2' in obs.content
427
-
428
- finally:
429
- _close_test_runtime(runtime)
430
-
431
-
432
- def test_insert_invalid_line(temp_dir, runtime_cls, run_as_openhands):
433
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
434
- try:
435
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
436
- action = FileWriteAction(
437
- content='Line 1\nLine 2',
438
- path=test_file,
439
- )
440
- runtime.run_action(action)
441
- action = FileEditAction(
442
- command='insert',
443
- path=test_file,
444
- insert_line=10,
445
- new_str='Invalid Insert',
446
- )
447
- obs = runtime.run_action(action)
448
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
449
- assert 'Invalid `insert_line` parameter' in obs.content
450
- assert 'It should be within the range of allowed values' in obs.content
451
- finally:
452
- _close_test_runtime(runtime)
453
-
454
-
455
- def test_insert_with_empty_string(temp_dir, runtime_cls, run_as_openhands):
456
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
457
- try:
458
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
459
- action = FileWriteAction(
460
- content='Line 1\nLine 2',
461
- path=test_file,
462
- )
463
- runtime.run_action(action)
464
- action = FileEditAction(
465
- command='insert',
466
- path=test_file,
467
- insert_line=1,
468
- new_str='',
469
- )
470
- obs = runtime.run_action(action)
471
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
472
- assert '1\tLine 1' in obs.content
473
- assert '2\t\n' in obs.content
474
- assert '3\tLine 2' in obs.content
475
- finally:
476
- _close_test_runtime(runtime)
477
-
478
-
479
- def test_insert_with_none_new_str(temp_dir, runtime_cls, run_as_openhands):
480
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
481
- try:
482
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
483
- action = FileWriteAction(
484
- content='Line 1\nLine 2',
485
- path=test_file,
486
- )
487
- runtime.run_action(action)
488
-
489
- action = FileEditAction(
490
- command='insert',
491
- path=test_file,
492
- insert_line=1,
493
- new_str=None,
494
- )
495
- obs = runtime.run_action(action)
496
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
497
- assert 'ERROR' in obs.content
498
- assert 'Parameter `new_str` is required for command: insert' in obs.content
499
- finally:
500
- _close_test_runtime(runtime)
501
-
502
-
503
- def test_undo_edit(temp_dir, runtime_cls, run_as_openhands):
504
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
505
- try:
506
- # Create test file
507
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
508
- action = FileWriteAction(
509
- content='This is a test file.',
510
- path=test_file,
511
- )
512
- runtime.run_action(action)
513
-
514
- # Make an edit
515
- action = FileEditAction(
516
- command='str_replace',
517
- path=test_file,
518
- old_str='test',
519
- new_str='sample',
520
- )
521
- obs = runtime.run_action(action)
522
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
523
- assert 'This is a sample file.' in obs.content
524
-
525
- # Undo the edit
526
- action = FileEditAction(
527
- command='undo_edit',
528
- path=test_file,
529
- )
530
- obs = runtime.run_action(action)
531
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
532
- assert 'Last edit to' in obs.content
533
- assert 'This is a test file.' in obs.content
534
-
535
- # Verify file content
536
- action = FileEditAction(
537
- command='view',
538
- path=test_file,
539
- )
540
- obs = runtime.run_action(action)
541
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
542
- assert 'This is a test file.' in obs.content
543
-
544
- finally:
545
- _close_test_runtime(runtime)
546
-
547
-
548
- def test_validate_path_invalid(temp_dir, runtime_cls, run_as_openhands):
549
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
550
- try:
551
- invalid_file = os.path.join(
552
- config.workspace_mount_path_in_sandbox, 'nonexistent.txt'
553
- )
554
- action = FileEditAction(
555
- command='view',
556
- path=invalid_file,
557
- )
558
- obs = runtime.run_action(action)
559
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
560
- assert 'Invalid `path` parameter' in obs.content
561
- assert f'The path {invalid_file} does not exist' in obs.content
562
- finally:
563
- _close_test_runtime(runtime)
564
-
565
-
566
- def test_create_existing_file_error(temp_dir, runtime_cls, run_as_openhands):
567
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
568
- try:
569
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
570
- action = FileWriteAction(
571
- content='Line 1\nLine 2',
572
- path=test_file,
573
- )
574
- runtime.run_action(action)
575
- action = FileEditAction(
576
- command='create',
577
- path=test_file,
578
- file_text='New content',
579
- )
580
- obs = runtime.run_action(action)
581
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
582
- assert 'File already exists' in obs.content
583
- finally:
584
- _close_test_runtime(runtime)
585
-
586
-
587
- def test_str_replace_missing_old_str(temp_dir, runtime_cls, run_as_openhands):
588
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
589
- try:
590
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
591
- action = FileWriteAction(
592
- content='Line 1\nLine 2',
593
- path=test_file,
594
- )
595
- runtime.run_action(action)
596
- action = FileEditAction(
597
- command='str_replace',
598
- path=test_file,
599
- old_str='',
600
- new_str='sample',
601
- )
602
- obs = runtime.run_action(action)
603
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
604
- assert (
605
- 'No replacement was performed. Multiple occurrences of old_str ``'
606
- in obs.content
607
- )
608
- finally:
609
- _close_test_runtime(runtime)
610
-
611
-
612
- def test_str_replace_new_str_and_old_str_same(temp_dir, runtime_cls, run_as_openhands):
613
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
614
- try:
615
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
616
- action = FileWriteAction(
617
- content='Line 1\nLine 2',
618
- path=test_file,
619
- )
620
- runtime.run_action(action)
621
- action = FileEditAction(
622
- command='str_replace',
623
- path=test_file,
624
- old_str='test file',
625
- new_str='test file',
626
- )
627
- obs = runtime.run_action(action)
628
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
629
- assert (
630
- 'No replacement was performed. `new_str` and `old_str` must be different.'
631
- in obs.content
632
- )
633
- finally:
634
- _close_test_runtime(runtime)
635
-
636
-
637
- def test_insert_missing_line_param(temp_dir, runtime_cls, run_as_openhands):
638
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
639
- try:
640
- test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
641
- action = FileWriteAction(
642
- content='Line 1\nLine 2',
643
- path=test_file,
644
- )
645
- runtime.run_action(action)
646
- action = FileEditAction(
647
- command='insert',
648
- path=test_file,
649
- new_str='Missing insert line',
650
- )
651
- obs = runtime.run_action(action)
652
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
653
- assert 'Parameter `insert_line` is required for command: insert' in obs.content
654
- finally:
655
- _close_test_runtime(runtime)
656
-
657
-
658
- def test_undo_edit_no_history_error(temp_dir, runtime_cls, run_as_openhands):
659
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
660
- try:
661
- empty_file = os.path.join(config.workspace_mount_path_in_sandbox, 'empty.txt')
662
- action = FileWriteAction(
663
- content='',
664
- path=empty_file,
665
- )
666
- runtime.run_action(action)
667
-
668
- action = FileEditAction(
669
- command='undo_edit',
670
- path=empty_file,
671
- )
672
- obs = runtime.run_action(action)
673
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
674
- assert 'No edit history found for' in obs.content
675
- finally:
676
- _close_test_runtime(runtime)
677
-
678
-
679
- def test_view_large_file_with_truncation(temp_dir, runtime_cls, run_as_openhands):
680
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
681
- try:
682
- # Create a large file to trigger truncation
683
- large_file = os.path.join(
684
- config.workspace_mount_path_in_sandbox, 'large_test.txt'
685
- )
686
- large_content = 'Line 1\n' * 16000 # 16000 lines should trigger truncation
687
- action = FileWriteAction(
688
- content=large_content,
689
- path=large_file,
690
- )
691
- runtime.run_action(action)
692
-
693
- action = FileEditAction(
694
- command='view',
695
- path=large_file,
696
- )
697
- obs = runtime.run_action(action)
698
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
699
- assert (
700
- 'Due to the max output limit, only part of this file has been shown to you.'
701
- in obs.content
702
- )
703
- finally:
704
- _close_test_runtime(runtime)
705
-
706
-
707
- def test_insert_line_string_conversion():
708
- """Test that insert_line is properly converted from string to int.
709
-
710
- This test reproduces issue #8369 Example 2 where a string value for insert_line
711
- causes a TypeError in the editor.
712
- """
713
- # Mock the OHEditor
714
- mock_editor = MagicMock()
715
- mock_editor.return_value = MagicMock(
716
- error=None, output='Success', old_content=None, new_content=None
717
- )
718
-
719
- # Test with string insert_line
720
- result, _ = _execute_file_editor(
721
- editor=mock_editor,
722
- command='insert',
723
- path='/test/path.py',
724
- insert_line='185', # String instead of int
725
- new_str='test content',
726
- )
727
-
728
- # Verify the editor was called with the correct parameters (insert_line converted to int)
729
- mock_editor.assert_called_once()
730
- args, kwargs = mock_editor.call_args
731
- assert isinstance(kwargs['insert_line'], int)
732
- assert kwargs['insert_line'] == 185
733
- assert result == 'Success'
tests/runtime/test_bash.py DELETED
@@ -1,1462 +0,0 @@
1
- """Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
2
-
3
- import os
4
- import sys
5
- import time
6
- from pathlib import Path
7
-
8
- import pytest
9
- from conftest import (
10
- _close_test_runtime,
11
- _load_runtime,
12
- )
13
-
14
- from openhands.core.logger import openhands_logger as logger
15
- from openhands.events.action import CmdRunAction
16
- from openhands.events.observation import CmdOutputObservation, ErrorObservation
17
- from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
18
- from openhands.runtime.impl.local.local_runtime import LocalRuntime
19
- from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
20
-
21
-
22
- def get_timeout_suffix(timeout_seconds):
23
- """Helper function to generate the expected timeout suffix."""
24
- return (
25
- f'[The command timed out after {timeout_seconds} seconds. '
26
- f'{TIMEOUT_MESSAGE_TEMPLATE}]'
27
- )
28
-
29
-
30
- # ============================================================================================================================
31
- # Bash-specific tests
32
- # ============================================================================================================================
33
-
34
-
35
- # Helper function to determine if running on Windows
36
- def is_windows():
37
- return sys.platform == 'win32'
38
-
39
-
40
- def _run_cmd_action(runtime, custom_command: str):
41
- action = CmdRunAction(command=custom_command)
42
- logger.info(action, extra={'msg_type': 'ACTION'})
43
- obs = runtime.run_action(action)
44
- assert isinstance(obs, (CmdOutputObservation, ErrorObservation))
45
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
46
- return obs
47
-
48
-
49
- # Get platform-appropriate command
50
- def get_platform_command(linux_cmd, windows_cmd):
51
- return windows_cmd if is_windows() else linux_cmd
52
-
53
-
54
- def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
55
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
56
- try:
57
- # Use python -u for unbuffered output, potentially helping capture initial output on Windows
58
- action = CmdRunAction(command='python -u -m http.server 8081')
59
- action.set_hard_timeout(1)
60
- obs = runtime.run_action(action)
61
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
62
- assert isinstance(obs, CmdOutputObservation)
63
- assert obs.exit_code == -1
64
- assert 'Serving HTTP on' in obs.content
65
-
66
- if runtime_cls == CLIRuntime:
67
- assert '[The command timed out after 1.0 seconds.]' in obs.metadata.suffix
68
- else:
69
- assert get_timeout_suffix(1.0) in obs.metadata.suffix
70
-
71
- action = CmdRunAction(command='C-c', is_input=True)
72
- action.set_hard_timeout(30)
73
- obs_interrupt = runtime.run_action(action)
74
- logger.info(obs_interrupt, extra={'msg_type': 'OBSERVATION'})
75
-
76
- if runtime_cls == CLIRuntime:
77
- assert isinstance(obs_interrupt, ErrorObservation)
78
- assert (
79
- "CLIRuntime does not support interactive input from the agent (e.g., 'C-c'). The command 'C-c' was not sent to any process."
80
- in obs_interrupt.content
81
- )
82
- assert obs_interrupt.error_id == 'AGENT_ERROR$BAD_ACTION'
83
- else:
84
- assert isinstance(obs_interrupt, CmdOutputObservation)
85
- assert obs_interrupt.exit_code == 0
86
- if not is_windows():
87
- # Linux/macOS behavior
88
- assert 'Keyboard interrupt received, exiting.' in obs_interrupt.content
89
- assert (
90
- config.workspace_mount_path_in_sandbox
91
- in obs_interrupt.metadata.working_dir
92
- )
93
-
94
- # Verify the server is actually stopped by trying to start another one
95
- # on the same port (regardless of OS)
96
- action = CmdRunAction(command='ls')
97
- action.set_hard_timeout(1)
98
- obs = runtime.run_action(action)
99
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
100
- assert isinstance(obs, CmdOutputObservation)
101
- assert obs.exit_code == 0
102
- # Check that the interrupt message is NOT present in subsequent output
103
- assert 'Keyboard interrupt received, exiting.' not in obs.content
104
- # Check working directory remains correct after interrupt handling
105
- if runtime_cls == CLIRuntime:
106
- # For CLIRuntime, working_dir is the absolute host path
107
- assert obs.metadata.working_dir == config.workspace_base
108
- else:
109
- # For other runtimes (e.g., Docker), it's relative to or contains the sandbox path
110
- assert config.workspace_mount_path_in_sandbox in obs.metadata.working_dir
111
-
112
- # run it again!
113
- action = CmdRunAction(command='python -u -m http.server 8081')
114
- action.set_hard_timeout(1)
115
- obs = runtime.run_action(action)
116
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
117
- assert isinstance(obs, CmdOutputObservation)
118
- assert obs.exit_code == -1
119
- assert 'Serving HTTP on' in obs.content
120
-
121
- finally:
122
- _close_test_runtime(runtime)
123
-
124
-
125
- def test_bash_background_server(temp_dir, runtime_cls, run_as_openhands):
126
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
127
- server_port = 8081
128
- try:
129
- # Start the server, expect it to timeout (run in background manner)
130
- action = CmdRunAction(f'python3 -m http.server {server_port} &')
131
- obs = runtime.run_action(action)
132
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
133
- assert isinstance(obs, CmdOutputObservation)
134
-
135
- if runtime_cls == CLIRuntime:
136
- # The '&' does not detach cleanly; the PTY session remains active.
137
- # the main cmd ends, then the server may receive SIGHUP.
138
- assert obs.exit_code == 0
139
-
140
- # Give the server a moment to be ready
141
- time.sleep(1)
142
-
143
- # `curl --fail` exits non-zero if connection fails or server returns an error.
144
- # Use a short connect timeout as the server is expected to be down.
145
- curl_action = CmdRunAction(
146
- f'curl --fail --connect-timeout 1 http://localhost:{server_port}'
147
- )
148
- curl_obs = runtime.run_action(curl_action)
149
- logger.info(curl_obs, extra={'msg_type': 'OBSERVATION'})
150
- assert isinstance(curl_obs, CmdOutputObservation)
151
- assert curl_obs.exit_code != 0
152
-
153
- # Confirm with pkill (CLIRuntime is assumed non-Windows here).
154
- # pkill returns 1 if no processes were matched.
155
- kill_action = CmdRunAction('pkill -f "http.server"')
156
- kill_obs = runtime.run_action(kill_action)
157
- logger.info(kill_obs, extra={'msg_type': 'OBSERVATION'})
158
- assert isinstance(kill_obs, CmdOutputObservation)
159
- # For CLIRuntime, bash -c "cmd &" exits quickly, orphaning "cmd".
160
- # CLIRuntime's timeout tries to kill the already-exited bash -c.
161
- # The orphaned http.server continues running.
162
- # So, pkill should find and kill the server.
163
- assert kill_obs.exit_code == 0
164
- else:
165
- assert obs.exit_code == 0
166
-
167
- # Give the server a moment to be ready
168
- time.sleep(1)
169
-
170
- # Verify the server is running by curling it
171
- if is_windows():
172
- curl_action = CmdRunAction(
173
- f'Invoke-WebRequest -Uri http://localhost:{server_port} -UseBasicParsing | Select-Object -ExpandProperty Content'
174
- )
175
- else:
176
- curl_action = CmdRunAction(f'curl http://localhost:{server_port}')
177
- curl_obs = runtime.run_action(curl_action)
178
- logger.info(curl_obs, extra={'msg_type': 'OBSERVATION'})
179
- assert isinstance(curl_obs, CmdOutputObservation)
180
- assert curl_obs.exit_code == 0
181
- # Check for content typical of python http.server directory listing
182
- assert 'Directory listing for' in curl_obs.content
183
-
184
- # Kill the server
185
- if is_windows():
186
- # This assumes PowerShell context if LocalRuntime is used on Windows.
187
- kill_action = CmdRunAction('Get-Job | Stop-Job')
188
- else:
189
- kill_action = CmdRunAction('pkill -f "http.server"')
190
- kill_obs = runtime.run_action(kill_action)
191
- logger.info(kill_obs, extra={'msg_type': 'OBSERVATION'})
192
- assert isinstance(kill_obs, CmdOutputObservation)
193
- assert kill_obs.exit_code == 0
194
-
195
- finally:
196
- _close_test_runtime(runtime)
197
-
198
-
199
- def test_multiline_commands(temp_dir, runtime_cls):
200
- runtime, config = _load_runtime(temp_dir, runtime_cls)
201
- try:
202
- if is_windows():
203
- # Windows PowerShell version using backticks for line continuation
204
- obs = _run_cmd_action(runtime, 'Write-Output `\n "foo"')
205
- assert obs.exit_code == 0, 'The exit code should be 0.'
206
- assert 'foo' in obs.content
207
-
208
- # test multiline output
209
- obs = _run_cmd_action(runtime, 'Write-Output "hello`nworld"')
210
- assert obs.exit_code == 0, 'The exit code should be 0.'
211
- assert 'hello\nworld' in obs.content
212
-
213
- # test whitespace
214
- obs = _run_cmd_action(runtime, 'Write-Output "a`n`n`nz"')
215
- assert obs.exit_code == 0, 'The exit code should be 0.'
216
- assert '\n\n\n' in obs.content
217
- else:
218
- # Original Linux bash version
219
- # single multiline command
220
- obs = _run_cmd_action(runtime, 'echo \\\n -e "foo"')
221
- assert obs.exit_code == 0, 'The exit code should be 0.'
222
- assert 'foo' in obs.content
223
-
224
- # test multiline echo
225
- obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
226
- assert obs.exit_code == 0, 'The exit code should be 0.'
227
- assert 'hello\nworld' in obs.content
228
-
229
- # test whitespace
230
- obs = _run_cmd_action(runtime, 'echo -e "a\\n\\n\\nz"')
231
- assert obs.exit_code == 0, 'The exit code should be 0.'
232
- assert '\n\n\n' in obs.content
233
- finally:
234
- _close_test_runtime(runtime)
235
-
236
-
237
- @pytest.mark.skipif(
238
- is_windows(), reason='Test relies on Linux bash-specific complex commands'
239
- )
240
- def test_complex_commands(temp_dir, runtime_cls, run_as_openhands):
241
- cmd = """count=0; tries=0; while [ $count -lt 3 ]; do result=$(echo "Heads"); tries=$((tries+1)); echo "Flip $tries: $result"; if [ "$result" = "Heads" ]; then count=$((count+1)); else count=0; fi; done; echo "Got 3 heads in a row after $tries flips!";"""
242
-
243
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
244
- try:
245
- obs = _run_cmd_action(runtime, cmd)
246
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
247
- assert obs.exit_code == 0, 'The exit code should be 0.'
248
- assert 'Got 3 heads in a row after 3 flips!' in obs.content
249
-
250
- finally:
251
- _close_test_runtime(runtime)
252
-
253
-
254
- def test_no_ps2_in_output(temp_dir, runtime_cls, run_as_openhands):
255
- """Test that the PS2 sign is not added to the output of a multiline command."""
256
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
257
- try:
258
- if is_windows():
259
- obs = _run_cmd_action(runtime, 'Write-Output "hello`nworld"')
260
- else:
261
- obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
262
- assert obs.exit_code == 0, 'The exit code should be 0.'
263
-
264
- assert 'hello\nworld' in obs.content
265
- assert '>' not in obs.content
266
- finally:
267
- _close_test_runtime(runtime)
268
-
269
-
270
- @pytest.mark.skipif(
271
- is_windows(), reason='Test uses Linux-specific bash loops and sed commands'
272
- )
273
- def test_multiline_command_loop(temp_dir, runtime_cls):
274
- # https://github.com/All-Hands-AI/OpenHands/issues/3143
275
- init_cmd = """mkdir -p _modules && \
276
- for month in {01..04}; do
277
- for day in {01..05}; do
278
- touch "_modules/2024-${month}-${day}-sample.md"
279
- done
280
- done && echo "created files"
281
- """
282
- follow_up_cmd = """for file in _modules/*.md; do
283
- new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
284
- mv "$file" "$new_date"
285
- done && echo "success"
286
- """
287
- runtime, config = _load_runtime(temp_dir, runtime_cls)
288
- try:
289
- obs = _run_cmd_action(runtime, init_cmd)
290
- assert obs.exit_code == 0, 'The exit code should be 0.'
291
- assert 'created files' in obs.content
292
-
293
- obs = _run_cmd_action(runtime, follow_up_cmd)
294
- assert obs.exit_code == 0, 'The exit code should be 0.'
295
- assert 'success' in obs.content
296
- finally:
297
- _close_test_runtime(runtime)
298
-
299
-
300
- @pytest.mark.skipif(
301
- os.getenv('TEST_RUNTIME') == 'cli',
302
- reason='CLIRuntime uses bash -c which handles newline-separated commands. This test expects rejection. See test_cliruntime_multiple_newline_commands.',
303
- )
304
- def test_multiple_multiline_commands(temp_dir, runtime_cls, run_as_openhands):
305
- if is_windows():
306
- cmds = [
307
- 'Get-ChildItem',
308
- 'Write-Output "hello`nworld"',
309
- """Write-Output "hello it's me\"""",
310
- """Write-Output `
311
- ('hello ' + `
312
- 'world')""",
313
- """Write-Output 'hello\nworld\nare\nyou\nthere?'""",
314
- """Write-Output 'hello\nworld\nare\nyou\n\nthere?'""",
315
- """Write-Output 'hello\nworld "'""", # Escape the trailing double quote
316
- ]
317
- else:
318
- cmds = [
319
- 'ls -l',
320
- 'echo -e "hello\nworld"',
321
- """echo -e "hello it's me\"""",
322
- """echo \\
323
- -e 'hello' \\
324
- world""",
325
- """echo -e 'hello\\nworld\\nare\\nyou\\nthere?'""",
326
- """echo -e 'hello\nworld\nare\nyou\n\nthere?'""",
327
- """echo -e 'hello\nworld "'""",
328
- ]
329
- joined_cmds = '\n'.join(cmds)
330
-
331
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
332
- try:
333
- # First test that running multiple commands at once fails
334
- obs = _run_cmd_action(runtime, joined_cmds)
335
- assert isinstance(obs, ErrorObservation)
336
- assert 'Cannot execute multiple commands at once' in obs.content
337
-
338
- # Now run each command individually and verify they work
339
- results = []
340
- for cmd in cmds:
341
- obs = _run_cmd_action(runtime, cmd)
342
- assert isinstance(obs, CmdOutputObservation)
343
- assert obs.exit_code == 0
344
- results.append(obs.content)
345
-
346
- # Verify all expected outputs are present
347
- if is_windows():
348
- assert '.git_config' in results[0] # Get-ChildItem
349
- else:
350
- assert 'total 0' in results[0] # ls -l
351
- assert 'hello\nworld' in results[1] # echo -e "hello\nworld"
352
- assert "hello it's me" in results[2] # echo -e "hello it\'s me"
353
- assert 'hello world' in results[3] # echo -e 'hello' world
354
- assert (
355
- 'hello\nworld\nare\nyou\nthere?' in results[4]
356
- ) # echo -e 'hello\nworld\nare\nyou\nthere?'
357
- assert (
358
- 'hello\nworld\nare\nyou\n\nthere?' in results[5]
359
- ) # echo -e with literal newlines
360
- assert 'hello\nworld "' in results[6] # echo -e with quote
361
- finally:
362
- _close_test_runtime(runtime)
363
-
364
-
365
- def test_cliruntime_multiple_newline_commands(temp_dir, run_as_openhands):
366
- # This test is specific to CLIRuntime
367
- runtime_cls = CLIRuntime
368
- if is_windows():
369
- # A minimal PowerShell-based check would be needed here if CLIRuntime were to robustly support Windows.
370
- # For now, this test primarily targets the bash -c behavior on non-Windows.
371
- pytest.skip(
372
- 'CLIRuntime newline command test primarily for non-Windows bash behavior'
373
- )
374
- # cmds = [
375
- # 'Get-ChildItem -Name .git_config', # Simpler command
376
- # 'Write-Output "hello`nworld"'
377
- # ]
378
- # expected_outputs = ['.git_config', 'hello\nworld']
379
- else:
380
- cmds = [
381
- 'echo "hello"', # A command that will always work
382
- 'echo -e "hello\nworld"',
383
- """echo -e "hello it's me\"""",
384
- ]
385
- expected_outputs = [
386
- 'hello', # Simple string output
387
- 'hello\nworld',
388
- "hello it's me",
389
- ] # Simplified expectations
390
- joined_cmds = '\n'.join(cmds)
391
-
392
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
393
- try:
394
- obs = _run_cmd_action(runtime, joined_cmds)
395
- assert isinstance(obs, CmdOutputObservation)
396
- assert obs.exit_code == 0
397
- # Check that parts of each command's expected output are present
398
- for expected_part in expected_outputs:
399
- assert expected_part in obs.content
400
- finally:
401
- _close_test_runtime(runtime)
402
-
403
-
404
- def test_cmd_run(temp_dir, runtime_cls, run_as_openhands):
405
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
406
- try:
407
- if is_windows():
408
- # Windows PowerShell version
409
- obs = _run_cmd_action(
410
- runtime, f'Get-ChildItem -Path {config.workspace_mount_path_in_sandbox}'
411
- )
412
- assert obs.exit_code == 0
413
-
414
- obs = _run_cmd_action(runtime, 'Get-ChildItem')
415
- assert obs.exit_code == 0
416
-
417
- obs = _run_cmd_action(runtime, 'New-Item -ItemType Directory -Path test')
418
- assert obs.exit_code == 0
419
-
420
- obs = _run_cmd_action(runtime, 'Get-ChildItem')
421
- assert obs.exit_code == 0
422
- assert 'test' in obs.content
423
-
424
- obs = _run_cmd_action(runtime, 'New-Item -ItemType File -Path test/foo.txt')
425
- assert obs.exit_code == 0
426
-
427
- obs = _run_cmd_action(runtime, 'Get-ChildItem test')
428
- assert obs.exit_code == 0
429
- assert 'foo.txt' in obs.content
430
-
431
- # clean up
432
- _run_cmd_action(runtime, 'Remove-Item -Recurse -Force test')
433
- assert obs.exit_code == 0
434
- else:
435
- # Unix version
436
- obs = _run_cmd_action(
437
- runtime, f'ls -l {config.workspace_mount_path_in_sandbox}'
438
- )
439
- assert obs.exit_code == 0
440
-
441
- obs = _run_cmd_action(runtime, 'ls -l')
442
- assert obs.exit_code == 0
443
- assert 'total 0' in obs.content
444
-
445
- obs = _run_cmd_action(runtime, 'mkdir test')
446
- assert obs.exit_code == 0
447
-
448
- obs = _run_cmd_action(runtime, 'ls -l')
449
- assert obs.exit_code == 0
450
- if (
451
- run_as_openhands
452
- and runtime_cls != CLIRuntime
453
- and runtime_cls != LocalRuntime
454
- ):
455
- assert 'openhands' in obs.content
456
- elif runtime_cls == LocalRuntime or runtime_cls == CLIRuntime:
457
- assert 'root' not in obs.content and 'openhands' not in obs.content
458
- else:
459
- assert 'root' in obs.content
460
- assert 'test' in obs.content
461
-
462
- obs = _run_cmd_action(runtime, 'touch test/foo.txt')
463
- assert obs.exit_code == 0
464
-
465
- obs = _run_cmd_action(runtime, 'ls -l test')
466
- assert obs.exit_code == 0
467
- assert 'foo.txt' in obs.content
468
-
469
- # clean up: this is needed, since CI will not be
470
- # run as root, and this test may leave a file
471
- # owned by root
472
- _run_cmd_action(runtime, 'rm -rf test')
473
- assert obs.exit_code == 0
474
- finally:
475
- _close_test_runtime(runtime)
476
-
477
-
478
- @pytest.mark.skipif(
479
- sys.platform != 'win32' and os.getenv('TEST_RUNTIME') == 'cli',
480
- reason='CLIRuntime runs as the host user, so ~ is the host home. This test assumes a sandboxed user.',
481
- )
482
- def test_run_as_user_correct_home_dir(temp_dir, runtime_cls, run_as_openhands):
483
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
484
- try:
485
- if is_windows():
486
- # Windows PowerShell version
487
- obs = _run_cmd_action(runtime, 'cd $HOME && Get-Location')
488
- assert obs.exit_code == 0
489
- # Check for Windows-style home paths
490
- if runtime_cls == LocalRuntime:
491
- assert (
492
- os.getenv('USERPROFILE') in obs.content
493
- or os.getenv('HOME') in obs.content
494
- )
495
- # For non-local runtime, we are less concerned with precise paths
496
- else:
497
- # Original Linux version
498
- obs = _run_cmd_action(runtime, 'cd ~ && pwd')
499
- assert obs.exit_code == 0
500
- if runtime_cls == LocalRuntime:
501
- assert os.getenv('HOME') in obs.content
502
- elif run_as_openhands:
503
- assert '/home/openhands' in obs.content
504
- else:
505
- assert '/root' in obs.content
506
- finally:
507
- _close_test_runtime(runtime)
508
-
509
-
510
- def test_multi_cmd_run_in_single_line(temp_dir, runtime_cls):
511
- runtime, config = _load_runtime(temp_dir, runtime_cls)
512
- try:
513
- if is_windows():
514
- # Windows PowerShell version using semicolon
515
- obs = _run_cmd_action(runtime, 'Get-Location && Get-ChildItem')
516
- assert obs.exit_code == 0
517
- assert config.workspace_mount_path_in_sandbox in obs.content
518
- assert '.git_config' in obs.content
519
- else:
520
- # Original Linux version using &&
521
- obs = _run_cmd_action(runtime, 'pwd && ls -l')
522
- assert obs.exit_code == 0
523
- assert config.workspace_mount_path_in_sandbox in obs.content
524
- assert 'total 0' in obs.content
525
- finally:
526
- _close_test_runtime(runtime)
527
-
528
-
529
- def test_stateful_cmd(temp_dir, runtime_cls):
530
- runtime, config = _load_runtime(temp_dir, runtime_cls)
531
- try:
532
- if is_windows():
533
- # Windows PowerShell version
534
- obs = _run_cmd_action(
535
- runtime, 'New-Item -ItemType Directory -Path test -Force'
536
- )
537
- assert obs.exit_code == 0, 'The exit code should be 0.'
538
-
539
- obs = _run_cmd_action(runtime, 'Set-Location test')
540
- assert obs.exit_code == 0, 'The exit code should be 0.'
541
-
542
- obs = _run_cmd_action(runtime, 'Get-Location')
543
- assert obs.exit_code == 0, 'The exit code should be 0.'
544
- # Account for both forward and backward slashes in path
545
- norm_path = config.workspace_mount_path_in_sandbox.replace(
546
- '\\', '/'
547
- ).replace('//', '/')
548
- test_path = f'{norm_path}/test'.replace('//', '/')
549
- assert test_path in obs.content.replace('\\', '/')
550
- else:
551
- # Original Linux version
552
- obs = _run_cmd_action(runtime, 'mkdir -p test')
553
- assert obs.exit_code == 0, 'The exit code should be 0.'
554
-
555
- if runtime_cls == CLIRuntime:
556
- # For CLIRuntime, test CWD change and command execution within a single action
557
- # as CWD is enforced in the workspace.
558
- obs = _run_cmd_action(runtime, 'cd test && pwd')
559
- else:
560
- # For other runtimes, test stateful CWD change across actions
561
- obs = _run_cmd_action(runtime, 'cd test')
562
- assert obs.exit_code == 0, 'The exit code should be 0 for cd test.'
563
- obs = _run_cmd_action(runtime, 'pwd')
564
-
565
- assert obs.exit_code == 0, (
566
- 'The exit code for the pwd command (or combined command) should be 0.'
567
- )
568
- assert (
569
- f'{config.workspace_mount_path_in_sandbox}/test' in obs.content.strip()
570
- )
571
- finally:
572
- _close_test_runtime(runtime)
573
-
574
-
575
- def test_failed_cmd(temp_dir, runtime_cls):
576
- runtime, config = _load_runtime(temp_dir, runtime_cls)
577
- try:
578
- obs = _run_cmd_action(runtime, 'non_existing_command')
579
- assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
580
- finally:
581
- _close_test_runtime(runtime)
582
-
583
-
584
- def _create_test_file(host_temp_dir):
585
- # Single file
586
- with open(os.path.join(host_temp_dir, 'test_file.txt'), 'w') as f:
587
- f.write('Hello, World!')
588
-
589
-
590
- def test_copy_single_file(temp_dir, runtime_cls):
591
- runtime, config = _load_runtime(temp_dir, runtime_cls)
592
- try:
593
- sandbox_dir = config.workspace_mount_path_in_sandbox
594
- sandbox_file = os.path.join(sandbox_dir, 'test_file.txt')
595
- _create_test_file(temp_dir)
596
- runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir)
597
-
598
- if is_windows():
599
- obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}')
600
- assert obs.exit_code == 0
601
- assert 'test_file.txt' in obs.content
602
-
603
- obs = _run_cmd_action(runtime, f'Get-Content {sandbox_file}')
604
- assert obs.exit_code == 0
605
- assert 'Hello, World!' in obs.content
606
- else:
607
- obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
608
- assert obs.exit_code == 0
609
- assert 'test_file.txt' in obs.content
610
-
611
- obs = _run_cmd_action(runtime, f'cat {sandbox_file}')
612
- assert obs.exit_code == 0
613
- assert 'Hello, World!' in obs.content
614
- finally:
615
- _close_test_runtime(runtime)
616
-
617
-
618
- def _create_host_test_dir_with_files(test_dir):
619
- logger.debug(f'creating `{test_dir}`')
620
- if not os.path.isdir(test_dir):
621
- os.makedirs(test_dir, exist_ok=True)
622
- logger.debug('creating test files in `test_dir`')
623
- with open(os.path.join(test_dir, 'file1.txt'), 'w') as f:
624
- f.write('File 1 content')
625
- with open(os.path.join(test_dir, 'file2.txt'), 'w') as f:
626
- f.write('File 2 content')
627
-
628
-
629
- def test_copy_directory_recursively(temp_dir, runtime_cls):
630
- runtime, config = _load_runtime(temp_dir, runtime_cls)
631
-
632
- sandbox_dir = config.workspace_mount_path_in_sandbox
633
- try:
634
- temp_dir_copy = os.path.join(temp_dir, 'test_dir')
635
- # We need a separate directory, since temp_dir is mounted to /workspace
636
- _create_host_test_dir_with_files(temp_dir_copy)
637
-
638
- runtime.copy_to(temp_dir_copy, sandbox_dir, recursive=True)
639
-
640
- if is_windows():
641
- obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}')
642
- assert obs.exit_code == 0
643
- assert 'test_dir' in obs.content
644
- assert 'file1.txt' not in obs.content
645
- assert 'file2.txt' not in obs.content
646
-
647
- obs = _run_cmd_action(
648
- runtime, f'Get-ChildItem -Path {sandbox_dir}/test_dir'
649
- )
650
- assert obs.exit_code == 0
651
- assert 'file1.txt' in obs.content
652
- assert 'file2.txt' in obs.content
653
-
654
- obs = _run_cmd_action(
655
- runtime, f'Get-Content {sandbox_dir}/test_dir/file1.txt'
656
- )
657
- assert obs.exit_code == 0
658
- assert 'File 1 content' in obs.content
659
- else:
660
- obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
661
- assert obs.exit_code == 0
662
- assert 'test_dir' in obs.content
663
- assert 'file1.txt' not in obs.content
664
- assert 'file2.txt' not in obs.content
665
-
666
- obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}/test_dir')
667
- assert obs.exit_code == 0
668
- assert 'file1.txt' in obs.content
669
- assert 'file2.txt' in obs.content
670
-
671
- obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_dir/file1.txt')
672
- assert obs.exit_code == 0
673
- assert 'File 1 content' in obs.content
674
- finally:
675
- _close_test_runtime(runtime)
676
-
677
-
678
- def test_copy_to_non_existent_directory(temp_dir, runtime_cls):
679
- runtime, config = _load_runtime(temp_dir, runtime_cls)
680
- try:
681
- sandbox_dir = config.workspace_mount_path_in_sandbox
682
- _create_test_file(temp_dir)
683
- runtime.copy_to(
684
- os.path.join(temp_dir, 'test_file.txt'), f'{sandbox_dir}/new_dir'
685
- )
686
-
687
- obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/new_dir/test_file.txt')
688
- assert obs.exit_code == 0
689
- assert 'Hello, World!' in obs.content
690
- finally:
691
- _close_test_runtime(runtime)
692
-
693
-
694
- def test_overwrite_existing_file(temp_dir, runtime_cls):
695
- runtime, config = _load_runtime(temp_dir, runtime_cls)
696
- try:
697
- sandbox_dir = config.workspace_mount_path_in_sandbox
698
- sandbox_file = os.path.join(sandbox_dir, 'test_file.txt')
699
-
700
- if is_windows():
701
- # Check initial state
702
- obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}')
703
- assert obs.exit_code == 0
704
- assert 'test_file.txt' not in obs.content
705
-
706
- # Create an empty file
707
- obs = _run_cmd_action(
708
- runtime, f'New-Item -ItemType File -Path {sandbox_file} -Force'
709
- )
710
- assert obs.exit_code == 0
711
-
712
- # Verify file exists and is empty
713
- obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}')
714
- assert obs.exit_code == 0
715
- assert 'test_file.txt' in obs.content
716
-
717
- obs = _run_cmd_action(runtime, f'Get-Content {sandbox_file}')
718
- assert obs.exit_code == 0
719
- assert obs.content.strip() == '' # Empty file
720
- assert 'Hello, World!' not in obs.content
721
-
722
- # Create host file and copy to overwrite
723
- _create_test_file(temp_dir)
724
- runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir)
725
-
726
- # Verify file content is overwritten
727
- obs = _run_cmd_action(runtime, f'Get-Content {sandbox_file}')
728
- assert obs.exit_code == 0
729
- assert 'Hello, World!' in obs.content
730
- else:
731
- # Original Linux version
732
- obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
733
- assert obs.exit_code == 0
734
- assert 'test_file.txt' not in obs.content # Check initial state
735
-
736
- obs = _run_cmd_action(runtime, f'touch {sandbox_file}')
737
- assert obs.exit_code == 0
738
-
739
- obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
740
- assert obs.exit_code == 0
741
- assert 'test_file.txt' in obs.content
742
-
743
- obs = _run_cmd_action(runtime, f'cat {sandbox_file}')
744
- assert obs.exit_code == 0
745
- assert obs.content.strip() == '' # Empty file
746
- assert 'Hello, World!' not in obs.content
747
-
748
- _create_test_file(temp_dir)
749
- runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir)
750
-
751
- obs = _run_cmd_action(runtime, f'cat {sandbox_file}')
752
- assert obs.exit_code == 0
753
- assert 'Hello, World!' in obs.content
754
- finally:
755
- _close_test_runtime(runtime)
756
-
757
-
758
- def test_copy_non_existent_file(temp_dir, runtime_cls):
759
- runtime, config = _load_runtime(temp_dir, runtime_cls)
760
- try:
761
- sandbox_dir = config.workspace_mount_path_in_sandbox
762
- with pytest.raises(FileNotFoundError):
763
- runtime.copy_to(
764
- os.path.join(sandbox_dir, 'non_existent_file.txt'),
765
- f'{sandbox_dir}/should_not_exist.txt',
766
- )
767
-
768
- obs = _run_cmd_action(runtime, f'ls {sandbox_dir}/should_not_exist.txt')
769
- assert obs.exit_code != 0 # File should not exist
770
- finally:
771
- _close_test_runtime(runtime)
772
-
773
-
774
- def test_copy_from_directory(temp_dir, runtime_cls):
775
- runtime, config = _load_runtime(temp_dir, runtime_cls)
776
- sandbox_dir = config.workspace_mount_path_in_sandbox
777
- try:
778
- temp_dir_copy = os.path.join(temp_dir, 'test_dir')
779
- # We need a separate directory, since temp_dir is mounted to /workspace
780
- _create_host_test_dir_with_files(temp_dir_copy)
781
-
782
- # Initial state
783
- runtime.copy_to(temp_dir_copy, sandbox_dir, recursive=True)
784
-
785
- path_to_copy_from = f'{sandbox_dir}/test_dir'
786
- result = runtime.copy_from(path=path_to_copy_from)
787
-
788
- # Result is returned as a path
789
- assert isinstance(result, Path)
790
-
791
- if result.exists() and not is_windows():
792
- result.unlink()
793
- finally:
794
- _close_test_runtime(runtime)
795
-
796
-
797
- @pytest.mark.skipif(
798
- is_windows(), reason='Test uses Linux-specific file permissions and sudo commands'
799
- )
800
- def test_git_operation(temp_dir, runtime_cls):
801
- # do not mount workspace, since workspace mount by tests will be owned by root
802
- # while the user_id we get via os.getuid() is different from root
803
- # which causes permission issues
804
- runtime, config = _load_runtime(
805
- temp_dir=temp_dir,
806
- use_workspace=False,
807
- runtime_cls=runtime_cls,
808
- # Need to use non-root user to expose issues
809
- run_as_openhands=True,
810
- )
811
- # this will happen if permission of runtime is not properly configured
812
- # fatal: detected dubious ownership in repository at config.workspace_mount_path_in_sandbox
813
- try:
814
- if runtime_cls != LocalRuntime and runtime_cls != CLIRuntime:
815
- # on local machine, permissionless sudo will probably not be available
816
- obs = _run_cmd_action(runtime, 'sudo chown -R openhands:root .')
817
- assert obs.exit_code == 0
818
-
819
- # check the ownership of the current directory
820
- obs = _run_cmd_action(runtime, 'ls -alh .')
821
- assert obs.exit_code == 0
822
- # drwx--S--- 2 openhands root 64 Aug 7 23:32 .
823
- # drwxr-xr-x 1 root root 4.0K Aug 7 23:33 ..
824
- for line in obs.content.split('\n'):
825
- if runtime_cls == LocalRuntime or runtime_cls == CLIRuntime:
826
- continue # skip these checks
827
-
828
- if ' ..' in line:
829
- # parent directory should be owned by root
830
- assert 'root' in line
831
- assert 'openhands' not in line
832
- elif ' .' in line:
833
- # current directory should be owned by openhands
834
- # and its group should be root
835
- assert 'openhands' in line
836
- assert 'root' in line
837
-
838
- # make sure all git operations are allowed
839
- obs = _run_cmd_action(runtime, 'git init')
840
- assert obs.exit_code == 0
841
-
842
- # create a file
843
- obs = _run_cmd_action(runtime, 'echo "hello" > test_file.txt')
844
- assert obs.exit_code == 0
845
-
846
- if runtime_cls == LocalRuntime or runtime_cls == CLIRuntime:
847
- # set git config author in CI only, not on local machine
848
- logger.info('Setting git config author')
849
- obs = _run_cmd_action(
850
- runtime,
851
- 'git config user.name "openhands" && git config user.email "[email protected]"',
852
- )
853
- assert obs.exit_code == 0
854
-
855
- # Set up git config - list current settings (should be empty or just what was set)
856
- obs = _run_cmd_action(runtime, 'git config --list')
857
- assert obs.exit_code == 0
858
-
859
- # git add
860
- obs = _run_cmd_action(runtime, 'git add test_file.txt')
861
- assert obs.exit_code == 0
862
-
863
- # git diff
864
- obs = _run_cmd_action(runtime, 'git diff --no-color --cached')
865
- assert obs.exit_code == 0
866
- assert 'b/test_file.txt' in obs.content
867
- assert '+hello' in obs.content
868
-
869
- # git commit
870
- obs = _run_cmd_action(runtime, 'git commit -m "test commit"')
871
- assert obs.exit_code == 0
872
- finally:
873
- _close_test_runtime(runtime)
874
-
875
-
876
- def test_python_version(temp_dir, runtime_cls, run_as_openhands):
877
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
878
- try:
879
- obs = runtime.run_action(CmdRunAction(command='python --version'))
880
-
881
- assert isinstance(obs, CmdOutputObservation), (
882
- 'The observation should be a CmdOutputObservation.'
883
- )
884
- assert obs.exit_code == 0, 'The exit code should be 0.'
885
- assert 'Python 3' in obs.content, 'The output should contain "Python 3".'
886
- finally:
887
- _close_test_runtime(runtime)
888
-
889
-
890
- def test_pwd_property(temp_dir, runtime_cls, run_as_openhands):
891
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
892
- try:
893
- # Create a subdirectory and verify pwd updates
894
- obs = _run_cmd_action(runtime, 'mkdir -p random_dir')
895
- assert obs.exit_code == 0
896
-
897
- obs = _run_cmd_action(runtime, 'cd random_dir && pwd')
898
- assert obs.exit_code == 0
899
- assert 'random_dir' in obs.content
900
- finally:
901
- _close_test_runtime(runtime)
902
-
903
-
904
- def test_basic_command(temp_dir, runtime_cls, run_as_openhands):
905
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
906
- try:
907
- if is_windows():
908
- # Test simple command
909
- obs = _run_cmd_action(runtime, "Write-Output 'hello world'")
910
- assert 'hello world' in obs.content
911
- assert obs.exit_code == 0
912
-
913
- # Test command with error
914
- obs = _run_cmd_action(runtime, 'nonexistent_command')
915
- assert obs.exit_code != 0
916
- assert 'not recognized' in obs.content or 'command not found' in obs.content
917
-
918
- # Test command with special characters
919
- obs = _run_cmd_action(
920
- runtime, 'Write-Output "hello world with`nspecial chars"'
921
- )
922
- assert 'hello world with\nspecial chars' in obs.content
923
- assert obs.exit_code == 0
924
-
925
- # Test multiple commands in sequence
926
- obs = _run_cmd_action(
927
- runtime,
928
- 'Write-Output "first" && Write-Output "second" && Write-Output "third"',
929
- )
930
- assert 'first' in obs.content
931
- assert 'second' in obs.content
932
- assert 'third' in obs.content
933
- assert obs.exit_code == 0
934
- else:
935
- # Original Linux version
936
- # Test simple command
937
- obs = _run_cmd_action(runtime, "echo 'hello world'")
938
- assert 'hello world' in obs.content
939
- assert obs.exit_code == 0
940
-
941
- # Test command with error
942
- obs = _run_cmd_action(runtime, 'nonexistent_command')
943
- assert obs.exit_code == 127
944
- assert 'nonexistent_command: command not found' in obs.content
945
-
946
- # Test command with special characters
947
- obs = _run_cmd_action(
948
- runtime, "echo 'hello world with\nspecial chars'"
949
- )
950
- assert 'hello world with\nspecial chars' in obs.content
951
- assert obs.exit_code == 0
952
-
953
- # Test multiple commands in sequence
954
- obs = _run_cmd_action(
955
- runtime, 'echo "first" && echo "second" && echo "third"'
956
- )
957
- assert 'first' in obs.content
958
- assert 'second' in obs.content
959
- assert 'third' in obs.content
960
- assert obs.exit_code == 0
961
- finally:
962
- _close_test_runtime(runtime)
963
-
964
-
965
- @pytest.mark.skipif(
966
- is_windows(), reason='Powershell does not support interactive commands'
967
- )
968
- @pytest.mark.skipif(
969
- os.getenv('TEST_RUNTIME') == 'cli',
970
- reason='CLIRuntime does not support interactive commands from the agent.',
971
- )
972
- def test_interactive_command(temp_dir, runtime_cls, run_as_openhands):
973
- runtime, config = _load_runtime(
974
- temp_dir,
975
- runtime_cls,
976
- run_as_openhands,
977
- runtime_startup_env_vars={'NO_CHANGE_TIMEOUT_SECONDS': '1'},
978
- )
979
- try:
980
- # Test interactive command
981
- action = CmdRunAction('read -p "Enter name: " name && echo "Hello $name"')
982
- obs = runtime.run_action(action)
983
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
984
- # This should trigger SOFT timeout, so no need to set hard timeout
985
- assert 'Enter name:' in obs.content
986
- assert '[The command has no new output after 1 seconds.' in obs.metadata.suffix
987
-
988
- action = CmdRunAction('John', is_input=True)
989
- obs = runtime.run_action(action)
990
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
991
- assert 'Hello John' in obs.content
992
- assert '[The command completed with exit code 0.]' in obs.metadata.suffix
993
-
994
- # Test multiline command input with here document
995
- action = CmdRunAction("""cat << EOF
996
- line 1
997
- line 2
998
- EOF""")
999
- obs = runtime.run_action(action)
1000
- assert 'line 1\nline 2' in obs.content
1001
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1002
- assert '[The command completed with exit code 0.]' in obs.metadata.suffix
1003
- assert obs.exit_code == 0
1004
- finally:
1005
- _close_test_runtime(runtime)
1006
-
1007
-
1008
- @pytest.mark.skipif(
1009
- is_windows(),
1010
- reason='Test relies on Linux-specific commands like seq and bash for loops',
1011
- )
1012
- def test_long_output(temp_dir, runtime_cls, run_as_openhands):
1013
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
1014
- try:
1015
- # Generate a long output
1016
- action = CmdRunAction('for i in $(seq 1 5000); do echo "Line $i"; done')
1017
- action.set_hard_timeout(10)
1018
- obs = runtime.run_action(action)
1019
- assert obs.exit_code == 0
1020
- assert 'Line 1' in obs.content
1021
- assert 'Line 5000' in obs.content
1022
- finally:
1023
- _close_test_runtime(runtime)
1024
-
1025
-
1026
- @pytest.mark.skipif(
1027
- is_windows(),
1028
- reason='Test relies on Linux-specific commands like seq and bash for loops',
1029
- )
1030
- @pytest.mark.skipif(
1031
- os.getenv('TEST_RUNTIME') == 'cli',
1032
- reason='CLIRuntime does not truncate command output.',
1033
- )
1034
- def test_long_output_exceed_history_limit(temp_dir, runtime_cls, run_as_openhands):
1035
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
1036
- try:
1037
- # Generate a long output
1038
- action = CmdRunAction('for i in $(seq 1 50000); do echo "Line $i"; done')
1039
- action.set_hard_timeout(30)
1040
- obs = runtime.run_action(action)
1041
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1042
- assert obs.exit_code == 0
1043
- assert 'Previous command outputs are truncated' in obs.metadata.prefix
1044
- assert 'Line 40000' in obs.content
1045
- assert 'Line 50000' in obs.content
1046
- finally:
1047
- _close_test_runtime(runtime)
1048
-
1049
-
1050
- @pytest.mark.skipif(
1051
- is_windows(), reason='Test uses Linux-specific temp directory and bash for loops'
1052
- )
1053
- def test_long_output_from_nested_directories(temp_dir, runtime_cls, run_as_openhands):
1054
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
1055
- try:
1056
- # Create nested directories with many files
1057
- setup_cmd = 'mkdir -p /tmp/test_dir && cd /tmp/test_dir && for i in $(seq 1 100); do mkdir -p "folder_$i"; for j in $(seq 1 100); do touch "folder_$i/file_$j.txt"; done; done'
1058
- setup_action = CmdRunAction(setup_cmd.strip())
1059
- setup_action.set_hard_timeout(60)
1060
- obs = runtime.run_action(setup_action)
1061
- assert obs.exit_code == 0
1062
-
1063
- # List the directory structure recursively
1064
- action = CmdRunAction('ls -R /tmp/test_dir')
1065
- action.set_hard_timeout(60)
1066
- obs = runtime.run_action(action)
1067
- assert obs.exit_code == 0
1068
-
1069
- # Verify output contains expected files
1070
- assert 'folder_1' in obs.content
1071
- assert 'file_1.txt' in obs.content
1072
- assert 'folder_100' in obs.content
1073
- assert 'file_100.txt' in obs.content
1074
- finally:
1075
- _close_test_runtime(runtime)
1076
-
1077
-
1078
- @pytest.mark.skipif(
1079
- is_windows(),
1080
- reason='Test uses Linux-specific commands like find and grep with complex syntax',
1081
- )
1082
- def test_command_backslash(temp_dir, runtime_cls, run_as_openhands):
1083
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
1084
- try:
1085
- # Create a file with the content "implemented_function"
1086
- action = CmdRunAction(
1087
- 'mkdir -p /tmp/test_dir && echo "implemented_function" > /tmp/test_dir/file_1.txt'
1088
- )
1089
- obs = runtime.run_action(action)
1090
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1091
- assert obs.exit_code == 0
1092
-
1093
- # Reproduce an issue we ran into during evaluation
1094
- # find /workspace/sympy__sympy__1.0 -type f -exec grep -l "implemented_function" {} \;
1095
- # find: missing argument to `-exec'
1096
- # --> This is unexpected output due to incorrect escaping of \;
1097
- # This tests for correct escaping of \;
1098
- action = CmdRunAction(
1099
- 'find /tmp/test_dir -type f -exec grep -l "implemented_function" {} \\;'
1100
- )
1101
- obs = runtime.run_action(action)
1102
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1103
- assert obs.exit_code == 0
1104
- assert '/tmp/test_dir/file_1.txt' in obs.content
1105
- finally:
1106
- _close_test_runtime(runtime)
1107
-
1108
-
1109
- @pytest.mark.skipif(
1110
- is_windows(), reason='Test uses Linux-specific ps aux, awk, and grep commands'
1111
- )
1112
- @pytest.mark.skipif(
1113
- os.getenv('TEST_RUNTIME') == 'cli',
1114
- reason='CLIRuntime does not support interactive commands from the agent.',
1115
- )
1116
- def test_stress_long_output_with_soft_and_hard_timeout(
1117
- temp_dir, runtime_cls, run_as_openhands
1118
- ):
1119
- runtime, config = _load_runtime(
1120
- temp_dir,
1121
- runtime_cls,
1122
- run_as_openhands,
1123
- runtime_startup_env_vars={'NO_CHANGE_TIMEOUT_SECONDS': '1'},
1124
- docker_runtime_kwargs={
1125
- 'cpu_period': 100000, # 100ms
1126
- 'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU)
1127
- 'mem_limit': '4G', # 4 GB of memory
1128
- },
1129
- )
1130
- try:
1131
- # Run a command that generates long output multiple times
1132
- for i in range(10):
1133
- start_time = time.time()
1134
-
1135
- # Check tmux memory usage (in KB)
1136
- mem_action = CmdRunAction(
1137
- 'ps aux | awk \'{printf "%8.1f KB %s\\n", $6, $0}\' | sort -nr | grep "/usr/bin/tmux" | grep -v grep | awk \'{print $1}\''
1138
- )
1139
- mem_obs = runtime.run_action(mem_action)
1140
- assert mem_obs.exit_code == 0
1141
- logger.info(
1142
- f'Tmux memory usage (iteration {i}): {mem_obs.content.strip()} KB'
1143
- )
1144
-
1145
- # Check action_execution_server mem
1146
- mem_action = CmdRunAction(
1147
- 'ps aux | awk \'{printf "%8.1f KB %s\\n", $6, $0}\' | sort -nr | grep "action_execution_server" | grep "/openhands/poetry" | grep -v grep | awk \'{print $1}\''
1148
- )
1149
- mem_obs = runtime.run_action(mem_action)
1150
- assert mem_obs.exit_code == 0
1151
- logger.info(
1152
- f'Action execution server memory usage (iteration {i}): {mem_obs.content.strip()} KB'
1153
- )
1154
-
1155
- # Test soft timeout
1156
- action = CmdRunAction(
1157
- 'read -p "Do you want to continue? [Y/n] " answer; if [[ $answer == "Y" ]]; then echo "Proceeding with operation..."; echo "Operation completed successfully!"; else echo "Operation cancelled."; exit 1; fi'
1158
- )
1159
- obs = runtime.run_action(action)
1160
- assert 'Do you want to continue?' in obs.content
1161
- assert obs.exit_code == -1 # Command is still running, waiting for input
1162
-
1163
- # Send the confirmation
1164
- action = CmdRunAction('Y', is_input=True)
1165
- obs = runtime.run_action(action)
1166
- assert 'Proceeding with operation...' in obs.content
1167
- assert 'Operation completed successfully!' in obs.content
1168
- assert obs.exit_code == 0
1169
- assert '[The command completed with exit code 0.]' in obs.metadata.suffix
1170
-
1171
- # Test hard timeout w/ long output
1172
- # Generate long output with 1000 asterisks per line
1173
- action = CmdRunAction(
1174
- f'export i={i}; for j in $(seq 1 100); do echo "Line $j - Iteration $i - $(printf \'%1000s\' | tr " " "*")"; sleep 1; done'
1175
- )
1176
- action.set_hard_timeout(2)
1177
- obs = runtime.run_action(action)
1178
-
1179
- # Verify the output
1180
- assert obs.exit_code == -1
1181
- assert f'Line 1 - Iteration {i}' in obs.content
1182
- # assert f'Line 1000 - Iteration {i}' in obs.content
1183
- # assert '[The command completed with exit code 0.]' in obs.metadata.suffix
1184
-
1185
- # Because hard-timeout is triggered, the terminal will be in a weird state
1186
- # where it will not accept any new commands.
1187
- obs = runtime.run_action(CmdRunAction('ls'))
1188
- assert obs.exit_code == -1
1189
- assert 'The previous command is still running' in obs.metadata.suffix
1190
-
1191
- # We need to send a Ctrl+C to reset the terminal.
1192
- obs = runtime.run_action(CmdRunAction('C-c', is_input=True))
1193
- assert obs.exit_code == 130
1194
-
1195
- # Now make sure the terminal is in a good state
1196
- obs = runtime.run_action(CmdRunAction('ls'))
1197
- assert obs.exit_code == 0
1198
-
1199
- duration = time.time() - start_time
1200
- logger.info(f'Completed iteration {i} in {duration:.2f} seconds')
1201
-
1202
- finally:
1203
- _close_test_runtime(runtime)
1204
-
1205
-
1206
- @pytest.mark.skipif(
1207
- os.getenv('TEST_RUNTIME') == 'cli',
1208
- reason='FIXME: CLIRuntime does not watch previously timed-out commands except for getting full output a short time after timeout.',
1209
- )
1210
- def test_command_output_continuation(temp_dir, runtime_cls, run_as_openhands):
1211
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
1212
- try:
1213
- if is_windows():
1214
- # Windows PowerShell version
1215
- action = CmdRunAction(
1216
- '1..5 | ForEach-Object { Write-Output $_; Start-Sleep 3 }'
1217
- )
1218
- action.set_hard_timeout(2.5)
1219
- obs = runtime.run_action(action)
1220
- assert obs.content.strip() == '1'
1221
- assert obs.metadata.prefix == ''
1222
- assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
1223
-
1224
- # Continue watching output
1225
- action = CmdRunAction('')
1226
- action.set_hard_timeout(2.5)
1227
- obs = runtime.run_action(action)
1228
- assert (
1229
- '[Below is the output of the previous command.]' in obs.metadata.prefix
1230
- )
1231
- assert obs.content.strip() == '2'
1232
- assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
1233
-
1234
- # Continue until completion
1235
- for expected in ['3', '4', '5']:
1236
- action = CmdRunAction('')
1237
- action.set_hard_timeout(2.5)
1238
- obs = runtime.run_action(action)
1239
- assert (
1240
- '[Below is the output of the previous command.]'
1241
- in obs.metadata.prefix
1242
- )
1243
- assert obs.content.strip() == expected
1244
- assert (
1245
- '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
1246
- )
1247
-
1248
- # Final empty command to complete
1249
- action = CmdRunAction('')
1250
- obs = runtime.run_action(action)
1251
- assert '[The command completed with exit code 0.]' in obs.metadata.suffix
1252
- else:
1253
- # Original Linux version
1254
- # Start a command that produces output slowly
1255
- action = CmdRunAction('for i in {1..5}; do echo $i; sleep 3; done')
1256
- action.set_hard_timeout(2.5)
1257
- obs = runtime.run_action(action)
1258
- assert obs.content.strip() == '1'
1259
- assert obs.metadata.prefix == ''
1260
- assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
1261
-
1262
- # Continue watching output
1263
- action = CmdRunAction('')
1264
- action.set_hard_timeout(2.5)
1265
- obs = runtime.run_action(action)
1266
- assert (
1267
- '[Below is the output of the previous command.]' in obs.metadata.prefix
1268
- )
1269
- assert obs.content.strip() == '2'
1270
- assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
1271
-
1272
- # Continue until completion
1273
- for expected in ['3', '4', '5']:
1274
- action = CmdRunAction('')
1275
- action.set_hard_timeout(2.5)
1276
- obs = runtime.run_action(action)
1277
- assert (
1278
- '[Below is the output of the previous command.]'
1279
- in obs.metadata.prefix
1280
- )
1281
- assert obs.content.strip() == expected
1282
- assert (
1283
- '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
1284
- )
1285
-
1286
- # Final empty command to complete
1287
- action = CmdRunAction('')
1288
- obs = runtime.run_action(action)
1289
- assert '[The command completed with exit code 0.]' in obs.metadata.suffix
1290
- finally:
1291
- _close_test_runtime(runtime)
1292
-
1293
-
1294
- @pytest.mark.skipif(
1295
- os.getenv('TEST_RUNTIME') == 'cli',
1296
- reason='FIXME: CLIRuntime does not implement empty command behavior.',
1297
- )
1298
- def test_long_running_command_follow_by_execute(
1299
- temp_dir, runtime_cls, run_as_openhands
1300
- ):
1301
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
1302
- try:
1303
- if is_windows():
1304
- action = CmdRunAction('1..3 | ForEach-Object { Write-Output $_; sleep 3 }')
1305
- else:
1306
- # Test command that produces output slowly
1307
- action = CmdRunAction('for i in {1..3}; do echo $i; sleep 3; done')
1308
-
1309
- action.set_hard_timeout(2.5)
1310
- obs = runtime.run_action(action)
1311
- assert '1' in obs.content # First number should appear before timeout
1312
- assert obs.metadata.exit_code == -1 # -1 indicates command is still running
1313
- assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
1314
- assert obs.metadata.prefix == ''
1315
-
1316
- # Continue watching output
1317
- action = CmdRunAction('')
1318
- action.set_hard_timeout(2.5)
1319
- obs = runtime.run_action(action)
1320
- assert '2' in obs.content
1321
- assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
1322
- assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
1323
- assert obs.metadata.exit_code == -1 # -1 indicates command is still running
1324
-
1325
- # Test command that produces no output
1326
- action = CmdRunAction('sleep 15')
1327
- action.set_hard_timeout(2.5)
1328
- obs = runtime.run_action(action)
1329
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1330
- assert '3' not in obs.content
1331
- assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
1332
- assert 'The previous command is still running' in obs.metadata.suffix
1333
- assert obs.metadata.exit_code == -1 # -1 indicates command is still running
1334
-
1335
- # Finally continue again
1336
- action = CmdRunAction('')
1337
- obs = runtime.run_action(action)
1338
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1339
- assert '3' in obs.content
1340
- assert '[The command completed with exit code 0.]' in obs.metadata.suffix
1341
- finally:
1342
- _close_test_runtime(runtime)
1343
-
1344
-
1345
- @pytest.mark.skipif(
1346
- os.getenv('TEST_RUNTIME') == 'cli',
1347
- reason='FIXME: CLIRuntime does not implement empty command behavior.',
1348
- )
1349
- def test_empty_command_errors(temp_dir, runtime_cls, run_as_openhands):
1350
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
1351
- try:
1352
- # Test empty command without previous command - behavior should be the same on all platforms
1353
- obs = runtime.run_action(CmdRunAction(''))
1354
- assert isinstance(obs, CmdOutputObservation)
1355
- assert (
1356
- 'ERROR: No previous running command to retrieve logs from.' in obs.content
1357
- )
1358
- finally:
1359
- _close_test_runtime(runtime)
1360
-
1361
-
1362
- @pytest.mark.skipif(
1363
- is_windows(), reason='Powershell does not support interactive commands'
1364
- )
1365
- @pytest.mark.skipif(
1366
- os.getenv('TEST_RUNTIME') == 'cli',
1367
- reason='CLIRuntime does not support interactive commands from the agent.',
1368
- )
1369
- def test_python_interactive_input(temp_dir, runtime_cls, run_as_openhands):
1370
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
1371
- try:
1372
- # Test Python program that asks for input - same for both platforms
1373
- python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""
1374
-
1375
- # Start Python with the interactive script
1376
- # For both platforms we can use the same command
1377
- obs = runtime.run_action(CmdRunAction(f'python -c "{python_script}"'))
1378
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1379
- assert 'Enter your name:' in obs.content
1380
- assert obs.metadata.exit_code == -1 # -1 indicates command is still running
1381
-
1382
- # Send first input (name)
1383
- obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
1384
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1385
- assert 'Enter your age:' in obs.content
1386
- assert obs.metadata.exit_code == -1
1387
-
1388
- # Send second input (age)
1389
- obs = runtime.run_action(CmdRunAction('25', is_input=True))
1390
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1391
- assert 'Hello Alice, you are 25 years old' in obs.content
1392
- assert obs.metadata.exit_code == 0
1393
- assert '[The command completed with exit code 0.]' in obs.metadata.suffix
1394
- finally:
1395
- _close_test_runtime(runtime)
1396
-
1397
-
1398
- @pytest.mark.skipif(
1399
- is_windows(), reason='Powershell does not support interactive commands'
1400
- )
1401
- @pytest.mark.skipif(
1402
- os.getenv('TEST_RUNTIME') == 'cli',
1403
- reason='CLIRuntime does not support interactive commands from the agent.',
1404
- )
1405
- def test_python_interactive_input_without_set_input(
1406
- temp_dir, runtime_cls, run_as_openhands
1407
- ):
1408
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
1409
- try:
1410
- # Test Python program that asks for input
1411
- python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""
1412
-
1413
- # Start Python with the interactive script
1414
- obs = runtime.run_action(CmdRunAction(f'python -c "{python_script}"'))
1415
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1416
- assert 'Enter your name:' in obs.content
1417
- assert obs.metadata.exit_code == -1 # -1 indicates command is still running
1418
-
1419
- # Send first input (name)
1420
- obs = runtime.run_action(CmdRunAction('Alice', is_input=False))
1421
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1422
- assert 'Enter your age:' not in obs.content
1423
- assert (
1424
- 'Your command "Alice" is NOT executed. The previous command is still running'
1425
- in obs.metadata.suffix
1426
- )
1427
- assert obs.metadata.exit_code == -1
1428
-
1429
- # Try again now with input
1430
- obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
1431
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1432
- assert 'Enter your age:' in obs.content
1433
- assert obs.metadata.exit_code == -1
1434
-
1435
- obs = runtime.run_action(CmdRunAction('25', is_input=True))
1436
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1437
- assert 'Hello Alice, you are 25 years old' in obs.content
1438
- assert obs.metadata.exit_code == 0
1439
- assert '[The command completed with exit code 0.]' in obs.metadata.suffix
1440
- finally:
1441
- _close_test_runtime(runtime)
1442
-
1443
-
1444
- def test_bash_remove_prefix(temp_dir, runtime_cls, run_as_openhands):
1445
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
1446
- try:
1447
- # create a git repo - same for both platforms
1448
- action = CmdRunAction(
1449
- 'git init && git remote add origin https://github.com/All-Hands-AI/OpenHands'
1450
- )
1451
- obs = runtime.run_action(action)
1452
- # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1453
- assert obs.metadata.exit_code == 0
1454
-
1455
- # Check git remote - same for both platforms
1456
- obs = runtime.run_action(CmdRunAction('git remote -v'))
1457
- # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
1458
- assert obs.metadata.exit_code == 0
1459
- assert 'https://github.com/All-Hands-AI/OpenHands' in obs.content
1460
- assert 'git remote -v' not in obs.content
1461
- finally:
1462
- _close_test_runtime(runtime)
 
tests/runtime/test_browsergym_envs.py DELETED
@@ -1,73 +0,0 @@
1
- import json
2
-
3
- import pytest
4
-
5
- from openhands.core.logger import openhands_logger as logger
6
- from openhands.events.action.browse import BrowseInteractiveAction
7
- from openhands.events.observation.browse import BrowserOutputObservation
8
- from tests.runtime.conftest import _close_test_runtime, _load_runtime
9
-
10
-
11
- def has_miniwob():
12
- try:
13
- import importlib.util
14
-
15
- # try to find this browser environment, if it was installed
16
- spec = importlib.util.find_spec('browsergym.miniwob')
17
- if spec is None:
18
- return False
19
-
20
- # try to import this environment
21
- importlib.util.module_from_spec(spec)
22
- return True
23
- except ImportError:
24
- return False
25
-
26
-
27
- @pytest.mark.skipif(
28
- not has_miniwob(),
29
- reason='Requires browsergym-miniwob package to be installed',
30
- )
31
- def test_browsergym_eval_env(runtime_cls, temp_dir):
32
- runtime, config = _load_runtime(
33
- temp_dir,
34
- runtime_cls=runtime_cls,
35
- run_as_openhands=False, # need root permission to access file
36
- base_container_image='xingyaoww/od-eval-miniwob:v1.0',
37
- browsergym_eval_env='browsergym/miniwob.choose-list',
38
- force_rebuild_runtime=True,
39
- )
40
- from openhands.runtime.browser.browser_env import (
41
- BROWSER_EVAL_GET_GOAL_ACTION,
42
- BROWSER_EVAL_GET_REWARDS_ACTION,
43
- )
44
-
45
- # Test browse
46
- action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_GOAL_ACTION)
47
- logger.info(action, extra={'msg_type': 'ACTION'})
48
- obs = runtime.run_action(action)
49
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
50
-
51
- assert isinstance(obs, BrowserOutputObservation)
52
- assert not obs.error
53
- assert 'Select' in obs.content
54
- assert 'from the list and click Submit' in obs.content
55
-
56
- # Make sure the browser can produce observation in eval env
57
- action = BrowseInteractiveAction(browser_actions='noop()')
58
- logger.info(action, extra={'msg_type': 'ACTION'})
59
- obs = runtime.run_action(action)
60
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
61
- assert (
62
- obs.url.strip()
63
- == 'file:///miniwob-plusplus/miniwob/html/miniwob/choose-list.html'
64
- )
65
-
66
- # Make sure the rewards are working
67
- action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
68
- logger.info(action, extra={'msg_type': 'ACTION'})
69
- obs = runtime.run_action(action)
70
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
71
- assert json.loads(obs.content) == [0.0]
72
-
73
- _close_test_runtime(runtime)
 
tests/runtime/test_browsing.py DELETED
@@ -1,213 +0,0 @@
1
- """Browsing-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
2
-
3
- import os
4
-
5
- import pytest
6
- from conftest import _close_test_runtime, _load_runtime
7
-
8
- from openhands.core.logger import openhands_logger as logger
9
- from openhands.events.action import (
10
- BrowseInteractiveAction,
11
- BrowseURLAction,
12
- CmdRunAction,
13
- )
14
- from openhands.events.observation import (
15
- BrowserOutputObservation,
16
- CmdOutputObservation,
17
- )
18
-
19
- # ============================================================================================================================
20
- # Browsing tests, without evaluation (poetry install --without evaluation)
21
- # For eval environments, tests need to run with poetry install
22
- # ============================================================================================================================
23
-
24
-
25
- @pytest.mark.skipif(
26
- os.environ.get('TEST_RUNTIME') == 'cli',
27
- reason='CLIRuntime does not support browsing actions',
28
- )
29
- def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
30
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
31
-
32
- # Test browse
33
- action_cmd = CmdRunAction(command='python3 -m http.server 8000 > server.log 2>&1 &')
34
- logger.info(action_cmd, extra={'msg_type': 'ACTION'})
35
- obs = runtime.run_action(action_cmd)
36
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
37
-
38
- assert isinstance(obs, CmdOutputObservation)
39
- assert obs.exit_code == 0
40
- assert '[1]' in obs.content
41
-
42
- action_cmd = CmdRunAction(command='sleep 3 && cat server.log')
43
- logger.info(action_cmd, extra={'msg_type': 'ACTION'})
44
- obs = runtime.run_action(action_cmd)
45
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
46
- assert obs.exit_code == 0
47
-
48
- action_browse = BrowseURLAction(url='http://localhost:8000')
49
- logger.info(action_browse, extra={'msg_type': 'ACTION'})
50
- obs = runtime.run_action(action_browse)
51
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
52
-
53
- assert isinstance(obs, BrowserOutputObservation)
54
- assert 'http://localhost:8000' in obs.url
55
- assert not obs.error
56
- assert obs.open_pages_urls == ['http://localhost:8000/']
57
- assert obs.active_page_index == 0
58
- assert obs.last_browser_action == 'goto("http://localhost:8000")'
59
- assert obs.last_browser_action_error == ''
60
- assert 'Directory listing for /' in obs.content
61
- assert 'server.log' in obs.content
62
-
63
- # clean up
64
- action = CmdRunAction(command='rm -rf server.log')
65
- logger.info(action, extra={'msg_type': 'ACTION'})
66
- obs = runtime.run_action(action)
67
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
68
- assert obs.exit_code == 0
69
-
70
- _close_test_runtime(runtime)
71
-
72
-
73
- @pytest.mark.skipif(
74
- os.environ.get('TEST_RUNTIME') == 'cli',
75
- reason='CLIRuntime does not support browsing actions',
76
- )
77
- def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands):
78
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
79
- try:
80
- # Create a PDF file using reportlab in the host environment
81
- from reportlab.lib.pagesizes import letter
82
- from reportlab.pdfgen import canvas
83
-
84
- pdf_path = os.path.join(temp_dir, 'test_document.pdf')
85
- pdf_content = 'This is test content for PDF reading test'
86
-
87
- c = canvas.Canvas(pdf_path, pagesize=letter)
88
- # Add more content to make the PDF more robust
89
- c.drawString(100, 750, pdf_content)
90
- c.drawString(100, 700, 'Additional line for PDF structure')
91
- c.drawString(100, 650, 'Third line to ensure valid PDF')
92
- # Explicitly set PDF version and ensure proper structure
93
- c.setPageCompression(0) # Disable compression for simpler structure
94
- c.save()
95
-
96
- # Copy the PDF to the sandbox
97
- sandbox_dir = config.workspace_mount_path_in_sandbox
98
- runtime.copy_to(pdf_path, sandbox_dir)
99
-
100
- # Start HTTP server
101
- action_cmd = CmdRunAction(command='ls -alh')
102
- logger.info(action_cmd, extra={'msg_type': 'ACTION'})
103
- obs = runtime.run_action(action_cmd)
104
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
105
- assert isinstance(obs, CmdOutputObservation)
106
- assert obs.exit_code == 0
107
- assert 'test_document.pdf' in obs.content
108
-
109
- # Get server url
110
- action_cmd = CmdRunAction(command='cat /tmp/oh-server-url')
111
- logger.info(action_cmd, extra={'msg_type': 'ACTION'})
112
- obs = runtime.run_action(action_cmd)
113
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
114
- assert obs.exit_code == 0
115
- server_url = obs.content.strip()
116
-
117
- # Browse to the PDF file
118
- pdf_url = f'{server_url}/view?path=/workspace/test_document.pdf'
119
- action_browse = BrowseInteractiveAction(browser_actions=f'goto("{pdf_url}")')
120
- logger.info(action_browse, extra={'msg_type': 'ACTION'})
121
- obs = runtime.run_action(action_browse)
122
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
123
-
124
- # Verify the browser observation
125
- assert isinstance(obs, BrowserOutputObservation)
126
- observation_text = str(obs)
127
- assert '[Action executed successfully.]' in observation_text
128
- assert 'Canvas' in observation_text
129
- assert (
130
- 'Screenshot saved to: /workspace/.browser_screenshots/screenshot_'
131
- in observation_text
132
- )
133
-
134
- # Check the /workspace/.browser_screenshots folder
135
- action_cmd = CmdRunAction(command='ls /workspace/.browser_screenshots')
136
- logger.info(action_cmd, extra={'msg_type': 'ACTION'})
137
- obs = runtime.run_action(action_cmd)
138
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
139
- assert isinstance(obs, CmdOutputObservation)
140
- assert obs.exit_code == 0
141
- assert 'screenshot_' in obs.content
142
- assert '.png' in obs.content
143
- finally:
144
- _close_test_runtime(runtime)
145
-
146
-
147
- @pytest.mark.skipif(
148
- os.environ.get('TEST_RUNTIME') == 'cli',
149
- reason='CLIRuntime does not support browsing actions',
150
- )
151
- def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands):
152
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
153
- try:
154
- # Create a PNG file using PIL in the host environment
155
- from PIL import Image, ImageDraw
156
-
157
- png_path = os.path.join(temp_dir, 'test_image.png')
158
- # Create a simple image with text
159
- img = Image.new('RGB', (400, 200), color=(255, 255, 255))
160
- d = ImageDraw.Draw(img)
161
- text = 'This is a test PNG image'
162
- d.text((20, 80), text, fill=(0, 0, 0))
163
- img.save(png_path)
164
-
165
- # Copy the PNG to the sandbox
166
- sandbox_dir = config.workspace_mount_path_in_sandbox
167
- runtime.copy_to(png_path, sandbox_dir)
168
-
169
- # Verify the file exists in the sandbox
170
- action_cmd = CmdRunAction(command='ls -alh')
171
- logger.info(action_cmd, extra={'msg_type': 'ACTION'})
172
- obs = runtime.run_action(action_cmd)
173
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
174
- assert isinstance(obs, CmdOutputObservation)
175
- assert obs.exit_code == 0
176
- assert 'test_image.png' in obs.content
177
-
178
- # Get server url
179
- action_cmd = CmdRunAction(command='cat /tmp/oh-server-url')
180
- logger.info(action_cmd, extra={'msg_type': 'ACTION'})
181
- obs = runtime.run_action(action_cmd)
182
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
183
- assert obs.exit_code == 0
184
- server_url = obs.content.strip()
185
-
186
- # Browse to the PNG file
187
- png_url = f'{server_url}/view?path=/workspace/test_image.png'
188
- action_browse = BrowseInteractiveAction(browser_actions=f'goto("{png_url}")')
189
- logger.info(action_browse, extra={'msg_type': 'ACTION'})
190
- obs = runtime.run_action(action_browse)
191
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
192
-
193
- # Verify the browser observation
194
- assert isinstance(obs, BrowserOutputObservation)
195
- observation_text = str(obs)
196
- assert '[Action executed successfully.]' in observation_text
197
- assert 'File Viewer - test_image.png' in observation_text
198
- assert (
199
- 'Screenshot saved to: /workspace/.browser_screenshots/screenshot_'
200
- in observation_text
201
- )
202
-
203
- # Check the /workspace/.browser_screenshots folder
204
- action_cmd = CmdRunAction(command='ls /workspace/.browser_screenshots')
205
- logger.info(action_cmd, extra={'msg_type': 'ACTION'})
206
- obs = runtime.run_action(action_cmd)
207
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
208
- assert isinstance(obs, CmdOutputObservation)
209
- assert obs.exit_code == 0
210
- assert 'screenshot_' in obs.content
211
- assert '.png' in obs.content
212
- finally:
213
- _close_test_runtime(runtime)
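Both deleted tests above walk the same path through the runtime's built-in file viewer: read the viewer URL that the sandbox writes to `/tmp/oh-server-url`, browse to `/view?path=...`, and confirm a screenshot landed in `/workspace/.browser_screenshots`. The sketch below condenses that flow for reference; it assumes a `runtime` object obtained from the same `_load_runtime` helper in `conftest.py` and is not an existing utility.

```python
from openhands.events.action import BrowseInteractiveAction, CmdRunAction


def view_file_in_browser(runtime, sandbox_path: str):
    """Sketch of the file-viewer flow exercised by the deleted browsing tests."""
    # The sandbox writes its file-viewer base URL to /tmp/oh-server-url.
    obs = runtime.run_action(CmdRunAction(command='cat /tmp/oh-server-url'))
    assert obs.exit_code == 0
    server_url = obs.content.strip()

    # Browse to the built-in viewer for the given file.
    browse = BrowseInteractiveAction(
        browser_actions=f'goto("{server_url}/view?path={sandbox_path}")'
    )
    browse_obs = runtime.run_action(browse)

    # A screenshot of the rendered page is saved under /workspace/.browser_screenshots.
    shots_obs = runtime.run_action(
        CmdRunAction(command='ls /workspace/.browser_screenshots')
    )
    return browse_obs, shots_obs
```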
 
 
tests/runtime/test_docker_images.py DELETED
@@ -1,96 +0,0 @@
1
- """Image-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
2
-
3
- import os
4
-
5
- import pytest
6
- from conftest import _close_test_runtime, _load_runtime
7
-
8
- from openhands.core.logger import openhands_logger as logger
9
- from openhands.events.action import CmdRunAction
10
-
11
- # ============================================================================================================================
12
- # Image-specific tests
13
- # ============================================================================================================================
14
-
15
- # Skip all tests in this file if running with CLIRuntime or LocalRuntime,
16
- # as these tests are specific to Docker images.
17
- pytestmark = pytest.mark.skipif(
18
- os.environ.get('TEST_RUNTIME') in ['cli', 'local'],
19
- reason='Image tests are specific to DockerRuntime and not applicable to CLIRuntime or LocalRuntime.',
20
- )
21
-
22
-
23
- def test_bash_python_version(temp_dir, runtime_cls, base_container_image):
24
- """Make sure Python is available in bash."""
25
- if base_container_image not in [
26
- 'python:3.12-bookworm',
27
- ]:
28
- pytest.skip('This test is only for python-related images')
29
-
30
- runtime, config = _load_runtime(
31
- temp_dir, runtime_cls, base_container_image=base_container_image
32
- )
33
-
34
- action = CmdRunAction(command='which python')
35
- logger.info(action, extra={'msg_type': 'ACTION'})
36
- obs = runtime.run_action(action)
37
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
38
- assert obs.exit_code == 0
39
-
40
- action = CmdRunAction(command='python --version')
41
- logger.info(action, extra={'msg_type': 'ACTION'})
42
- obs = runtime.run_action(action)
43
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
44
- assert obs.exit_code == 0
45
- assert 'Python 3.12' in obs.content # Check for specific version
46
-
47
- action = CmdRunAction(command='pip --version')
48
- logger.info(action, extra={'msg_type': 'ACTION'})
49
- obs = runtime.run_action(action)
50
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
51
- assert obs.exit_code == 0
52
- assert 'pip' in obs.content # Check that pip is available
53
-
54
- _close_test_runtime(runtime)
55
-
56
-
57
- def test_nodejs_22_version(temp_dir, runtime_cls, base_container_image):
58
- """Make sure Node.js is available in bash."""
59
- if base_container_image not in [
60
- 'node:22-bookworm',
61
- ]:
62
- pytest.skip('This test is only for nodejs-related images')
63
-
64
- runtime, config = _load_runtime(
65
- temp_dir, runtime_cls, base_container_image=base_container_image
66
- )
67
-
68
- action = CmdRunAction(command='node --version')
69
- logger.info(action, extra={'msg_type': 'ACTION'})
70
- obs = runtime.run_action(action)
71
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
72
- assert obs.exit_code == 0
73
- assert 'v22' in obs.content # Check for specific version
74
-
75
- _close_test_runtime(runtime)
76
-
77
-
78
- def test_go_version(temp_dir, runtime_cls, base_container_image):
79
- """Make sure Go is available in bash."""
80
- if base_container_image not in [
81
- 'golang:1.23-bookworm',
82
- ]:
83
- pytest.skip('This test is only for go-related images')
84
-
85
- runtime, config = _load_runtime(
86
- temp_dir, runtime_cls, base_container_image=base_container_image
87
- )
88
-
89
- action = CmdRunAction(command='go version')
90
- logger.info(action, extra={'msg_type': 'ACTION'})
91
- obs = runtime.run_action(action)
92
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
93
- assert obs.exit_code == 0
94
- assert 'go1.23' in obs.content # Check for specific version
95
-
96
- _close_test_runtime(runtime)
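The three image tests above repeat one pattern: start a runtime on a specific base image, run a version command, and assert on the output. Below is a hedged sketch of how that pattern could be folded into a single parametrized test; the expected strings come from the assertions above, while the test function itself is hypothetical and not part of the deleted file.

```python
import pytest
from conftest import _close_test_runtime, _load_runtime

from openhands.events.action import CmdRunAction


@pytest.mark.parametrize(
    'base_image,command,expected',
    [
        ('python:3.12-bookworm', 'python --version', 'Python 3.12'),
        ('node:22-bookworm', 'node --version', 'v22'),
        ('golang:1.23-bookworm', 'go version', 'go1.23'),
    ],
)
def test_image_tool_version(temp_dir, runtime_cls, base_image, command, expected):
    runtime, _config = _load_runtime(
        temp_dir, runtime_cls, base_container_image=base_image
    )
    try:
        obs = runtime.run_action(CmdRunAction(command=command))
        assert obs.exit_code == 0
        assert expected in obs.content
    finally:
        _close_test_runtime(runtime)
```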
 
 
tests/runtime/test_env_vars.py DELETED
@@ -1,120 +0,0 @@
1
- """Env vars related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
2
-
3
- import os
4
- from unittest.mock import patch
5
-
6
- import pytest
7
- from conftest import _close_test_runtime, _load_runtime
8
-
9
- from openhands.events.action import CmdRunAction
10
- from openhands.events.observation import CmdOutputObservation
11
-
12
- # ============================================================================================================================
13
- # Environment variables tests
14
- # ============================================================================================================================
15
-
16
-
17
- def test_env_vars_os_environ(temp_dir, runtime_cls, run_as_openhands):
18
- with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
19
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
20
-
21
- obs: CmdOutputObservation = runtime.run_action(CmdRunAction(command='env'))
22
- print(obs)
23
-
24
- obs: CmdOutputObservation = runtime.run_action(
25
- CmdRunAction(command='echo $FOOBAR')
26
- )
27
- print(obs)
28
- assert obs.exit_code == 0, 'The exit code should be 0.'
29
- assert obs.content.strip().split('\n\r')[0].strip() == 'BAZ', (
30
- f'Output: [{obs.content}] for {runtime_cls}'
31
- )
32
-
33
- _close_test_runtime(runtime)
34
-
35
-
36
- def test_env_vars_runtime_operations(temp_dir, runtime_cls):
37
- runtime, config = _load_runtime(temp_dir, runtime_cls)
38
-
39
- # Test adding single env var
40
- runtime.add_env_vars({'QUUX': 'abc"def'})
41
- obs = runtime.run_action(CmdRunAction(command='echo $QUUX'))
42
- assert (
43
- obs.exit_code == 0 and obs.content.strip().split('\r\n')[0].strip() == 'abc"def'
44
- )
45
-
46
- # Test adding multiple env vars
47
- runtime.add_env_vars({'FOOBAR': 'xyz'})
48
- obs = runtime.run_action(CmdRunAction(command='echo $QUUX $FOOBAR'))
49
- assert (
50
- obs.exit_code == 0
51
- and obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz'
52
- )
53
-
54
- # Test adding empty dict
55
- prev_env = runtime.run_action(CmdRunAction(command='env')).content
56
- runtime.add_env_vars({})
57
- current_env = runtime.run_action(CmdRunAction(command='env')).content
58
- assert prev_env == current_env
59
-
60
- # Test overwriting env vars
61
- runtime.add_env_vars({'QUUX': 'new_value'})
62
- obs = runtime.run_action(CmdRunAction(command='echo $QUUX'))
63
- assert (
64
- obs.exit_code == 0
65
- and obs.content.strip().split('\r\n')[0].strip() == 'new_value'
66
- )
67
-
68
- _close_test_runtime(runtime)
69
-
70
-
71
- def test_env_vars_added_by_config(temp_dir, runtime_cls):
72
- runtime, config = _load_runtime(
73
- temp_dir,
74
- runtime_cls,
75
- runtime_startup_env_vars={'ADDED_ENV_VAR': 'added_value'},
76
- )
77
-
78
- # Verify the env var provided at startup is present
79
- obs = runtime.run_action(CmdRunAction(command='echo $ADDED_ENV_VAR'))
80
- assert (
81
- obs.exit_code == 0
82
- and obs.content.strip().split('\r\n')[0].strip() == 'added_value'
83
- )
84
- _close_test_runtime(runtime)
85
-
86
-
87
- @pytest.mark.skipif(
88
- os.environ.get('TEST_RUNTIME') in ['cli', 'local'],
89
- reason='This test is specific to DockerRuntime and its pause/resume persistence',
90
- )
91
- def test_docker_runtime_env_vars_persist_after_restart(temp_dir):
92
- from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
93
-
94
- runtime, config = _load_runtime(temp_dir, DockerRuntime)
95
-
96
- # Add a test environment variable
97
- runtime.add_env_vars({'GITHUB_TOKEN': 'test_token'})
98
-
99
- # Verify the variable is set in current session
100
- obs = runtime.run_action(CmdRunAction(command='echo $GITHUB_TOKEN'))
101
- assert obs.exit_code == 0
102
- assert obs.content.strip().split('\r\n')[0].strip() == 'test_token'
103
-
104
- # Verify the variable is added to .bashrc
105
- obs = runtime.run_action(
106
- CmdRunAction(command='grep "^export GITHUB_TOKEN=" ~/.bashrc')
107
- )
108
- assert obs.exit_code == 0
109
- assert 'export GITHUB_TOKEN=' in obs.content
110
-
111
- # Test pause/resume cycle
112
- runtime.pause()
113
- runtime.resume()
114
-
115
- # Verify the variable persists after restart
116
- obs = runtime.run_action(CmdRunAction(command='echo $GITHUB_TOKEN'))
117
- assert obs.exit_code == 0
118
- assert obs.content.strip().split('\r\n')[0].strip() == 'test_token'
119
-
120
- _close_test_runtime(runtime)
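`test_env_vars_os_environ` above relies on the convention that a host variable named `SANDBOX_ENV_FOOBAR` shows up inside the sandbox as plain `FOOBAR`. The helper below is a minimal sketch of that prefix-stripping convention as implied by the test's assertions; it is an assumption about the behavior, not the runtime's actual implementation.

```python
import os

SANDBOX_ENV_PREFIX = 'SANDBOX_ENV_'


def sandbox_env_from_host() -> dict[str, str]:
    """Collect host vars prefixed with SANDBOX_ENV_ and strip the prefix,
    e.g. SANDBOX_ENV_FOOBAR=BAZ on the host becomes FOOBAR=BAZ in the sandbox."""
    return {
        key[len(SANDBOX_ENV_PREFIX):]: value
        for key, value in os.environ.items()
        if key.startswith(SANDBOX_ENV_PREFIX) and len(key) > len(SANDBOX_ENV_PREFIX)
    }
```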
 
 
tests/runtime/test_glob_and_grep.py DELETED
@@ -1,303 +0,0 @@
1
- """Tests for the command helper functions in function_calling.py."""
2
-
3
- import os
4
-
5
- import pytest
6
- from conftest import (
7
- _close_test_runtime,
8
- _load_runtime,
9
- )
10
-
11
- from openhands.agenthub.readonly_agent.function_calling import (
12
- glob_to_cmdrun,
13
- grep_to_cmdrun,
14
- )
15
- from openhands.core.logger import openhands_logger as logger
16
- from openhands.events.action import CmdRunAction
17
- from openhands.events.observation import CmdOutputObservation, ErrorObservation
18
-
19
- # Skip all tests in this file if running with CLIRuntime,
20
- # as they depend on `rg` (ripgrep) which is not guaranteed to be available.
21
- # The underlying ReadOnlyAgent tools (GrepTool, GlobTool) also currently depend on `rg`.
22
- # TODO: implement a fallback version of these tools that uses `find` and `grep`.
23
- pytestmark = pytest.mark.skipif(
24
- os.environ.get('TEST_RUNTIME') == 'cli',
25
- reason="CLIRuntime: ReadOnlyAgent's GrepTool/GlobTool tests require `rg` (ripgrep), which may not be installed.",
26
- )
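The TODO above asks for a `find`/`grep` fallback so these tools can run where ripgrep is missing. A minimal sketch of what such a fallback command builder could look like is shown here; the function is hypothetical and not part of `function_calling.py`.

```python
import shlex


def grep_to_cmdrun_fallback(pattern: str, path: str = '.', include: str | None = None) -> str:
    """Build a find+grep command that mimics `rg -li pattern path [--glob include]`:
    list files whose contents match the pattern, case-insensitively."""
    name_filter = f'-name {shlex.quote(include)} ' if include else ''
    return (
        f'find {shlex.quote(path)} -type f {name_filter}'
        f'-exec grep -lIiE {shlex.quote(pattern)} {{}} + | head -n 100'
    )
```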
27
-
28
-
29
- def _run_cmd_action(runtime, custom_command: str):
30
- action = CmdRunAction(command=custom_command)
31
- logger.info(action, extra={'msg_type': 'ACTION'})
32
- obs = runtime.run_action(action)
33
- assert isinstance(obs, (CmdOutputObservation, ErrorObservation))
34
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
35
- return obs
36
-
37
-
38
- def test_grep_to_cmdrun_basic():
39
- """Test basic pattern with no special characters."""
40
- cmd = grep_to_cmdrun('function', 'src')
41
- assert 'rg -li function' in cmd
42
- assert 'Below are the execution results' in cmd
43
-
44
- # With include parameter
45
- cmd = grep_to_cmdrun('error', 'src', '*.js')
46
- assert 'rg -li error' in cmd
47
- assert "--glob '*.js'" in cmd
48
- assert 'Below are the execution results' in cmd
49
-
50
-
51
- def test_grep_to_cmdrun_quotes(temp_dir, runtime_cls, run_as_openhands):
52
- """Test patterns with different types of quotes."""
53
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
54
- try:
55
- # Double quotes in pattern
56
- cmd = grep_to_cmdrun(r'const message = "Hello"', '/workspace')
57
- assert 'rg -li' in cmd
58
-
59
- # Verify command works by executing it on a test file
60
- setup_cmd = 'echo \'const message = "Hello";\' > /workspace/test_quotes.js'
61
- obs = _run_cmd_action(runtime, setup_cmd)
62
- assert obs.exit_code == 0
63
-
64
- obs = _run_cmd_action(runtime, cmd)
65
- assert obs.exit_code == 0
66
- assert '/workspace/test_quotes.js' in obs.content
67
-
68
- # Single quotes in pattern
69
- cmd = grep_to_cmdrun("function\\('test'\\)", '/workspace')
70
- assert 'rg -li' in cmd
71
-
72
- setup_cmd = 'echo "function(\'test\') {}" > /workspace/test_quotes2.js'
73
- obs = _run_cmd_action(runtime, setup_cmd)
74
- assert obs.exit_code == 0
75
-
76
- obs = _run_cmd_action(runtime, cmd)
77
- assert obs.exit_code == 0
78
- assert '/workspace/test_quotes2.js' in obs.content
79
- finally:
80
- _close_test_runtime(runtime)
81
-
82
-
83
- def test_grep_to_cmdrun_special_chars(runtime_cls, run_as_openhands, temp_dir):
84
- """Test patterns with special shell characters."""
85
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
86
- try:
87
- # Create test directory and files with special pattern content
88
- setup_cmd = """
89
- mkdir -p /workspace/test_special_patterns && \
90
- echo "testing x && y || z pattern" > /workspace/test_special_patterns/logical.txt && \
91
- echo "function() { return x; }" > /workspace/test_special_patterns/function.txt && \
92
- echo "using \\$variable here" > /workspace/test_special_patterns/dollar.txt && \
93
- echo "using \\`backticks\\` here" > /workspace/test_special_patterns/backticks.txt && \
94
- echo "line with \\n newline chars" > /workspace/test_special_patterns/newline.txt && \
95
- echo "matching *.js wildcard" > /workspace/test_special_patterns/wildcard.txt && \
96
- echo "testing x > y redirection" > /workspace/test_special_patterns/redirect.txt && \
97
- echo "testing a | b pipe" > /workspace/test_special_patterns/pipe.txt && \
98
- echo "line with #comment" > /workspace/test_special_patterns/comment.txt && \
99
- echo "CSS \\!important rule" > /workspace/test_special_patterns/bang.txt
100
- """
101
- obs = _run_cmd_action(runtime, setup_cmd)
102
- assert obs.exit_code == 0, 'Failed to set up test files'
103
-
104
- special_patterns = [
105
- r'x && y \|\| z', # Shell logical operators (escaping pipe)
106
- r'function\(\) \{ return x; \}', # Properly escaped braces and parentheses
107
- r'\$variable', # Dollar sign
108
- # r"`backticks`", # Backticks
109
- r'\\n newline', # Escaped characters
110
- r'\*\.js', # Wildcards (escaped)
111
- r'x > y', # Redirection
112
- r'a \| b', # Pipe (escaped)
113
- r'#comment', # Hash
114
- # r"!important", # Bang
115
- ]
116
-
117
- for pattern in special_patterns:
118
- # Generate the grep command using our helper function
119
- cmd = grep_to_cmdrun(pattern, '/workspace/test_special_patterns')
120
- assert 'rg -li' in cmd
121
- assert 'Below are the execution results of the search command:' in cmd
122
-
123
- # Execute the command
124
- obs = _run_cmd_action(runtime, cmd)
125
-
126
- # Verify the command executed successfully
127
- assert 'command not found' not in obs.content
128
- assert 'syntax error' not in obs.content
129
- assert 'unexpected' not in obs.content
130
-
131
- # Check that the pattern was found in the appropriate file
132
- if '&&' in pattern:
133
- assert 'logical.txt' in obs.content
134
- elif 'function' in pattern:
135
- assert 'function.txt' in obs.content
136
- elif '$variable' in pattern:
137
- assert 'dollar.txt' in obs.content
138
- # elif "backticks" in pattern:
139
- # assert "backticks.txt" in obs.content
140
- elif '\\n newline' in pattern:
141
- assert 'newline.txt' in obs.content
142
- elif '*' in pattern:
143
- assert 'wildcard.txt' in obs.content
144
- elif '>' in pattern:
145
- assert 'redirect.txt' in obs.content
146
- elif '|' in pattern:
147
- assert 'pipe.txt' in obs.content
148
- elif '#comment' in pattern:
149
- assert 'comment.txt' in obs.content
150
- # elif "!important" in pattern:
151
- # assert "bang.txt" in obs.content
152
- finally:
153
- _close_test_runtime(runtime)
154
-
155
-
156
- def test_grep_to_cmdrun_paths_with_spaces(runtime_cls, run_as_openhands, temp_dir):
157
- """Test paths with spaces and special characters."""
158
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
159
- try:
160
- # Create test files with content in paths with spaces
161
- setup_cmd = """
162
- mkdir -p "src/my project" "test files/unit tests" "src/special$chars" "path with spaces and $pecial ch@rs" && \
163
- echo "function searchablePattern() { return true; }" > "src/my project/test.js" && \
164
- echo "function testFunction() { return 42; }" > "test files/unit tests/test.js" && \
165
- echo "function specialFunction() { return null; }" > "src/special$chars/test.js" && \
166
- echo "function weirdFunction() { return []; }" > "path with spaces and $pecial ch@rs/test.js"
167
- """
168
- obs = _run_cmd_action(runtime, setup_cmd)
169
- assert obs.exit_code == 0, 'Failed to set up test files'
170
-
171
- special_paths = [
172
- 'src/my project',
173
- 'test files/unit tests',
174
- ]
175
-
176
- for path in special_paths:
177
- # Generate grep command and execute it
178
- cmd = grep_to_cmdrun('function', path)
179
- assert 'rg -li' in cmd
180
-
181
- obs = _run_cmd_action(runtime, cmd)
182
- assert obs.exit_code == 0, f'Grep command failed for path: {path}'
183
- assert 'function' in obs.content, (
184
- f'Expected pattern not found in output for path: {path}'
185
- )
186
-
187
- # Verify the actual file was found
188
- if path == 'src/my project':
189
- assert 'src/my project/test.js' in obs.content
190
- elif path == 'test files/unit tests':
191
- assert 'test files/unit tests/test.js' in obs.content
192
- finally:
193
- _close_test_runtime(runtime)
194
-
195
-
196
- def test_glob_to_cmdrun_basic():
197
- """Test basic glob patterns."""
198
- cmd = glob_to_cmdrun('*.js', 'src')
199
- assert "rg --files src -g '*.js'" in cmd
200
- assert 'head -n 100' in cmd
201
- assert 'echo "Below are the execution results of the glob command:' in cmd
202
-
203
- # Default path
204
- cmd = glob_to_cmdrun('*.py')
205
- assert "rg --files . -g '*.py'" in cmd
206
- assert 'head -n 100' in cmd
207
- assert 'echo "Below are the execution results of the glob command:' in cmd
208
-
209
-
210
- def test_glob_to_cmdrun_special_patterns(runtime_cls, run_as_openhands, temp_dir):
211
- """Test glob patterns with special characters."""
212
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
213
- try:
214
- # Create test files matching the patterns we'll test
215
- setup_cmd = r"""
216
- mkdir -p src/components src/utils && \
217
- touch src/file1.js src/file2.js src/file9.js && \
218
- touch src/components/comp.jsx src/components/comp.tsx && \
219
- touch src/$special-file.js && \
220
- touch src/temp1.js src/temp2.js && \
221
- touch src/file.js src/file.ts src/file.jsx && \
222
- touch "src/weird\`file\`.js" && \
223
- touch "src/file with spaces.js"
224
- """
225
- obs = _run_cmd_action(runtime, setup_cmd)
226
- assert obs.exit_code == 0, 'Failed to set up test files'
227
-
228
- special_patterns = [
229
- '**/*.js', # Double glob
230
- '**/{*.jsx,*.tsx}', # Braces
231
- 'file[0-9].js', # Character class
232
- 'temp?.js', # Single character wildcard
233
- 'file.{js,ts,jsx}', # Multiple extensions
234
- 'file with spaces.js', # Spaces
235
- ]
236
-
237
- for pattern in special_patterns:
238
- cmd = glob_to_cmdrun(pattern, 'src')
239
- logger.info(f'Command: {cmd}')
240
- # Execute the command
241
- obs = _run_cmd_action(runtime, cmd)
242
- assert obs.exit_code == 0, f'Glob command failed for pattern: {pattern}'
243
-
244
- # Verify expected files are found
245
- if pattern == '**/*.js':
246
- assert 'file1.js' in obs.content
247
- assert 'file2.js' in obs.content
248
- elif pattern == '**/{*.jsx,*.tsx}':
249
- assert 'comp.jsx' in obs.content
250
- assert 'comp.tsx' in obs.content
251
- elif pattern == 'file[0-9].js':
252
- assert 'file1.js' in obs.content
253
- assert 'file2.js' in obs.content
254
- assert 'file9.js' in obs.content
255
- elif pattern == 'temp?.js':
256
- assert 'temp1.js' in obs.content
257
- assert 'temp2.js' in obs.content
258
- elif pattern == 'file.{js,ts,jsx}':
259
- assert 'file.js' in obs.content
260
- assert 'file.ts' in obs.content
261
- assert 'file.jsx' in obs.content
262
- elif pattern == 'file with spaces.js':
263
- assert 'file with spaces.js' in obs.content
264
- finally:
265
- _close_test_runtime(runtime)
266
-
267
-
268
- def test_glob_to_cmdrun_paths_with_spaces(runtime_cls, run_as_openhands, temp_dir):
269
- """Test paths with spaces and special characters for glob command."""
270
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
271
- try:
272
- # Create test directories with spaces and special characters
273
- setup_cmd = """
274
- mkdir -p "project files/src" "test results/unit tests" "weird$path/code" "path with spaces and $pecial ch@rs" && \
275
- touch "project files/src/file1.js" "project files/src/file2.js" && \
276
- touch "test results/unit tests/test1.js" "test results/unit tests/test2.js" && \
277
- touch "weird$path/code/weird1.js" "weird$path/code/weird2.js" && \
278
- touch "path with spaces and $pecial ch@rs/special1.js" "path with spaces and $pecial ch@rs/special2.js"
279
- """
280
- obs = _run_cmd_action(runtime, setup_cmd)
281
- assert obs.exit_code == 0, 'Failed to set up test files'
282
-
283
- special_paths = [
284
- 'project files/src',
285
- 'test results/unit tests',
286
- ]
287
-
288
- for path in special_paths:
289
- cmd = glob_to_cmdrun('*.js', path)
290
-
291
- # Execute the command
292
- obs = _run_cmd_action(runtime, cmd)
293
- assert obs.exit_code == 0, f'Glob command failed for path: {path}'
294
-
295
- # Verify expected files are found in each path
296
- if path == 'project files/src':
297
- assert 'file1.js' in obs.content
298
- assert 'file2.js' in obs.content
299
- elif path == 'test results/unit tests':
300
- assert 'test1.js' in obs.content
301
- assert 'test2.js' in obs.content
302
- finally:
303
- _close_test_runtime(runtime)
 
 
tests/runtime/test_ipython.py DELETED
@@ -1,382 +0,0 @@
1
- """Test the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
2
-
3
- import os
4
-
5
- import pytest
6
- from conftest import (
7
- TEST_IN_CI,
8
- _close_test_runtime,
9
- _load_runtime,
10
- )
11
-
12
- from openhands.core.logger import openhands_logger as logger
13
- from openhands.events.action import (
14
- CmdRunAction,
15
- FileReadAction,
16
- FileWriteAction,
17
- IPythonRunCellAction,
18
- )
19
- from openhands.events.observation import (
20
- CmdOutputObservation,
21
- ErrorObservation,
22
- FileReadObservation,
23
- FileWriteObservation,
24
- IPythonRunCellObservation,
25
- )
26
-
27
- # ============================================================================================================================
28
- # ipython-specific tests
29
- # ============================================================================================================================
30
-
31
-
32
- @pytest.mark.skipif(
33
- os.environ.get('TEST_RUNTIME') == 'cli',
34
- reason='CLIRuntime does not support full IPython/Jupyter kernel features or return IPythonRunCellObservation',
35
- )
36
- def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
37
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
38
-
39
- # Test run command
40
- action_cmd = CmdRunAction(command='ls -l')
41
- logger.info(action_cmd, extra={'msg_type': 'ACTION'})
42
- obs = runtime.run_action(action_cmd)
43
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
44
-
45
- assert isinstance(obs, CmdOutputObservation)
46
- assert obs.exit_code == 0
47
- assert 'total 0' in obs.content
48
-
49
- # Test run ipython
50
- test_code = "print('Hello, `World`!\\n')"
51
- action_ipython = IPythonRunCellAction(code=test_code)
52
- logger.info(action_ipython, extra={'msg_type': 'ACTION'})
53
- obs = runtime.run_action(action_ipython)
54
- assert isinstance(obs, IPythonRunCellObservation)
55
-
56
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
57
- assert obs.content.strip() == (
58
- 'Hello, `World`!\n'
59
- '[Jupyter current working directory: /workspace]\n'
60
- '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
61
- )
62
-
63
- # Test read file (file should not exist)
64
- action_read = FileReadAction(path='hello.sh')
65
- logger.info(action_read, extra={'msg_type': 'ACTION'})
66
- obs = runtime.run_action(action_read)
67
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
68
- assert isinstance(obs, ErrorObservation)
69
- assert 'File not found' in obs.content
70
-
71
- # Test write file
72
- action_write = FileWriteAction(content='echo "Hello, World!"', path='hello.sh')
73
- logger.info(action_write, extra={'msg_type': 'ACTION'})
74
- obs = runtime.run_action(action_write)
75
- assert isinstance(obs, FileWriteObservation)
76
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
77
-
78
- assert obs.content == ''
79
- # event stream runtime will always use absolute path
80
- assert obs.path == '/workspace/hello.sh'
81
-
82
- # Test read file (file should exist)
83
- action_read = FileReadAction(path='hello.sh')
84
- logger.info(action_read, extra={'msg_type': 'ACTION'})
85
- obs = runtime.run_action(action_read)
86
- assert isinstance(obs, FileReadObservation), (
87
- 'The observation should be a FileReadObservation.'
88
- )
89
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
90
-
91
- assert obs.content == 'echo "Hello, World!"\n'
92
- assert obs.path == '/workspace/hello.sh'
93
-
94
- # clean up
95
- action = CmdRunAction(command='rm -rf hello.sh')
96
- logger.info(action, extra={'msg_type': 'ACTION'})
97
- obs = runtime.run_action(action)
98
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
99
- assert obs.exit_code == 0
100
-
101
- _close_test_runtime(runtime)
102
-
103
-
104
- @pytest.mark.skipif(
105
- TEST_IN_CI != 'True',
106
- reason='This test is not working in WSL (file ownership)',
107
- )
108
- @pytest.mark.skipif(
109
- os.environ.get('TEST_RUNTIME') == 'cli',
110
- reason='CLIRuntime does not support full IPython/Jupyter kernel features or return IPythonRunCellObservation',
111
- )
112
- def test_ipython_multi_user(temp_dir, runtime_cls, run_as_openhands):
113
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
114
-
115
- # Test run ipython
116
- # get username
117
- test_code = "import os; print(os.environ['USER'])"
118
- action_ipython = IPythonRunCellAction(code=test_code)
119
- logger.info(action_ipython, extra={'msg_type': 'ACTION'})
120
- obs = runtime.run_action(action_ipython)
121
- assert isinstance(obs, IPythonRunCellObservation)
122
-
123
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
124
- if run_as_openhands:
125
- assert 'openhands' in obs.content
126
- else:
127
- assert 'root' in obs.content
128
-
129
- # print the current working directory
130
- test_code = 'import os; print(os.getcwd())'
131
- action_ipython = IPythonRunCellAction(code=test_code)
132
- logger.info(action_ipython, extra={'msg_type': 'ACTION'})
133
- obs = runtime.run_action(action_ipython)
134
- assert isinstance(obs, IPythonRunCellObservation)
135
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
136
- assert (
137
- obs.content.strip()
138
- == (
139
- '/workspace\n'
140
- '[Jupyter current working directory: /workspace]\n'
141
- '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
142
- ).strip()
143
- )
144
-
145
- # write a file
146
- test_code = "with open('test.txt', 'w') as f: f.write('Hello, world!')"
147
- action_ipython = IPythonRunCellAction(code=test_code)
148
- logger.info(action_ipython, extra={'msg_type': 'ACTION'})
149
- obs = runtime.run_action(action_ipython)
150
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
151
- assert isinstance(obs, IPythonRunCellObservation)
152
- assert (
153
- obs.content.strip()
154
- == (
155
- '[Code executed successfully with no output]\n'
156
- '[Jupyter current working directory: /workspace]\n'
157
- '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
158
- ).strip()
159
- )
160
-
161
- # check file owner via bash
162
- action = CmdRunAction(command='ls -alh test.txt')
163
- logger.info(action, extra={'msg_type': 'ACTION'})
164
- obs = runtime.run_action(action)
165
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
166
- assert obs.exit_code == 0
167
- if run_as_openhands:
168
- # -rw-r--r-- 1 openhands root 13 Jul 28 03:53 test.txt
169
- assert 'openhands' in obs.content.split('\r\n')[0]
170
- else:
171
- # -rw-r--r-- 1 root root 13 Jul 28 03:53 test.txt
172
- assert 'root' in obs.content.split('\r\n')[0]
173
-
174
- # clean up
175
- action = CmdRunAction(command='rm -rf test')
176
- logger.info(action, extra={'msg_type': 'ACTION'})
177
- obs = runtime.run_action(action)
178
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
179
- assert obs.exit_code == 0
180
-
181
- _close_test_runtime(runtime)
182
-
183
-
184
- @pytest.mark.skipif(
185
- os.environ.get('TEST_RUNTIME') == 'cli',
186
- reason='CLIRuntime does not support full IPython/Jupyter kernel features or return IPythonRunCellObservation',
187
- )
188
- def test_ipython_simple(temp_dir, runtime_cls):
189
- runtime, config = _load_runtime(temp_dir, runtime_cls)
190
-
191
- # Test run ipython
192
- # get username
193
- test_code = 'print(1)'
194
- action_ipython = IPythonRunCellAction(code=test_code)
195
- logger.info(action_ipython, extra={'msg_type': 'ACTION'})
196
- obs = runtime.run_action(action_ipython)
197
- assert isinstance(obs, IPythonRunCellObservation)
198
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
199
- assert (
200
- obs.content.strip()
201
- == (
202
- '1\n'
203
- '[Jupyter current working directory: /workspace]\n'
204
- '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
205
- ).strip()
206
- )
207
-
208
- _close_test_runtime(runtime)
209
-
210
-
211
- @pytest.mark.skipif(
212
- os.environ.get('TEST_RUNTIME') == 'cli',
213
- reason='CLIRuntime does not support full IPython/Jupyter kernel features or return IPythonRunCellObservation',
214
- )
215
- def test_ipython_chdir(temp_dir, runtime_cls):
216
- """Test that os.chdir correctly handles paths with slashes."""
217
- runtime, config = _load_runtime(temp_dir, runtime_cls)
218
-
219
- # Create a test directory and get its absolute path
220
- test_code = """
221
- import os
222
- os.makedirs('test_dir', exist_ok=True)
223
- abs_path = os.path.abspath('test_dir')
224
- print(abs_path)
225
- """
226
- action_ipython = IPythonRunCellAction(code=test_code)
227
- logger.info(action_ipython, extra={'msg_type': 'ACTION'})
228
- obs = runtime.run_action(action_ipython)
229
- assert isinstance(obs, IPythonRunCellObservation)
230
- test_dir_path = obs.content.split('\n')[0].strip()
231
- logger.info(f'test_dir_path: {test_dir_path}')
232
- assert test_dir_path # Verify we got a valid path
233
-
234
- # Change to the test directory using its absolute path
235
- test_code = f"""
236
- import os
237
- os.chdir(r'{test_dir_path}')
238
- print(os.getcwd())
239
- """
240
- action_ipython = IPythonRunCellAction(code=test_code)
241
- logger.info(action_ipython, extra={'msg_type': 'ACTION'})
242
- obs = runtime.run_action(action_ipython)
243
- assert isinstance(obs, IPythonRunCellObservation)
244
- current_dir = obs.content.split('\n')[0].strip()
245
- assert current_dir == test_dir_path # Verify we changed to the correct directory
246
-
247
- # Clean up
248
- test_code = """
249
- import os
250
- import shutil
251
- shutil.rmtree('test_dir', ignore_errors=True)
252
- """
253
- action_ipython = IPythonRunCellAction(code=test_code)
254
- logger.info(action_ipython, extra={'msg_type': 'ACTION'})
255
- obs = runtime.run_action(action_ipython)
256
- assert isinstance(obs, IPythonRunCellObservation)
257
-
258
- _close_test_runtime(runtime)
259
-
260
-
261
- @pytest.mark.skipif(
262
- os.environ.get('TEST_RUNTIME') == 'cli',
263
- reason='CLIRuntime does not support IPython magics like %pip or return IPythonRunCellObservation',
264
- )
265
- def test_ipython_package_install(temp_dir, runtime_cls, run_as_openhands):
266
- """Make sure that cd in bash also update the current working directory in ipython."""
267
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
268
-
269
- # It should error out since pymsgbox is not installed
270
- action = IPythonRunCellAction(code='import pymsgbox')
271
- logger.info(action, extra={'msg_type': 'ACTION'})
272
- obs = runtime.run_action(action)
273
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
274
- assert "ModuleNotFoundError: No module named 'pymsgbox'" in obs.content
275
-
276
- # Install pymsgbox in Jupyter
277
- action = IPythonRunCellAction(code='%pip install pymsgbox==1.0.9')
278
- logger.info(action, extra={'msg_type': 'ACTION'})
279
- obs = runtime.run_action(action)
280
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
281
- assert (
282
- 'Successfully installed pymsgbox-1.0.9' in obs.content
283
- or '[Package installed successfully]' in obs.content
284
- )
285
-
286
- action = IPythonRunCellAction(code='import pymsgbox')
287
- logger.info(action, extra={'msg_type': 'ACTION'})
288
- obs = runtime.run_action(action)
289
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
290
- # import should not error out
291
- assert obs.content.strip() == (
292
- '[Code executed successfully with no output]\n'
293
- '[Jupyter current working directory: /workspace]\n'
294
- '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
295
- )
296
-
297
- _close_test_runtime(runtime)
298
-
299
-
300
- @pytest.mark.skipif(
301
- os.environ.get('TEST_RUNTIME') == 'cli',
302
- reason='CLIRuntime does not support sudo with password prompts if the user has not enabled passwordless sudo',
303
- )
304
- def test_ipython_file_editor_permissions_as_openhands(temp_dir, runtime_cls):
305
- """Test file editor permission behavior when running as different users."""
306
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands=True)
307
-
308
- # Create a file owned by root with restricted permissions
309
- action = CmdRunAction(
310
- command='sudo touch /root/test.txt && sudo chmod 600 /root/test.txt'
311
- )
312
- logger.info(action, extra={'msg_type': 'ACTION'})
313
- obs = runtime.run_action(action)
314
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
315
- assert obs.exit_code == 0
316
-
317
- # Try to view the file as openhands user - should fail with permission denied
318
- test_code = "print(file_editor(command='view', path='/root/test.txt'))"
319
- action = IPythonRunCellAction(code=test_code)
320
- logger.info(action, extra={'msg_type': 'ACTION'})
321
- obs = runtime.run_action(action)
322
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
323
- assert 'Permission denied' in obs.content
324
-
325
- # Try to edit the file as openhands user - should fail with permission denied
326
- test_code = "print(file_editor(command='str_replace', path='/root/test.txt', old_str='', new_str='test'))"
327
- action = IPythonRunCellAction(code=test_code)
328
- logger.info(action, extra={'msg_type': 'ACTION'})
329
- obs = runtime.run_action(action)
330
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
331
- assert 'Permission denied' in obs.content
332
-
333
- # Try to create a file in root directory - should fail with permission denied
334
- test_code = (
335
- "print(file_editor(command='create', path='/root/new.txt', file_text='test'))"
336
- )
337
- action = IPythonRunCellAction(code=test_code)
338
- logger.info(action, extra={'msg_type': 'ACTION'})
339
- obs = runtime.run_action(action)
340
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
341
- assert 'Permission denied' in obs.content
342
-
343
- # Try to use file editor in openhands sandbox directory - should work
344
- test_code = """
345
- # Create file
346
- print(file_editor(command='create', path='/workspace/test.txt', file_text='Line 1\\nLine 2\\nLine 3'))
347
-
348
- # View file
349
- print(file_editor(command='view', path='/workspace/test.txt'))
350
-
351
- # Edit file
352
- print(file_editor(command='str_replace', path='/workspace/test.txt', old_str='Line 2', new_str='New Line 2'))
353
-
354
- # Undo edit
355
- print(file_editor(command='undo_edit', path='/workspace/test.txt'))
356
- """
357
- action = IPythonRunCellAction(code=test_code)
358
- logger.info(action, extra={'msg_type': 'ACTION'})
359
- obs = runtime.run_action(action)
360
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
361
- assert 'File created successfully' in obs.content
362
- assert 'Line 1' in obs.content
363
- assert 'Line 2' in obs.content
364
- assert 'Line 3' in obs.content
365
- assert 'New Line 2' in obs.content
366
- assert 'Last edit to' in obs.content
367
- assert 'undone successfully' in obs.content
368
-
369
- # Clean up
370
- action = CmdRunAction(command='rm -f /workspace/test.txt')
371
- logger.info(action, extra={'msg_type': 'ACTION'})
372
- obs = runtime.run_action(action)
373
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
374
- assert obs.exit_code == 0
375
-
376
- action = CmdRunAction(command='sudo rm -f /root/test.txt')
377
- logger.info(action, extra={'msg_type': 'ACTION'})
378
- obs = runtime.run_action(action)
379
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
380
- assert obs.exit_code == 0
381
-
382
- _close_test_runtime(runtime)
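Every IPython assertion above has to account for the two footer lines the Jupyter plugin appends to each observation (the current working directory and the interpreter path). A small helper like the sketch below could keep such comparisons focused on the cell output itself; it is a sketch, not an existing utility in the codebase.

```python
def strip_jupyter_footer(observation_content: str) -> str:
    """Drop the trailing '[Jupyter current working directory: ...]' and
    '[Jupyter Python interpreter: ...]' lines from an IPython observation."""
    lines = [
        line
        for line in observation_content.strip().splitlines()
        if not line.startswith('[Jupyter ')
    ]
    return '\n'.join(lines).strip()
```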
 
 
tests/runtime/test_llm_based_edit.py DELETED
@@ -1,413 +0,0 @@
1
- """Edit-related tests for the DockerRuntime."""
2
-
3
- import os
4
-
5
- import pytest
6
- from conftest import TEST_IN_CI, _close_test_runtime, _load_runtime
7
- from openhands_aci.utils.diff import get_diff
8
-
9
- from openhands.core.logger import openhands_logger as logger
10
- from openhands.events.action import FileEditAction, FileReadAction
11
- from openhands.events.observation import FileEditObservation
12
-
13
- ORGINAL = """from flask import Flask
14
- app = Flask(__name__)
15
-
16
- @app.route('/')
17
- def index():
18
- numbers = list(range(1, 11))
19
- return str(numbers)
20
-
21
- if __name__ == '__main__':
22
- app.run(port=5000)
23
- """
24
-
25
-
26
- @pytest.mark.skipif(
27
- TEST_IN_CI != 'True',
28
- reason='This test requires LLM to run.',
29
- )
30
- def test_edit_from_scratch(temp_dir, runtime_cls, run_as_openhands):
31
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
32
- try:
33
- action = FileEditAction(
34
- content=ORGINAL,
35
- start=-1,
36
- path=os.path.join('/workspace', 'app.py'),
37
- )
38
- logger.info(action, extra={'msg_type': 'ACTION'})
39
- obs = runtime.run_action(action)
40
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
41
-
42
- assert isinstance(obs, FileEditObservation), (
43
- 'The observation should be a FileEditObservation.'
44
- )
45
-
46
- action = FileReadAction(
47
- path=os.path.join('/workspace', 'app.py'),
48
- )
49
- obs = runtime.run_action(action)
50
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
51
- assert obs.content.strip() == ORGINAL.strip()
52
-
53
- finally:
54
- _close_test_runtime(runtime)
55
-
56
-
57
- EDIT = """# above stays the same
58
- @app.route('/')
59
- def index():
60
- numbers = list(range(1, 11))
61
- return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
62
- # below stays the same
63
- """
64
-
65
-
66
- @pytest.mark.skipif(
67
- TEST_IN_CI != 'True',
68
- reason='This test requires LLM to run.',
69
- )
70
- def test_edit(temp_dir, runtime_cls, run_as_openhands):
71
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
72
- try:
73
- action = FileEditAction(
74
- content=ORGINAL,
75
- path=os.path.join('/workspace', 'app.py'),
76
- )
77
- logger.info(action, extra={'msg_type': 'ACTION'})
78
- obs = runtime.run_action(action)
79
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
80
-
81
- assert isinstance(obs, FileEditObservation), (
82
- 'The observation should be a FileEditObservation.'
83
- )
84
-
85
- action = FileReadAction(
86
- path=os.path.join('/workspace', 'app.py'),
87
- )
88
- obs = runtime.run_action(action)
89
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
90
- assert obs.content.strip() == ORGINAL.strip()
91
-
92
- action = FileEditAction(
93
- content=EDIT,
94
- path=os.path.join('/workspace', 'app.py'),
95
- )
96
- obs = runtime.run_action(action)
97
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
98
- assert (
99
- obs.content.strip()
100
- == (
101
- '--- /workspace/app.py\n'
102
- '+++ /workspace/app.py\n'
103
- '@@ -4,7 +4,7 @@\n'
104
- " @app.route('/')\n"
105
- ' def index():\n'
106
- ' numbers = list(range(1, 11))\n'
107
- '- return str(numbers)\n'
108
- "+ return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'\n"
109
- '\n'
110
- " if __name__ == '__main__':\n"
111
- ' app.run(port=5000)\n'
112
- ).strip()
113
- )
114
- finally:
115
- _close_test_runtime(runtime)
116
-
117
-
118
- ORIGINAL_LONG = '\n'.join([f'This is line {i}' for i in range(1, 1000)])
119
- EDIT_LONG = """
120
- This is line 100 + 10
121
- This is line 101 + 10
122
- """
123
-
124
-
125
- @pytest.mark.skipif(
126
- TEST_IN_CI != 'True',
127
- reason='This test requires LLM to run.',
128
- )
129
- def test_edit_long_file(temp_dir, runtime_cls, run_as_openhands):
130
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
131
- try:
132
- action = FileEditAction(
133
- content=ORIGINAL_LONG,
134
- path=os.path.join('/workspace', 'app.py'),
135
- start=-1,
136
- )
137
- logger.info(action, extra={'msg_type': 'ACTION'})
138
- obs = runtime.run_action(action)
139
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
140
-
141
- assert isinstance(obs, FileEditObservation), (
142
- 'The observation should be a FileEditObservation.'
143
- )
144
-
145
- action = FileReadAction(
146
- path=os.path.join('/workspace', 'app.py'),
147
- )
148
- obs = runtime.run_action(action)
149
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
150
- assert obs.content.strip() == ORIGINAL_LONG.strip()
151
-
152
- action = FileEditAction(
153
- content=EDIT_LONG,
154
- path=os.path.join('/workspace', 'app.py'),
155
- start=100,
156
- end=200,
157
- )
158
- obs = runtime.run_action(action)
159
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
160
- assert (
161
- obs.content.strip()
162
- == (
163
- '--- /workspace/app.py\n'
164
- '+++ /workspace/app.py\n'
165
- '@@ -97,8 +97,8 @@\n'
166
- ' This is line 97\n'
167
- ' This is line 98\n'
168
- ' This is line 99\n'
169
- '-This is line 100\n'
170
- '-This is line 101\n'
171
- '+This is line 100 + 10\n'
172
- '+This is line 101 + 10\n'
173
- ' This is line 102\n'
174
- ' This is line 103\n'
175
- ' This is line 104\n'
176
- ).strip()
177
- )
178
- finally:
179
- _close_test_runtime(runtime)
180
-
181
-
182
- # ======================================================================================
183
- # Test FileEditObservation (things that are displayed to the agent)
184
- # ======================================================================================
185
-
186
-
187
- def test_edit_obs_insert_only():
188
- EDIT_LONG_INSERT_ONLY = (
189
- '\n'.join([f'This is line {i}' for i in range(1, 100)])
190
- + EDIT_LONG
191
- + '\n'.join([f'This is line {i}' for i in range(100, 1000)])
192
- )
193
-
194
- diff = get_diff(ORIGINAL_LONG, EDIT_LONG_INSERT_ONLY, '/workspace/app.py')
195
- obs = FileEditObservation(
196
- content=diff,
197
- path='/workspace/app.py',
198
- prev_exist=True,
199
- old_content=ORIGINAL_LONG,
200
- new_content=EDIT_LONG_INSERT_ONLY,
201
- )
202
- assert (
203
- str(obs).strip()
204
- == """
205
- [Existing file /workspace/app.py is edited with 1 changes.]
206
- [begin of edit 1 / 1]
207
- (content before edit)
208
- 98|This is line 98
209
- 99|This is line 99
210
- 100|This is line 100
211
- 101|This is line 101
212
- (content after edit)
213
- 98|This is line 98
214
- 99|This is line 99
215
- +100|This is line 100 + 10
216
- +101|This is line 101 + 10
217
- 102|This is line 100
218
- 103|This is line 101
219
- [end of edit 1 / 1]
220
- """.strip()
221
- )
222
-
223
-
224
- def test_edit_obs_replace():
225
- _new_content = (
226
- '\n'.join([f'This is line {i}' for i in range(1, 100)])
227
- + EDIT_LONG
228
- + '\n'.join([f'This is line {i}' for i in range(102, 1000)])
229
- )
230
-
231
- diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py')
232
- obs = FileEditObservation(
233
- content=diff,
234
- path='/workspace/app.py',
235
- prev_exist=True,
236
- old_content=ORIGINAL_LONG,
237
- new_content=_new_content,
238
- )
239
- print(str(obs))
240
- assert (
241
- str(obs).strip()
242
- == """
243
- [Existing file /workspace/app.py is edited with 1 changes.]
244
- [begin of edit 1 / 1]
245
- (content before edit)
246
- 98|This is line 98
247
- 99|This is line 99
248
- -100|This is line 100
249
- -101|This is line 101
250
- 102|This is line 102
251
- 103|This is line 103
252
- (content after edit)
253
- 98|This is line 98
254
- 99|This is line 99
255
- +100|This is line 100 + 10
256
- +101|This is line 101 + 10
257
- 102|This is line 102
258
- 103|This is line 103
259
- [end of edit 1 / 1]
260
- """.strip()
261
- )
262
-
263
-
264
- def test_edit_obs_replace_with_empty_line():
265
- _new_content = (
266
- '\n'.join([f'This is line {i}' for i in range(1, 100)])
267
- + '\n'
268
- + EDIT_LONG
269
- + '\n'.join([f'This is line {i}' for i in range(102, 1000)])
270
- )
271
-
272
- diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py')
273
- obs = FileEditObservation(
274
- content=diff,
275
- path='/workspace/app.py',
276
- prev_exist=True,
277
- old_content=ORIGINAL_LONG,
278
- new_content=_new_content,
279
- )
280
- print(str(obs))
281
- assert (
282
- str(obs).strip()
283
- == """
284
- [Existing file /workspace/app.py is edited with 1 changes.]
285
- [begin of edit 1 / 1]
286
- (content before edit)
287
- 98|This is line 98
288
- 99|This is line 99
289
- -100|This is line 100
290
- -101|This is line 101
291
- 102|This is line 102
292
- 103|This is line 103
293
- (content after edit)
294
- 98|This is line 98
295
- 99|This is line 99
296
- +100|
297
- +101|This is line 100 + 10
298
- +102|This is line 101 + 10
299
- 103|This is line 102
300
- 104|This is line 103
301
- [end of edit 1 / 1]
302
- """.strip()
303
- )
304
-
305
-
306
- def test_edit_obs_multiple_edits():
307
- _new_content = (
308
- '\n'.join([f'This is line {i}' for i in range(1, 50)])
309
- + '\nbalabala\n'
310
- + '\n'.join([f'This is line {i}' for i in range(50, 100)])
311
- + EDIT_LONG
312
- + '\n'.join([f'This is line {i}' for i in range(102, 1000)])
313
- )
314
-
315
- diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py')
316
- obs = FileEditObservation(
317
- content=diff,
318
- path='/workspace/app.py',
319
- prev_exist=True,
320
- old_content=ORIGINAL_LONG,
321
- new_content=_new_content,
322
- )
323
- assert (
324
- str(obs).strip()
325
- == """
326
- [Existing file /workspace/app.py is edited with 2 changes.]
327
- [begin of edit 1 / 2]
328
- (content before edit)
329
- 48|This is line 48
330
- 49|This is line 49
331
- 50|This is line 50
332
- 51|This is line 51
333
- (content after edit)
334
- 48|This is line 48
335
- 49|This is line 49
336
- +50|balabala
337
- 51|This is line 50
338
- 52|This is line 51
339
- [end of edit 1 / 2]
340
- -------------------------
341
- [begin of edit 2 / 2]
342
- (content before edit)
343
- 98|This is line 98
344
- 99|This is line 99
345
- -100|This is line 100
346
- -101|This is line 101
347
- 102|This is line 102
348
- 103|This is line 103
349
- (content after edit)
350
- 99|This is line 98
351
- 100|This is line 99
352
- +101|This is line 100 + 10
353
- +102|This is line 101 + 10
354
- 103|This is line 102
355
- 104|This is line 103
356
- [end of edit 2 / 2]
357
- """.strip()
358
- )
359
-
360
-
361
- def test_edit_visualize_failed_edit():
362
- _new_content = (
363
- '\n'.join([f'This is line {i}' for i in range(1, 50)])
364
- + '\nbalabala\n'
365
- + '\n'.join([f'This is line {i}' for i in range(50, 100)])
366
- + EDIT_LONG
367
- + '\n'.join([f'This is line {i}' for i in range(102, 1000)])
368
- )
369
-
370
- diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py')
371
- obs = FileEditObservation(
372
- content=diff,
373
- path='/workspace/app.py',
374
- prev_exist=True,
375
- old_content=ORIGINAL_LONG,
376
- new_content=_new_content,
377
- )
378
- assert (
379
- obs.visualize_diff(change_applied=False).strip()
380
- == """
381
- [Changes are NOT applied to /workspace/app.py - Here's how the file looks like if changes are applied.]
382
- [begin of ATTEMPTED edit 1 / 2]
383
- (content before ATTEMPTED edit)
384
- 48|This is line 48
385
- 49|This is line 49
386
- 50|This is line 50
387
- 51|This is line 51
388
- (content after ATTEMPTED edit)
389
- 48|This is line 48
390
- 49|This is line 49
391
- +50|balabala
392
- 51|This is line 50
393
- 52|This is line 51
394
- [end of ATTEMPTED edit 1 / 2]
395
- -------------------------
396
- [begin of ATTEMPTED edit 2 / 2]
397
- (content before ATTEMPTED edit)
398
- 98|This is line 98
399
- 99|This is line 99
400
- -100|This is line 100
401
- -101|This is line 101
402
- 102|This is line 102
403
- 103|This is line 103
404
- (content after ATTEMPTED edit)
405
- 99|This is line 98
406
- 100|This is line 99
407
- +101|This is line 100 + 10
408
- +102|This is line 101 + 10
409
- 103|This is line 102
410
- 104|This is line 103
411
- [end of ATTEMPTED edit 2 / 2]
412
- """.strip()
413
- )
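The observation tests above all follow the same recipe: compute a diff with `get_diff`, wrap it in a `FileEditObservation`, and compare the rendered output. A condensed usage sketch of that recipe, mirroring the deleted tests with throwaway example content:

```python
from openhands_aci.utils.diff import get_diff

from openhands.events.observation import FileEditObservation

old_content = 'line 1\nline 2\nline 3'
new_content = 'line 1\nline 2 edited\nline 3'

obs = FileEditObservation(
    content=get_diff(old_content, new_content, '/workspace/example.py'),
    path='/workspace/example.py',
    prev_exist=True,
    old_content=old_content,
    new_content=new_content,
)
# str(obs) renders the applied edit; visualize_diff(change_applied=False)
# renders the same hunks as an ATTEMPTED (not applied) edit.
print(str(obs))
```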
 
 
tests/runtime/test_mcp_action.py DELETED
@@ -1,362 +0,0 @@
1
- """Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
2
-
3
- import json
4
- import os
5
- import socket
6
- import time
7
-
8
- import docker
9
- import pytest
10
- from conftest import (
11
- _load_runtime,
12
- )
13
-
14
- import openhands
15
- from openhands.core.config import MCPConfig
16
- from openhands.core.config.mcp_config import MCPSSEServerConfig, MCPStdioServerConfig
17
- from openhands.core.logger import openhands_logger as logger
18
- from openhands.events.action import CmdRunAction, MCPAction
19
- from openhands.events.observation import CmdOutputObservation, MCPObservation
20
-
21
- # ============================================================================================================================
22
- # MCP-specific tests
23
- # ============================================================================================================================
24
-
25
- pytestmark = pytest.mark.skipif(
26
- os.environ.get('TEST_RUNTIME') == 'cli',
27
- reason='CLIRuntime does not support MCP actions',
28
- )
29
-
30
-
31
- @pytest.fixture
32
- def sse_mcp_docker_server():
33
- """Manages the lifecycle of the SSE MCP Docker container for tests, using a random available port."""
34
- image_name = 'supercorp/supergateway'
35
-
36
- # Find a free port
37
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
38
- s.bind(('', 0))
39
- host_port = s.getsockname()[1]
40
-
41
- container_internal_port = (
42
- 8000 # The port the MCP server listens on *inside* the container
43
- )
44
-
45
- container_command_args = [
46
- '--stdio',
47
- 'npx -y @modelcontextprotocol/server-filesystem /',
48
- '--port',
49
- str(container_internal_port), # MCP server inside container listens on this
50
- '--baseUrl',
51
- f'http://localhost:{host_port}', # The URL used to access the server from the host
52
- ]
53
- client = docker.from_env()
54
- container = None
55
- log_streamer = None
56
-
57
- # Import LogStreamer here as it's specific to this fixture's needs
58
- from openhands.runtime.utils.log_streamer import LogStreamer
59
-
60
- try:
61
- logger.info(
62
- f'Starting Docker container {image_name} with command: {" ".join(container_command_args)} '
63
- f'and mapping internal port {container_internal_port} to host port {host_port}',
64
- extra={'msg_type': 'ACTION'},
65
- )
66
- container = client.containers.run(
67
- image_name,
68
- command=container_command_args,
69
- ports={
70
- f'{container_internal_port}/tcp': host_port
71
- }, # Map container's internal port to the random host port
72
- detach=True,
73
- auto_remove=True,
74
- stdin_open=True,
75
- )
76
- logger.info(
77
- f'Container {container.short_id} started, listening on host port {host_port}.'
78
- )
79
-
80
- log_streamer = LogStreamer(
81
- container,
82
- lambda level, msg: getattr(logger, level.lower())(
83
- f'[MCP server {container.short_id}] {msg}'
84
- ),
85
- )
86
- # Wait for the server to initialize, as in the original tests
87
- time.sleep(10)
88
-
89
- yield {'url': f'http://localhost:{host_port}/sse'}
90
-
91
- finally:
92
- if container:
93
- logger.info(f'Stopping container {container.short_id}...')
94
- try:
95
- container.stop(timeout=5)
96
- logger.info(
97
- f'Container {container.short_id} stopped (and should be auto-removed).'
98
- )
99
- except docker.errors.NotFound:
100
- logger.info(
101
- f'Container {container.short_id} not found, likely already stopped and removed.'
102
- )
103
- except Exception as e:
104
- logger.error(f'Error stopping container {container.short_id}: {e}')
105
- if log_streamer:
106
- log_streamer.close()
107
-
108
-
109
- def test_default_activated_tools():
110
- project_root = os.path.dirname(openhands.__file__)
111
- mcp_config_path = os.path.join(project_root, 'runtime', 'mcp', 'config.json')
112
- assert os.path.exists(mcp_config_path), (
113
- f'MCP config file not found at {mcp_config_path}'
114
- )
115
- with open(mcp_config_path, 'r') as f:
116
- mcp_config = json.load(f)
117
- assert 'mcpServers' in mcp_config
118
- assert 'default' in mcp_config['mcpServers']
119
- assert 'tools' in mcp_config
120
- # no tools are activated by default yet
121
- assert len(mcp_config['tools']) == 0
122
-
123
-
124
- @pytest.mark.asyncio
125
- async def test_fetch_mcp_via_stdio(temp_dir, runtime_cls, run_as_openhands):
126
- mcp_stdio_server_config = MCPStdioServerConfig(
127
- name='fetch', command='uvx', args=['mcp-server-fetch']
128
- )
129
- override_mcp_config = MCPConfig(stdio_servers=[mcp_stdio_server_config])
130
- runtime, config = _load_runtime(
131
- temp_dir, runtime_cls, run_as_openhands, override_mcp_config=override_mcp_config
132
- )
133
-
134
- # Start a local HTTP server for the fetch tool to retrieve
135
- action_cmd = CmdRunAction(command='python3 -m http.server 8000 > server.log 2>&1 &')
136
- logger.info(action_cmd, extra={'msg_type': 'ACTION'})
137
- obs = runtime.run_action(action_cmd)
138
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
139
-
140
- assert isinstance(obs, CmdOutputObservation)
141
- assert obs.exit_code == 0
142
- assert '[1]' in obs.content
143
-
144
- action_cmd = CmdRunAction(command='sleep 3 && cat server.log')
145
- logger.info(action_cmd, extra={'msg_type': 'ACTION'})
146
- obs = runtime.run_action(action_cmd)
147
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
148
- assert obs.exit_code == 0
149
-
150
- mcp_action = MCPAction(name='fetch', arguments={'url': 'http://localhost:8000'})
151
- obs = await runtime.call_tool_mcp(mcp_action)
152
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
153
- assert isinstance(obs, MCPObservation), (
154
- 'The observation should be a MCPObservation.'
155
- )
156
-
157
- result_json = json.loads(obs.content)
158
- assert not result_json['isError']
159
- assert len(result_json['content']) == 1
160
- assert result_json['content'][0]['type'] == 'text'
161
- assert (
162
- result_json['content'][0]['text']
163
- == 'Contents of http://localhost:8000/:\n---\n\n* <server.log>\n\n---'
164
- )
165
-
166
- runtime.close()
167
-
168
-
169
- @pytest.mark.asyncio
170
- async def test_filesystem_mcp_via_sse(
171
- temp_dir, runtime_cls, run_as_openhands, sse_mcp_docker_server
172
- ):
173
- sse_server_info = sse_mcp_docker_server
174
- sse_url = sse_server_info['url']
175
- runtime = None
176
- try:
177
- mcp_sse_server_config = MCPSSEServerConfig(url=sse_url)
178
- override_mcp_config = MCPConfig(sse_servers=[mcp_sse_server_config])
179
- runtime, config = _load_runtime(
180
- temp_dir,
181
- runtime_cls,
182
- run_as_openhands,
183
- override_mcp_config=override_mcp_config,
184
- )
185
-
186
- mcp_action = MCPAction(name='list_directory', arguments={'path': '.'})
187
- obs = await runtime.call_tool_mcp(mcp_action)
188
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
189
- assert isinstance(obs, MCPObservation), (
190
- 'The observation should be a MCPObservation.'
191
- )
192
- assert '[FILE] .dockerenv' in obs.content
193
-
194
- finally:
195
- if runtime:
196
- runtime.close()
197
- # Container and log_streamer cleanup is handled by the sse_mcp_docker_server fixture
198
-
199
-
200
- @pytest.mark.asyncio
201
- async def test_both_stdio_and_sse_mcp(
202
- temp_dir, runtime_cls, run_as_openhands, sse_mcp_docker_server
203
- ):
204
- sse_server_info = sse_mcp_docker_server
205
- sse_url = sse_server_info['url']
206
- runtime = None
207
- try:
208
- mcp_sse_server_config = MCPSSEServerConfig(url=sse_url)
209
-
210
- # Also add stdio server
211
- mcp_stdio_server_config = MCPStdioServerConfig(
212
- name='fetch', command='uvx', args=['mcp-server-fetch']
213
- )
214
-
215
- override_mcp_config = MCPConfig(
216
- sse_servers=[mcp_sse_server_config], stdio_servers=[mcp_stdio_server_config]
217
- )
218
- runtime, config = _load_runtime(
219
- temp_dir,
220
- runtime_cls,
221
- run_as_openhands,
222
- override_mcp_config=override_mcp_config,
223
- )
224
-
225
- # ======= Test SSE server =======
226
- mcp_action_sse = MCPAction(name='list_directory', arguments={'path': '.'})
227
- obs_sse = await runtime.call_tool_mcp(mcp_action_sse)
228
- logger.info(obs_sse, extra={'msg_type': 'OBSERVATION'})
229
- assert isinstance(obs_sse, MCPObservation), (
230
- 'The observation should be a MCPObservation.'
231
- )
232
- assert '[FILE] .dockerenv' in obs_sse.content
233
-
234
- # ======= Test stdio server =======
235
- # Start a local HTTP server for the fetch tool to retrieve
236
- action_cmd_http = CmdRunAction(
237
- command='python3 -m http.server 8000 > server.log 2>&1 &'
238
- )
239
- logger.info(action_cmd_http, extra={'msg_type': 'ACTION'})
240
- obs_http = runtime.run_action(action_cmd_http)
241
- logger.info(obs_http, extra={'msg_type': 'OBSERVATION'})
242
-
243
- assert isinstance(obs_http, CmdOutputObservation)
244
- assert obs_http.exit_code == 0
245
- assert '[1]' in obs_http.content
246
-
247
- action_cmd_cat = CmdRunAction(command='sleep 3 && cat server.log')
248
- logger.info(action_cmd_cat, extra={'msg_type': 'ACTION'})
249
- obs_cat = runtime.run_action(action_cmd_cat)
250
- logger.info(obs_cat, extra={'msg_type': 'OBSERVATION'})
251
- assert obs_cat.exit_code == 0
252
-
253
- mcp_action_fetch = MCPAction(
254
- # NOTE: the tool name is `fetch_fetch` because the tool name is `fetch`
255
- # And FastMCP Proxy will pre-pend the server name (in this case, `fetch`)
256
- # to the tool name, so the full tool name becomes `fetch_fetch`
257
- name='fetch',
258
- arguments={'url': 'http://localhost:8000'},
259
- )
260
- obs_fetch = await runtime.call_tool_mcp(mcp_action_fetch)
261
- logger.info(obs_fetch, extra={'msg_type': 'OBSERVATION'})
262
- assert isinstance(obs_fetch, MCPObservation), (
263
- 'The observation should be a MCPObservation.'
264
- )
265
-
266
- result_json = json.loads(obs_fetch.content)
267
- assert not result_json['isError']
268
- assert len(result_json['content']) == 1
269
- assert result_json['content'][0]['type'] == 'text'
270
- assert (
271
- result_json['content'][0]['text']
272
- == 'Contents of http://localhost:8000/:\n---\n\n* <server.log>\n\n---'
273
- )
274
- finally:
275
- if runtime:
276
- runtime.close()
277
- # SSE Docker container cleanup is handled by the sse_mcp_docker_server fixture
278
-
279
-
280
- @pytest.mark.asyncio
281
- async def test_microagent_and_one_stdio_mcp_in_config(
282
- temp_dir, runtime_cls, run_as_openhands
283
- ):
284
- runtime = None
285
- try:
286
- filesystem_config = MCPStdioServerConfig(
287
- name='filesystem',
288
- command='npx',
289
- args=[
290
- '@modelcontextprotocol/server-filesystem',
291
- '/',
292
- ],
293
- )
294
- override_mcp_config = MCPConfig(stdio_servers=[filesystem_config])
295
- runtime, config = _load_runtime(
296
- temp_dir,
297
- runtime_cls,
298
- run_as_openhands,
299
- override_mcp_config=override_mcp_config,
300
- )
301
-
302
- # NOTE: this simulates the case where the microagent adds a new stdio server to the runtime
303
- # but that stdio server is not in the initial config
304
- # Actual invocation of the microagent involves `add_mcp_tools_to_agent`
305
- # which will call `get_mcp_config` with the stdio server from microagent's config
306
- fetch_config = MCPStdioServerConfig(
307
- name='fetch', command='uvx', args=['mcp-server-fetch']
308
- )
309
- updated_config = runtime.get_mcp_config([fetch_config])
310
- logger.info(f'updated_config: {updated_config}')
311
-
312
- # ======= Test the stdio server in the config =======
313
- mcp_action_sse = MCPAction(
314
- name='filesystem_list_directory', arguments={'path': '/'}
315
- )
316
- obs_sse = await runtime.call_tool_mcp(mcp_action_sse)
317
- logger.info(obs_sse, extra={'msg_type': 'OBSERVATION'})
318
- assert isinstance(obs_sse, MCPObservation), (
319
- 'The observation should be a MCPObservation.'
320
- )
321
- assert '[FILE] .dockerenv' in obs_sse.content
322
-
323
- # ======= Test the stdio server added by the microagent =======
324
- # Start a local HTTP server for the fetch tool to retrieve
325
- action_cmd_http = CmdRunAction(
326
- command='python3 -m http.server 8000 > server.log 2>&1 &'
327
- )
328
- logger.info(action_cmd_http, extra={'msg_type': 'ACTION'})
329
- obs_http = runtime.run_action(action_cmd_http)
330
- logger.info(obs_http, extra={'msg_type': 'OBSERVATION'})
331
-
332
- assert isinstance(obs_http, CmdOutputObservation)
333
- assert obs_http.exit_code == 0
334
- assert '[1]' in obs_http.content
335
-
336
- action_cmd_cat = CmdRunAction(command='sleep 3 && cat server.log')
337
- logger.info(action_cmd_cat, extra={'msg_type': 'ACTION'})
338
- obs_cat = runtime.run_action(action_cmd_cat)
339
- logger.info(obs_cat, extra={'msg_type': 'OBSERVATION'})
340
- assert obs_cat.exit_code == 0
341
-
342
- mcp_action_fetch = MCPAction(
343
- name='fetch_fetch', arguments={'url': 'http://localhost:8000'}
344
- )
345
- obs_fetch = await runtime.call_tool_mcp(mcp_action_fetch)
346
- logger.info(obs_fetch, extra={'msg_type': 'OBSERVATION'})
347
- assert isinstance(obs_fetch, MCPObservation), (
348
- 'The observation should be a MCPObservation.'
349
- )
350
-
351
- result_json = json.loads(obs_fetch.content)
352
- assert not result_json['isError']
353
- assert len(result_json['content']) == 1
354
- assert result_json['content'][0]['type'] == 'text'
355
- assert (
356
- result_json['content'][0]['text']
357
- == 'Contents of http://localhost:8000/:\n---\n\n* <server.log>\n\n---'
358
- )
359
- finally:
360
- if runtime:
361
- runtime.close()
362
- # SSE Docker container cleanup is handled by the sse_mcp_docker_server fixture
 
 
 
tests/runtime/test_microagent.py DELETED
@@ -1,443 +0,0 @@
1
- """Tests for microagent loading in runtime."""
2
-
3
- import os
4
- import tempfile
5
- from pathlib import Path
6
- from unittest.mock import AsyncMock, MagicMock, patch
7
-
8
- import pytest
9
- from conftest import (
10
- _close_test_runtime,
11
- _load_runtime,
12
- )
13
-
14
- from openhands.core.config import MCPConfig
15
- from openhands.core.config.mcp_config import MCPStdioServerConfig
16
- from openhands.mcp.utils import add_mcp_tools_to_agent
17
- from openhands.microagent.microagent import (
18
- BaseMicroagent,
19
- KnowledgeMicroagent,
20
- RepoMicroagent,
21
- TaskMicroagent,
22
- )
23
- from openhands.microagent.types import MicroagentType
24
-
25
-
26
- def _create_test_microagents(test_dir: str):
27
- """Create test microagent files in the given directory."""
28
- microagents_dir = Path(test_dir) / '.openhands' / 'microagents'
29
- microagents_dir.mkdir(parents=True, exist_ok=True)
30
-
31
- # Create test knowledge agent
32
- knowledge_dir = microagents_dir / 'knowledge'
33
- knowledge_dir.mkdir(exist_ok=True)
34
- knowledge_agent = """---
35
- name: test_knowledge_agent
36
- type: knowledge
37
- version: 1.0.0
38
- agent: CodeActAgent
39
- triggers:
40
- - test
41
- - pytest
42
- ---
43
-
44
- # Test Guidelines
45
-
46
- Testing best practices and guidelines.
47
- """
48
- (knowledge_dir / 'knowledge.md').write_text(knowledge_agent)
49
-
50
- # Create test repo agent
51
- repo_agent = """---
52
- name: test_repo_agent
53
- type: repo
54
- version: 1.0.0
55
- agent: CodeActAgent
56
- ---
57
-
58
- # Test Repository Agent
59
-
60
- Repository-specific test instructions.
61
- """
62
- (microagents_dir / 'repo.md').write_text(repo_agent)
63
-
64
- # Create legacy repo instructions
65
- legacy_instructions = """# Legacy Instructions
66
-
67
- These are legacy repository instructions.
68
- """
69
- (Path(test_dir) / '.openhands_instructions').write_text(legacy_instructions)
70
-
71
-
72
- def test_load_microagents_with_trailing_slashes(
73
- temp_dir, runtime_cls, run_as_openhands
74
- ):
75
- """Test loading microagents when directory paths have trailing slashes."""
76
- # Create test files
77
- _create_test_microagents(temp_dir)
78
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
79
- try:
80
- # Load microagents
81
- loaded_agents = runtime.get_microagents_from_selected_repo(None)
82
-
83
- # Verify all agents are loaded
84
- knowledge_agents = [
85
- a for a in loaded_agents if isinstance(a, KnowledgeMicroagent)
86
- ]
87
- repo_agents = [a for a in loaded_agents if isinstance(a, RepoMicroagent)]
88
-
89
- # Check knowledge agents
90
- assert len(knowledge_agents) == 1
91
- agent = knowledge_agents[0]
92
- assert agent.name == 'knowledge/knowledge'
93
- assert 'test' in agent.triggers
94
- assert 'pytest' in agent.triggers
95
-
96
- # Check repo agents (including legacy)
97
- assert len(repo_agents) == 2 # repo.md + .openhands_instructions
98
- repo_names = {a.name for a in repo_agents}
99
- assert 'repo' in repo_names
100
- assert 'repo_legacy' in repo_names
101
-
102
- finally:
103
- _close_test_runtime(runtime)
104
-
105
-
106
- def test_load_microagents_with_selected_repo(temp_dir, runtime_cls, run_as_openhands):
107
- """Test loading microagents from a selected repository."""
108
- # Create test files in a repository-like structure
109
- repo_dir = Path(temp_dir) / 'OpenHands'
110
- repo_dir.mkdir(parents=True)
111
- _create_test_microagents(str(repo_dir))
112
-
113
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
114
- try:
115
- # Load microagents with selected repository
116
- loaded_agents = runtime.get_microagents_from_selected_repo(
117
- 'All-Hands-AI/OpenHands'
118
- )
119
-
120
- # Verify all agents are loaded
121
- knowledge_agents = [
122
- a for a in loaded_agents if isinstance(a, KnowledgeMicroagent)
123
- ]
124
- repo_agents = [a for a in loaded_agents if isinstance(a, RepoMicroagent)]
125
-
126
- # Check knowledge agents
127
- assert len(knowledge_agents) == 1
128
- agent = knowledge_agents[0]
129
- assert agent.name == 'knowledge/knowledge'
130
- assert 'test' in agent.triggers
131
- assert 'pytest' in agent.triggers
132
-
133
- # Check repo agents (including legacy)
134
- assert len(repo_agents) == 2 # repo.md + .openhands_instructions
135
- repo_names = {a.name for a in repo_agents}
136
- assert 'repo' in repo_names
137
- assert 'repo_legacy' in repo_names
138
-
139
- finally:
140
- _close_test_runtime(runtime)
141
-
142
-
143
- def test_load_microagents_with_missing_files(temp_dir, runtime_cls, run_as_openhands):
144
- """Test loading microagents when some files are missing."""
145
- # Create only repo.md, no other files
146
- microagents_dir = Path(temp_dir) / '.openhands' / 'microagents'
147
- microagents_dir.mkdir(parents=True, exist_ok=True)
148
-
149
- repo_agent = """---
150
- name: test_repo_agent
151
- type: repo
152
- version: 1.0.0
153
- agent: CodeActAgent
154
- ---
155
-
156
- # Test Repository Agent
157
-
158
- Repository-specific test instructions.
159
- """
160
- (microagents_dir / 'repo.md').write_text(repo_agent)
161
-
162
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
163
- try:
164
- # Load microagents
165
- loaded_agents = runtime.get_microagents_from_selected_repo(None)
166
-
167
- # Verify only repo agent is loaded
168
- knowledge_agents = [
169
- a for a in loaded_agents if isinstance(a, KnowledgeMicroagent)
170
- ]
171
- repo_agents = [a for a in loaded_agents if isinstance(a, RepoMicroagent)]
172
-
173
- assert len(knowledge_agents) == 0
174
- assert len(repo_agents) == 1
175
-
176
- agent = repo_agents[0]
177
- assert agent.name == 'repo'
178
-
179
- finally:
180
- _close_test_runtime(runtime)
181
-
182
-
183
- def test_task_microagent_creation():
184
- """Test that a TaskMicroagent is created correctly."""
185
- content = """---
186
- name: test_task
187
- version: 1.0.0
188
- author: openhands
189
- agent: CodeActAgent
190
- triggers:
191
- - /test_task
192
- inputs:
193
- - name: TEST_VAR
194
- description: "Test variable"
195
- ---
196
-
197
- This is a test task microagent with a variable: ${test_var}.
198
- """
199
-
200
- with tempfile.NamedTemporaryFile(suffix='.md') as f:
201
- f.write(content.encode())
202
- f.flush()
203
-
204
- agent = BaseMicroagent.load(f.name)
205
-
206
- assert isinstance(agent, TaskMicroagent)
207
- assert agent.type == MicroagentType.TASK
208
- assert agent.name == 'test_task'
209
- assert '/test_task' in agent.triggers
210
- assert "If the user didn't provide any of these variables" in agent.content
211
-
212
-
213
- def test_task_microagent_variable_extraction():
214
- """Test that variables are correctly extracted from the content."""
215
- content = """---
216
- name: test_task
217
- version: 1.0.0
218
- author: openhands
219
- agent: CodeActAgent
220
- triggers:
221
- - /test_task
222
- inputs:
223
- - name: var1
224
- description: "Variable 1"
225
- ---
226
-
227
- This is a test with variables: ${var1}, ${var2}, and ${var3}.
228
- """
229
-
230
- with tempfile.NamedTemporaryFile(suffix='.md') as f:
231
- f.write(content.encode())
232
- f.flush()
233
-
234
- agent = BaseMicroagent.load(f.name)
235
-
236
- assert isinstance(agent, TaskMicroagent)
237
- variables = agent.extract_variables(agent.content)
238
- assert set(variables) == {'var1', 'var2', 'var3'}
239
- assert agent.requires_user_input()
240
-
241
-
242
- def test_knowledge_microagent_no_prompt():
243
- """Test that a regular KnowledgeMicroagent doesn't get the prompt."""
244
- content = """---
245
- name: test_knowledge
246
- version: 1.0.0
247
- author: openhands
248
- agent: CodeActAgent
249
- triggers:
250
- - test_knowledge
251
- ---
252
-
253
- This is a test knowledge microagent.
254
- """
255
-
256
- with tempfile.NamedTemporaryFile(suffix='.md') as f:
257
- f.write(content.encode())
258
- f.flush()
259
-
260
- agent = BaseMicroagent.load(f.name)
261
-
262
- assert isinstance(agent, KnowledgeMicroagent)
263
- assert agent.type == MicroagentType.KNOWLEDGE
264
- assert "If the user didn't provide any of these variables" not in agent.content
265
-
266
-
267
- def test_task_microagent_trigger_addition():
268
- """Test that a trigger is added if not present."""
269
- content = """---
270
- name: test_task
271
- version: 1.0.0
272
- author: openhands
273
- agent: CodeActAgent
274
- inputs:
275
- - name: TEST_VAR
276
- description: "Test variable"
277
- ---
278
-
279
- This is a test task microagent.
280
- """
281
-
282
- with tempfile.NamedTemporaryFile(suffix='.md') as f:
283
- f.write(content.encode())
284
- f.flush()
285
-
286
- agent = BaseMicroagent.load(f.name)
287
-
288
- assert isinstance(agent, TaskMicroagent)
289
- assert '/test_task' in agent.triggers
290
-
291
-
292
- def test_task_microagent_no_duplicate_trigger():
293
- """Test that a trigger is not duplicated if already present."""
294
- content = """---
295
- name: test_task
296
- version: 1.0.0
297
- author: openhands
298
- agent: CodeActAgent
299
- triggers:
300
- - /test_task
301
- - another_trigger
302
- inputs:
303
- - name: TEST_VAR
304
- description: "Test variable"
305
- ---
306
-
307
- This is a test task microagent.
308
- """
309
-
310
- with tempfile.NamedTemporaryFile(suffix='.md') as f:
311
- f.write(content.encode())
312
- f.flush()
313
-
314
- agent = BaseMicroagent.load(f.name)
315
-
316
- assert isinstance(agent, TaskMicroagent)
317
- assert agent.triggers.count('/test_task') == 1 # No duplicates
318
- assert len(agent.triggers) == 2
319
- assert 'another_trigger' in agent.triggers
320
- assert '/test_task' in agent.triggers
321
-
322
-
323
- def test_task_microagent_match_trigger():
324
- """Test that a task microagent matches its trigger correctly."""
325
- content = """---
326
- name: test_task
327
- version: 1.0.0
328
- author: openhands
329
- agent: CodeActAgent
330
- triggers:
331
- - /test_task
332
- inputs:
333
- - name: TEST_VAR
334
- description: "Test variable"
335
- ---
336
-
337
- This is a test task microagent.
338
- """
339
-
340
- with tempfile.NamedTemporaryFile(suffix='.md') as f:
341
- f.write(content.encode())
342
- f.flush()
343
-
344
- agent = BaseMicroagent.load(f.name)
345
-
346
- assert isinstance(agent, TaskMicroagent)
347
- assert agent.match_trigger('/test_task') == '/test_task'
348
- assert agent.match_trigger(' /test_task ') == '/test_task'
349
- assert agent.match_trigger('This contains /test_task') == '/test_task'
350
- assert agent.match_trigger('/other_task') is None
351
-
352
-
353
- def test_default_tools_microagent_exists():
354
- """Test that the default-tools microagent exists in the global microagents directory."""
355
- # Get the path to the global microagents directory
356
- import openhands
357
-
358
- project_root = os.path.dirname(openhands.__file__)
359
- parent_dir = os.path.dirname(project_root)
360
- microagents_dir = os.path.join(parent_dir, 'microagents')
361
-
362
- # Check that the default-tools.md file exists
363
- default_tools_path = os.path.join(microagents_dir, 'default-tools.md')
364
- assert os.path.exists(default_tools_path), (
365
- f'default-tools.md not found at {default_tools_path}'
366
- )
367
-
368
- # Read the file and check its content
369
- with open(default_tools_path, 'r') as f:
370
- content = f.read()
371
-
372
- # Verify it's a repo microagent (always activated)
373
- assert 'type: repo' in content, 'default-tools.md should be a repo microagent'
374
-
375
- # Verify it has the fetch tool configured
376
- assert 'name: "fetch"' in content, 'default-tools.md should have a fetch tool'
377
- assert 'command: "uvx"' in content, 'default-tools.md should use uvx command'
378
- assert 'args: ["mcp-server-fetch"]' in content, (
379
- 'default-tools.md should use mcp-server-fetch'
380
- )
381
-
382
-
383
- @pytest.mark.asyncio
384
- async def test_add_mcp_tools_from_microagents():
385
- """Test that add_mcp_tools_to_agent adds tools from microagents."""
386
- # Import ActionExecutionClient for mocking
387
-
388
- from openhands.core.config.openhands_config import OpenHandsConfig
389
- from openhands.runtime.impl.action_execution.action_execution_client import (
390
- ActionExecutionClient,
391
- )
392
-
393
- # Create mock objects
394
- mock_agent = MagicMock()
395
- mock_runtime = MagicMock(spec=ActionExecutionClient)
396
- mock_memory = MagicMock()
397
- mock_mcp_config = MCPConfig()
398
-
399
- # Create a mock OpenHandsConfig with the MCP config
400
- mock_app_config = OpenHandsConfig(mcp=mock_mcp_config, search_api_key=None)
401
-
402
- # Configure the mock memory to return a microagent MCP config
403
- mock_stdio_server = MCPStdioServerConfig(
404
- name='test-tool', command='test-command', args=['test-arg1', 'test-arg2']
405
- )
406
- mock_microagent_mcp_config = MCPConfig(stdio_servers=[mock_stdio_server])
407
- mock_memory.get_microagent_mcp_tools.return_value = [mock_microagent_mcp_config]
408
-
409
- # Configure the mock runtime
410
- mock_runtime.runtime_initialized = True
411
- mock_runtime.get_mcp_config.return_value = mock_microagent_mcp_config
412
-
413
- # Mock the fetch_mcp_tools_from_config function to return a mock tool
414
- mock_tool = {
415
- 'type': 'function',
416
- 'function': {
417
- 'name': 'test-tool',
418
- 'description': 'Test tool description',
419
- 'parameters': {},
420
- },
421
- }
422
-
423
- with patch(
424
- 'openhands.mcp.utils.fetch_mcp_tools_from_config',
425
- new=AsyncMock(return_value=[mock_tool]),
426
- ):
427
- # Call the function with the OpenHandsConfig instead of MCPConfig
428
- await add_mcp_tools_to_agent(
429
- mock_agent, mock_runtime, mock_memory, mock_app_config
430
- )
431
-
432
- # Verify that the memory's get_microagent_mcp_tools was called
433
- mock_memory.get_microagent_mcp_tools.assert_called_once()
434
-
435
- # Verify that the runtime's get_mcp_config was called with the extra stdio servers
436
- mock_runtime.get_mcp_config.assert_called_once()
437
- args, kwargs = mock_runtime.get_mcp_config.call_args
438
- assert len(args) == 1
439
- assert len(args[0]) == 1
440
- assert args[0][0].name == 'test-tool'
441
-
442
- # Verify that the agent's set_mcp_tools was called with the mock tool
443
- mock_agent.set_mcp_tools.assert_called_once_with([mock_tool])
 
 
 
 
 
 
 
tests/runtime/test_replay.py DELETED
@@ -1,161 +0,0 @@
1
- """Replay tests"""
2
-
3
- import asyncio
4
- from pathlib import Path
5
-
6
- from conftest import _close_test_runtime, _load_runtime
7
-
8
- from openhands.controller.state.state import State
9
- from openhands.core.config.config_utils import OH_DEFAULT_AGENT
10
- from openhands.core.config.openhands_config import OpenHandsConfig
11
- from openhands.core.main import run_controller
12
- from openhands.core.schema.agent import AgentState
13
- from openhands.events.action.empty import NullAction
14
- from openhands.events.action.message import MessageAction
15
- from openhands.events.event import EventSource
16
- from openhands.events.observation.commands import CmdOutputObservation
17
-
18
-
19
- def _get_config(trajectory_name: str, agent: str = OH_DEFAULT_AGENT):
20
- return OpenHandsConfig(
21
- default_agent=agent,
22
- run_as_openhands=False,
23
- # do not mount workspace
24
- workspace_base=None,
25
- workspace_mount_path=None,
26
- replay_trajectory_path=str(
27
- (Path(__file__).parent / 'trajs' / f'{trajectory_name}.json').resolve()
28
- ),
29
- )
30
-
31
-
32
- def test_simple_replay(temp_dir, runtime_cls, run_as_openhands):
33
- """
34
- A simple replay test that involves simple terminal operations and edits
35
- (creating a simple 2048 game), using the default agent
36
- """
37
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
38
- config.replay_trajectory_path = str(
39
- (Path(__file__).parent / 'trajs' / 'basic.json').resolve()
40
- )
41
- config.security.confirmation_mode = False
42
-
43
- state: State | None = asyncio.run(
44
- run_controller(
45
- config=config,
46
- initial_user_action=NullAction(),
47
- runtime=runtime,
48
- )
49
- )
50
-
51
- assert state.agent_state == AgentState.FINISHED
52
-
53
- _close_test_runtime(runtime)
54
-
55
-
56
- def test_simple_gui_replay(temp_dir, runtime_cls, run_as_openhands):
57
- """
58
- A simple replay test that involves simple terminal operations and edits
59
- (writing a Vue.js App), using the default agent
60
-
61
- Note:
62
- 1. This trajectory is exported from GUI mode, meaning it has extra
63
- environmental actions that don't appear in headless mode's trajectories
64
- 2. In GUI mode, agents typically don't finish; rather, they wait for the next
65
- task from the user, so this exported trajectory ends with awaiting_user_input
66
- """
67
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
68
-
69
- config = _get_config('basic_gui_mode')
70
- config.security.confirmation_mode = False
71
-
72
- state: State | None = asyncio.run(
73
- run_controller(
74
- config=config,
75
- initial_user_action=NullAction(),
76
- runtime=runtime,
77
- # exit on message, otherwise this would be stuck on waiting for user input
78
- exit_on_message=True,
79
- )
80
- )
81
-
82
- assert state.agent_state == AgentState.FINISHED
83
-
84
- _close_test_runtime(runtime)
85
-
86
-
87
- def test_replay_wrong_initial_state(temp_dir, runtime_cls, run_as_openhands):
88
- """
89
- Replay requires a consistent initial state to start with, otherwise it might
90
- produce garbage. The trajectory used in this test assumes the existence of
91
- a file named 'game_2048.py', which doesn't exist when we replay the trajectory
92
- (so-called inconsistent initial states). This test demonstrates what this would
93
- look like: the following events would still be replayed even though they are
94
- meaningless.
95
- """
96
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
97
- config.replay_trajectory_path = str(
98
- (Path(__file__).parent / 'trajs' / 'wrong_initial_state.json').resolve()
99
- )
100
- config.security.confirmation_mode = False
101
-
102
- state: State | None = asyncio.run(
103
- run_controller(
104
- config=config,
105
- initial_user_action=NullAction(),
106
- runtime=runtime,
107
- )
108
- )
109
-
110
- assert state.agent_state == AgentState.FINISHED
111
-
112
- has_error_in_action = False
113
- for event in state.history:
114
- if isinstance(event, CmdOutputObservation) and event.exit_code != 0:
115
- has_error_in_action = True
116
- break
117
-
118
- assert has_error_in_action
119
-
120
- _close_test_runtime(runtime)
121
-
122
-
123
- def test_replay_basic_interactions(temp_dir, runtime_cls, run_as_openhands):
124
- """
125
- Replay a trajectory that involves interactions, i.e. with user messages
126
- in the middle. This tests two things:
127
- 1) The controller should be able to replay all actions without human
128
- interference (no asking for user input).
129
- 2) The user messages in the trajectory should appear in the history.
130
- """
131
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
132
-
133
- config = _get_config('basic_interactions')
134
- config.security.confirmation_mode = False
135
-
136
- state: State | None = asyncio.run(
137
- run_controller(
138
- config=config,
139
- initial_user_action=NullAction(),
140
- runtime=runtime,
141
- )
142
- )
143
-
144
- assert state.agent_state == AgentState.FINISHED
145
-
146
- # all user messages appear in the history, so that after a replay (assuming
147
- # the trajectory doesn't end with `finish` action), LLM knows about all the
148
- # context and can continue
149
- user_messages = [
150
- "what's 1+1?",
151
- "No, I mean by Goldbach's conjecture!",
152
- 'Finish please',
153
- ]
154
- i = 0
155
- for event in state.history:
156
- if isinstance(event, MessageAction) and event._source == EventSource.USER:
157
- assert event.message == user_messages[i]
158
- i += 1
159
- assert i == len(user_messages)
160
-
161
- _close_test_runtime(runtime)
 
 
 
 
tests/runtime/test_runtime_resource.py DELETED
@@ -1,115 +0,0 @@
1
- """Stress tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
2
-
3
- import pytest
4
- from conftest import _close_test_runtime, _load_runtime
5
-
6
- from openhands.core.logger import openhands_logger as logger
7
- from openhands.events.action import CmdRunAction
8
-
9
-
10
- def test_stress_docker_runtime(temp_dir, runtime_cls, repeat=1):
11
- pytest.skip('This test is flaky')
12
- runtime, config = _load_runtime(
13
- temp_dir,
14
- runtime_cls,
15
- docker_runtime_kwargs={
16
- 'cpu_period': 100000, # 100ms
17
- 'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU)
18
- 'mem_limit': '4G', # 4 GB of memory
19
- },
20
- )
21
-
22
- action = CmdRunAction(
23
- command='sudo apt-get update && sudo apt-get install -y stress-ng'
24
- )
25
- logger.info(action, extra={'msg_type': 'ACTION'})
26
- obs = runtime.run_action(action)
27
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
28
- assert obs.exit_code == 0
29
-
30
- for _ in range(repeat):
31
- # run stress-ng stress tests for 1 minute
32
- action = CmdRunAction(command='stress-ng --all 1 -t 30s')
33
- action.set_hard_timeout(120)
34
- logger.info(action, extra={'msg_type': 'ACTION'})
35
- obs = runtime.run_action(action)
36
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
37
-
38
- _close_test_runtime(runtime)
39
-
40
-
41
- # def test_stress_docker_runtime_hit_memory_limits(temp_dir, runtime_cls):
42
- # """Test runtime behavior under resource constraints."""
43
- # runtime, config = _load_runtime(
44
- # temp_dir,
45
- # runtime_cls,
46
- # docker_runtime_kwargs={
47
- # 'cpu_period': 100000, # 100ms
48
- # 'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU)
49
- # 'mem_limit': '4G', # 4 GB of memory
50
- # 'memswap_limit': '0', # No swap
51
- # 'mem_swappiness': 0, # Disable swapping
52
- # 'oom_kill_disable': False, # Enable OOM killer
53
- # },
54
- # runtime_startup_env_vars={
55
- # 'RUNTIME_MAX_MEMORY_GB': '3',
56
- # },
57
- # )
58
-
59
- # action = CmdRunAction(
60
- # command='sudo apt-get update && sudo apt-get install -y stress-ng'
61
- # )
62
- # logger.info(action, extra={'msg_type': 'ACTION'})
63
- # obs = runtime.run_action(action)
64
- # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
65
- # assert obs.exit_code == 0
66
-
67
- # action = CmdRunAction(
68
- # command='stress-ng --vm 1 --vm-bytes 6G --timeout 30s --metrics'
69
- # )
70
- # action.set_hard_timeout(120)
71
- # logger.info(action, extra={'msg_type': 'ACTION'})
72
- # obs = runtime.run_action(action)
73
- # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
74
- # assert 'aborted early, out of system resources' in obs.content
75
- # assert obs.exit_code == 3 # OOM killed!
76
-
77
- # _close_test_runtime(runtime)
78
-
79
-
80
- # def test_stress_docker_runtime_within_memory_limits(temp_dir, runtime_cls):
81
- # """Test runtime behavior under resource constraints."""
82
- # runtime, config = _load_runtime(
83
- # temp_dir,
84
- # runtime_cls,
85
- # docker_runtime_kwargs={
86
- # 'cpu_period': 100000, # 100ms
87
- # 'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU)
88
- # 'mem_limit': '4G', # 4 GB of memory
89
- # 'memswap_limit': '0', # No swap
90
- # 'mem_swappiness': 0, # Disable swapping
91
- # 'oom_kill_disable': False, # Enable OOM killer
92
- # },
93
- # runtime_startup_env_vars={
94
- # 'RUNTIME_MAX_MEMORY_GB': '7',
95
- # },
96
- # )
97
-
98
- # action = CmdRunAction(
99
- # command='sudo apt-get update && sudo apt-get install -y stress-ng'
100
- # )
101
- # logger.info(action, extra={'msg_type': 'ACTION'})
102
- # obs = runtime.run_action(action)
103
- # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
104
- # assert obs.exit_code == 0
105
-
106
- # action = CmdRunAction(
107
- # command='stress-ng --vm 1 --vm-bytes 6G --timeout 30s --metrics'
108
- # )
109
- # action.set_hard_timeout(120)
110
- # logger.info(action, extra={'msg_type': 'ACTION'})
111
- # obs = runtime.run_action(action)
112
- # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
113
- # assert obs.exit_code == 0
114
-
115
- # _close_test_runtime(runtime)
 
 
 
 
 
tests/runtime/test_setup.py DELETED
@@ -1,84 +0,0 @@
1
- """Tests for the setup script."""
2
-
3
- from unittest.mock import patch
4
-
5
- from conftest import (
6
- _load_runtime,
7
- )
8
-
9
- from openhands.core.setup import initialize_repository_for_runtime
10
- from openhands.events.action import FileReadAction, FileWriteAction
11
- from openhands.events.observation import FileReadObservation, FileWriteObservation
12
- from openhands.integrations.service_types import ProviderType, Repository
13
-
14
-
15
- def test_initialize_repository_for_runtime(temp_dir, runtime_cls, run_as_openhands):
16
- """Test that the initialize_repository_for_runtime function works."""
17
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
18
- mock_repo = Repository(
19
- id=1232,
20
- full_name='All-Hands-AI/OpenHands',
21
- git_provider=ProviderType.GITHUB,
22
- is_public=True,
23
- )
24
-
25
- with patch(
26
- 'openhands.runtime.base.ProviderHandler.verify_repo_provider',
27
- return_value=mock_repo,
28
- ):
29
- repository_dir = initialize_repository_for_runtime(
30
- runtime, selected_repository='All-Hands-AI/OpenHands'
31
- )
32
-
33
- assert repository_dir is not None
34
- assert repository_dir == 'OpenHands'
35
-
36
-
37
- def test_maybe_run_setup_script(temp_dir, runtime_cls, run_as_openhands):
38
- """Test that setup script is executed when it exists."""
39
- runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
40
-
41
- setup_script = '.openhands/setup.sh'
42
- write_obs = runtime.write(
43
- FileWriteAction(
44
- path=setup_script, content="#!/bin/bash\necho 'Hello World' >> README.md\n"
45
- )
46
- )
47
- assert isinstance(write_obs, FileWriteObservation)
48
-
49
- # Run setup script
50
- runtime.maybe_run_setup_script()
51
-
52
- # Verify script was executed by checking output
53
- read_obs = runtime.read(FileReadAction(path='README.md'))
54
- assert isinstance(read_obs, FileReadObservation)
55
- assert read_obs.content == 'Hello World\n'
56
-
57
-
58
- def test_maybe_run_setup_script_with_long_timeout(
59
- temp_dir, runtime_cls, run_as_openhands
60
- ):
61
- """Test that setup script is executed when it exists."""
62
- runtime, config = _load_runtime(
63
- temp_dir,
64
- runtime_cls,
65
- run_as_openhands,
66
- runtime_startup_env_vars={'NO_CHANGE_TIMEOUT_SECONDS': '1'},
67
- )
68
-
69
- setup_script = '.openhands/setup.sh'
70
- write_obs = runtime.write(
71
- FileWriteAction(
72
- path=setup_script,
73
- content="#!/bin/bash\nsleep 3 && echo 'Hello World' >> README.md\n",
74
- )
75
- )
76
- assert isinstance(write_obs, FileWriteObservation)
77
-
78
- # Run setup script
79
- runtime.maybe_run_setup_script()
80
-
81
- # Verify script was executed by checking output
82
- read_obs = runtime.read(FileReadAction(path='README.md'))
83
- assert isinstance(read_obs, FileReadObservation)
84
- assert read_obs.content == 'Hello World\n'
 
 
 
 
tests/runtime/test_stress_remote_runtime.py DELETED
@@ -1,483 +0,0 @@
1
- """Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox.
2
-
3
- Example usage:
4
-
5
- ```bash
6
- export ALLHANDS_API_KEY="YOUR_API_KEY"
7
- export RUNTIME=remote
8
- export SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.staging.all-hands.dev"
9
- poetry run pytest -vvxss tests/runtime/test_stress_remote_runtime.py
10
- ```
11
-
12
- """
13
-
14
- import asyncio
15
- import os
16
- import tempfile
17
- import time
18
- from datetime import datetime
19
- from unittest.mock import MagicMock
20
-
21
- import pandas as pd
22
- import pytest
23
- from conftest import TEST_IN_CI
24
-
25
- from evaluation.utils.shared import (
26
- EvalException,
27
- EvalMetadata,
28
- EvalOutput,
29
- assert_and_raise,
30
- codeact_user_response,
31
- make_metadata,
32
- prepare_dataset,
33
- reset_logger_for_multiprocessing,
34
- run_evaluation,
35
- )
36
- from openhands.agenthub import Agent
37
- from openhands.controller.state.state import State
38
- from openhands.core.config import (
39
- AgentConfig,
40
- LLMConfig,
41
- OpenHandsConfig,
42
- SandboxConfig,
43
- )
44
- from openhands.core.logger import openhands_logger as logger
45
- from openhands.core.main import create_runtime, run_controller
46
- from openhands.events.action import (
47
- CmdRunAction,
48
- FileEditAction,
49
- FileWriteAction,
50
- MessageAction,
51
- )
52
- from openhands.events.observation import CmdOutputObservation
53
- from openhands.events.serialization.event import event_to_dict
54
- from openhands.llm import LLM
55
- from openhands.runtime.base import Runtime
56
- from openhands.utils.async_utils import call_async_from_sync
57
-
58
- AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
59
- 'CodeActAgent': codeact_user_response,
60
- }
61
-
62
-
63
- def get_config() -> OpenHandsConfig:
64
- config = OpenHandsConfig(
65
- run_as_openhands=False,
66
- runtime=os.environ.get('RUNTIME', 'remote'),
67
- sandbox=SandboxConfig(
68
- base_container_image='python:3.11-bookworm',
69
- enable_auto_lint=True,
70
- use_host_network=False,
71
- # large enough timeout, since some testcases take very long to run
72
- timeout=300,
73
- api_key=os.environ.get('ALLHANDS_API_KEY', None),
74
- remote_runtime_api_url=os.environ.get(
75
- 'SANDBOX_REMOTE_RUNTIME_API_URL', None
76
- ),
77
- keep_runtime_alive=False,
78
- remote_runtime_resource_factor=1,
79
- ),
80
- # do not mount workspace
81
- workspace_base=None,
82
- workspace_mount_path=None,
83
- )
84
- agent_config = AgentConfig(
85
- enable_jupyter=False,
86
- enable_browsing=False,
87
- enable_llm_editor=False,
88
- )
89
- config.set_agent_config(agent_config)
90
- return config
91
-
92
-
93
- @pytest.mark.skipif(
94
- TEST_IN_CI,
95
- reason='This test should only be run locally, not in CI.',
96
- )
97
- def test_stress_remote_runtime_eval(n_eval_workers: int = 64):
98
- """Mimic evaluation setting to test remote runtime in a multi-processing setting."""
99
-
100
- def _initialize_runtime(
101
- runtime: Runtime,
102
- ):
103
- """Initialize the runtime for the agent.
104
-
105
- This function is called before the runtime is used to run the agent.
106
- """
107
- logger.info('-' * 30)
108
- logger.info('BEGIN Runtime Initialization Fn')
109
- logger.info('-' * 30)
110
- obs: CmdOutputObservation
111
-
112
- action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
113
- action.set_hard_timeout(600)
114
- logger.info(action, extra={'msg_type': 'ACTION'})
115
- obs = runtime.run_action(action)
116
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
117
- assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')
118
-
119
- action = CmdRunAction(command='mkdir -p /dummy_dir')
120
- action.set_hard_timeout(600)
121
- logger.info(action, extra={'msg_type': 'ACTION'})
122
- obs = runtime.run_action(action)
123
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
124
- assert_and_raise(
125
- obs.exit_code == 0,
126
- f'Failed to create /dummy_dir: {str(obs)}',
127
- )
128
-
129
- with tempfile.TemporaryDirectory() as temp_dir:
130
- # Construct the full path for the desired file name within the temporary directory
131
- temp_file_path = os.path.join(temp_dir, 'dummy_file')
132
- # Write to the file with the desired name within the temporary directory
133
- with open(temp_file_path, 'w') as f:
134
- f.write('dummy content')
135
-
136
- # Copy the file to the desired location
137
- runtime.copy_to(temp_file_path, '/dummy_dir/')
138
-
139
- logger.info('-' * 30)
140
- logger.info('END Runtime Initialization Fn')
141
- logger.info('-' * 30)
142
-
143
- def _process_instance(
144
- instance: pd.Series,
145
- metadata: EvalMetadata,
146
- reset_logger: bool = True,
147
- ) -> EvalOutput:
148
- config = get_config()
149
-
150
- # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
151
- if reset_logger:
152
- log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
153
- reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
154
- else:
155
- logger.info(f'Starting evaluation for instance {instance.instance_id}.')
156
-
157
- runtime = create_runtime(config, headless_mode=True)
158
- call_async_from_sync(runtime.connect)
159
-
160
- try:
161
- _initialize_runtime(runtime)
162
-
163
- instruction = 'dummy instruction'
164
- agent = Agent.get_cls(metadata.agent_class)(
165
- llm=LLM(config=metadata.llm_config),
166
- config=config.get_agent_config(metadata.agent_class),
167
- )
168
-
169
- def next_command(*args, **kwargs):
170
- return CmdRunAction(command='ls -lah')
171
-
172
- agent.step = MagicMock(side_effect=next_command)
173
-
174
- # Here's how you can run the agent (similar to the `main` function) and get the final task state
175
- state: State | None = asyncio.run(
176
- run_controller(
177
- config=config,
178
- initial_user_action=MessageAction(content=instruction),
179
- runtime=runtime,
180
- fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
181
- metadata.agent_class
182
- ],
183
- agent=agent,
184
- )
185
- )
186
-
187
- # if fatal error, throw EvalError to trigger re-run
188
- if (
189
- state.last_error
190
- and 'fatal error during agent execution' in state.last_error
191
- and 'stuck in a loop' not in state.last_error
192
- ):
193
- raise EvalException('Fatal error detected: ' + state.last_error)
194
-
195
- finally:
196
- runtime.close()
197
-
198
- test_result = {}
199
- if state is None:
200
- raise ValueError('State should not be None.')
201
- histories = [event_to_dict(event) for event in state.history]
202
- metrics = state.metrics.get() if state.metrics else None
203
-
204
- # Save the output
205
- output = EvalOutput(
206
- instance_id=instance.instance_id,
207
- instruction=instruction,
208
- instance=instance.to_dict(), # SWE Bench specific
209
- test_result=test_result,
210
- metadata=metadata,
211
- history=histories,
212
- metrics=metrics,
213
- error=state.last_error if state and state.last_error else None,
214
- )
215
- return output
216
-
217
- llm_config = LLMConfig()
218
- metadata = make_metadata(
219
- llm_config,
220
- 'dummy_dataset_description',
221
- 'CodeActAgent',
222
- max_iterations=10,
223
- eval_note='dummy_eval_note',
224
- eval_output_dir='./dummy_eval_output_dir',
225
- details={},
226
- )
227
-
228
- # generate 300 random dummy instances
229
- dummy_instance = pd.DataFrame(
230
- {
231
- 'instance_id': [f'dummy_instance_{i}' for i in range(300)],
232
- }
233
- )
234
-
235
- output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
236
- instances = prepare_dataset(
237
- dummy_instance, output_file, eval_n_limit=len(dummy_instance)
238
- )
239
-
240
- run_evaluation(instances, metadata, output_file, n_eval_workers, _process_instance)
241
-
242
-
243
- @pytest.mark.skipif(
244
- TEST_IN_CI,
245
- reason='This test should only be run locally, not in CI.',
246
- )
247
- def test_stress_remote_runtime_long_output_with_soft_and_hard_timeout():
248
- """Stress test for the remote runtime."""
249
- config = get_config()
250
-
251
- try:
252
- runtime = create_runtime(config, headless_mode=True)
253
- call_async_from_sync(runtime.connect)
254
- _time_for_test = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
255
-
256
- # Run a command that generates long output multiple times
257
- for i in range(10):
258
- start_time = time.time()
259
- iteration_stats = {
260
- 'iteration': i,
261
- 'timestamp': time.time(),
262
- }
263
-
264
- # Check overall system memory usage
265
- mem_action = CmdRunAction(
266
- 'free -k | grep "Mem:" | awk \'{printf "Total: %8.1f MB, Used: %8.1f MB, Free: %8.1f MB, Available: %8.1f MB\\n", $2/1024, $3/1024, $4/1024, $7/1024}\''
267
- )
268
- mem_obs = runtime.run_action(mem_action)
269
- assert mem_obs.exit_code == 0
270
- logger.info(
271
- f'System memory usage (iteration {i}): {mem_obs.content.strip()}'
272
- )
273
- # Parse memory values from output
274
- mem_parts = mem_obs.content.strip().split(',')
275
- for part in mem_parts:
276
- key, value = part.strip().split(':')
277
- iteration_stats[f'memory_{key.lower()}'] = float(
278
- value.replace('MB', '').strip()
279
- )
280
-
281
- # Check top memory-consuming processes
282
- mem_action = CmdRunAction(
283
- 'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | head -n 5'
284
- )
285
- mem_obs = runtime.run_action(mem_action)
286
- assert mem_obs.exit_code == 0
287
- _top_processes = [i.strip() for i in mem_obs.content.strip().split('\n')]
288
- logger.info(
289
- f'Top 5 memory-consuming processes (iteration {i}):\n{"- " + "\n- ".join(_top_processes)}'
290
- )
291
- iteration_stats['top_processes'] = _top_processes
292
-
293
- # Check tmux memory usage (in KB)
294
- mem_action = CmdRunAction(
295
- 'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | grep "/usr/bin/tmux" | grep -v grep | awk \'{print $1}\''
296
- )
297
- mem_obs = runtime.run_action(mem_action)
298
- assert mem_obs.exit_code == 0
299
- logger.info(
300
- f'Tmux memory usage (iteration {i}): {mem_obs.content.strip()} KB'
301
- )
302
- try:
303
- iteration_stats['tmux_memory_mb'] = float(mem_obs.content.strip())
304
- except (ValueError, AttributeError):
305
- iteration_stats['tmux_memory_mb'] = None
306
-
307
- # Check action_execution_server mem
308
- mem_action = CmdRunAction(
309
- 'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | grep "action_execution_server" | grep "/openhands/poetry" | grep -v grep | awk \'{print $1}\''
310
- )
311
- mem_obs = runtime.run_action(mem_action)
312
- assert mem_obs.exit_code == 0
313
- logger.info(
314
- f'Action execution server memory usage (iteration {i}): {mem_obs.content.strip()} MB'
315
- )
316
- try:
317
- iteration_stats['action_server_memory_mb'] = float(
318
- mem_obs.content.strip()
319
- )
320
- except (ValueError, AttributeError):
321
- iteration_stats['action_server_memory_mb'] = None
322
-
323
- # Test soft timeout
324
- action = CmdRunAction(
325
- 'read -p "Do you want to continue? [Y/n] " answer; if [[ $answer == "Y" ]]; then echo "Proceeding with operation..."; echo "Operation completed successfully!"; else echo "Operation cancelled."; exit 1; fi'
326
- )
327
- obs = runtime.run_action(action)
328
- assert 'Do you want to continue?' in obs.content
329
- assert obs.exit_code == -1 # Command is still running, waiting for input
330
-
331
- # Send the confirmation
332
- action = CmdRunAction('Y', is_input=True)
333
- obs = runtime.run_action(action)
334
- assert 'Proceeding with operation...' in obs.content
335
- assert 'Operation completed successfully!' in obs.content
336
- assert obs.exit_code == 0
337
- assert '[The command completed with exit code 0.]' in obs.metadata.suffix
338
-
339
- # Test hard timeout w/ long output
340
- # Generate long output with 1000 asterisks per line
341
- action = CmdRunAction(
342
- f'export i={i}; for j in $(seq 1 100); do echo "Line $j - Iteration $i - $(printf \'%1000s\' | tr " " "*")"; sleep 1; done'
343
- )
344
- action.set_hard_timeout(2)
345
- obs = runtime.run_action(action)
346
-
347
- # Verify the output
348
- assert obs.exit_code == -1
349
- assert f'Line 1 - Iteration {i}' in obs.content
350
-
351
- # Because the hard timeout was triggered, the terminal will be in a weird state
352
- # where it will not accept any new commands.
353
- obs = runtime.run_action(CmdRunAction('ls'))
354
- assert obs.exit_code == -1
355
- assert 'The previous command is still running' in obs.metadata.suffix
356
-
357
- # We need to send a Ctrl+C to reset the terminal.
358
- obs = runtime.run_action(CmdRunAction('C-c', is_input=True))
359
- assert obs.exit_code == 130
360
-
361
- # Now make sure the terminal is in a good state
362
- obs = runtime.run_action(CmdRunAction('ls'))
363
- assert obs.exit_code == 0
364
-
365
- duration = time.time() - start_time
366
- iteration_stats['duration'] = duration
367
- logger.info(f'Completed iteration {i} in {duration:.2f} seconds')
368
-
369
- finally:
370
- runtime.close()
371
-
372
-
373
- @pytest.mark.skipif(
374
- TEST_IN_CI,
375
- reason='This test should only be run locally, not in CI.',
376
- )
377
- def test_stress_runtime_memory_limits():
378
- """Test runtime behavior under resource constraints."""
379
- config = get_config()
380
-
381
- # For Docker runtime, add resource constraints
382
- if config.runtime == 'docker':
383
- config.sandbox.docker_runtime_kwargs = {
384
- 'cpu_period': 100000, # 100ms
385
- 'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU)
386
- 'mem_limit': '4G', # 4 GB of memory
387
- 'memswap_limit': '0', # No swap
388
- 'mem_swappiness': 0, # Disable swapping
389
- 'oom_kill_disable': False, # Enable OOM killer
390
- }
391
- config.sandbox.runtime_startup_env_vars = {
392
- 'RUNTIME_MAX_MEMORY_GB': '3',
393
- 'RUNTIME_MEMORY_MONITOR': 'true',
394
- }
395
-
396
- try:
397
- runtime = create_runtime(config, headless_mode=True)
398
- call_async_from_sync(runtime.connect)
399
-
400
- # Install stress-ng
401
- action = CmdRunAction(
402
- command='sudo apt-get update && sudo apt-get install -y stress-ng'
403
- )
404
- logger.info(action, extra={'msg_type': 'ACTION'})
405
- obs = runtime.run_action(action)
406
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
407
- assert obs.exit_code == 0
408
-
409
- action = CmdRunAction(
410
- command='stress-ng --vm 1 --vm-bytes 6G --timeout 1m --metrics'
411
- )
412
- action.set_hard_timeout(120)
413
- logger.info(action, extra={'msg_type': 'ACTION'})
414
- obs = runtime.run_action(action)
415
- logger.info(obs, extra={'msg_type': 'OBSERVATION'})
416
- assert 'aborted early, out of system resources' in obs.content
417
- assert obs.exit_code == 3 # OOM killed!
418
-
419
- finally:
420
- runtime.close()
421
-
422
-
423
- @pytest.mark.skipif(
424
- TEST_IN_CI,
425
- reason='This test should only be run locally, not in CI.',
426
- )
427
- def test_stress_runtime_memory_limits_with_repeated_file_edit():
428
- """Test runtime behavior under resource constraints with repeated file edits."""
429
- config = get_config()
430
-
431
- # For Docker runtime, add resource constraints
432
- if config.runtime == 'docker':
433
- config.sandbox.docker_runtime_kwargs = {
434
- 'cpu_period': 100000, # 100ms
435
- 'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU)
436
- 'mem_limit': '4G', # 4 GB of memory
437
- 'memswap_limit': '0', # No swap
438
- 'mem_swappiness': 0, # Disable swapping
439
- 'oom_kill_disable': False, # Enable OOM killer
440
- }
441
- config.sandbox.runtime_startup_env_vars = {
442
- 'RUNTIME_MAX_MEMORY_GB': '3',
443
- 'RUNTIME_MEMORY_MONITOR': 'true',
444
- }
445
-
446
- try:
447
- runtime = create_runtime(config, headless_mode=True)
448
- call_async_from_sync(runtime.connect)
449
-
450
- # Create initial test file with base content
451
- test_file = '/tmp/test_file.txt'
452
- # base_content = 'content_1\n' * 1000 # Create a reasonably sized file
453
- base_content = ''
454
- for i in range(1000):
455
- base_content += f'content_{i:03d}\n'
456
-
457
- # Use FileWriteAction to create initial file
458
- write_action = FileWriteAction(path=test_file, content=base_content)
459
- obs = runtime.run_action(write_action)
460
-
461
- # Perform repeated file edits
462
- for i in range(1000):
463
- # Use FileEditAction with str_replace instead of IPythonRunCellAction
464
- edit_action = FileEditAction(
465
- command='str_replace',
466
- path=test_file,
467
- old_str=f'content_{i:03d}',
468
- new_str=f'-content_{i:03d}',
469
- )
470
- obs = runtime.run_action(edit_action)
471
- assert f'The file {test_file} has been edited' in obs.content, (
472
- f'Edit failed at iteration {i}'
473
- )
474
- logger.info(f'finished iteration {i}')
475
-
476
- # Verify final file state using FileEditAction view command
477
- action = FileEditAction(command='view', path=test_file)
478
- obs = runtime.run_action(action)
479
- assert '-content_999' in obs.content, 'Final content verification failed'
480
- logger.info('Final file content verified successfully')
481
-
482
- finally:
483
- runtime.close()
 
 
 
 
 
 
 
tests/runtime/trajs/basic.json DELETED
@@ -1,202 +0,0 @@
1
- [
2
- {
3
- "id": 0,
4
- "timestamp": "2025-01-27T22:44:56.900854",
5
- "source": "user",
6
- "message": "Please create a simple 2048 game. No need to run it. Please finish the interaction after your finish coding.",
7
- "action": "message",
8
- "args": {
9
- "content": "Please create a simple 2048 game. No need to run it. Please finish the interaction after your finish coding.",
10
- "image_urls": null,
11
- "wait_for_response": false
12
- },
13
- "timeout": 120
14
- },
15
- {
16
- "id": 3,
17
- "timestamp": "2025-01-27T22:45:13.764487",
18
- "source": "agent",
19
- "message": "",
20
- "action": "edit",
21
- "tool_call_metadata": {
22
- "function_name": "str_replace_editor",
23
- "tool_call_id": "toolu_017nBp7UfiGqyrA7j77HgtFP",
24
- "model_response": {
25
- "id": "chatcmpl-6814daca-5c62-431d-8c88-9dafead244f1",
26
- "created": 1738046713,
27
- "model": "claude-3-5-sonnet-20241022",
28
- "object": "chat.completion",
29
- "system_fingerprint": null,
30
- "choices": [
31
- {
32
- "finish_reason": "tool_calls",
33
- "index": 0,
34
- "message": {
35
- "content": "I'll help you create a simple 2048 game in Python. I'll use the str_replace_editor to create the file.",
36
- "role": "assistant",
37
- "tool_calls": [
38
- {
39
- "index": 1,
40
- "function": {
41
- "arguments": "{\"command\": \"create\", \"path\": \"/workspace/game_2048.py\", \"file_text\": \"import random\\nimport keyboard\\n\\nclass Game2048:\\n def __init__(self):\\n self.board = [[0] * 4 for _ in range(4)]\\n self.score = 0\\n self.spawn_number()\\n self.spawn_number()\\n\\n def spawn_number(self):\\n empty_cells = [(i, j) for i in range(4) for j in range(4) if self.board[i][j] == 0]\\n if empty_cells:\\n i, j = random.choice(empty_cells)\\n self.board[i][j] = 2 if random.random() < 0.9 else 4\\n\\n def move(self, direction):\\n merged = [[False] * 4 for _ in range(4)]\\n moved = False\\n\\n if direction in ['left', 'right']:\\n for i in range(4):\\n row = self.board[i][:]\\n new_row = self.merge_line(row, direction == 'right')\\n if new_row != row:\\n moved = True\\n self.board[i] = new_row\\n\\n elif direction in ['up', 'down']:\\n for j in range(4):\\n col = [self.board[i][j] for i in range(4)]\\n new_col = self.merge_line(col, direction == 'down')\\n if new_col != col:\\n moved = True\\n for i in range(4):\\n self.board[i][j] = new_col[i]\\n\\n if moved:\\n self.spawn_number()\\n\\n def merge_line(self, line, reverse=False):\\n if reverse:\\n line = line[::-1]\\n\\n new_line = [0] * 4\\n pos = 0\\n \\n # Remove zeros and merge equal numbers\\n numbers = [x for x in line if x != 0]\\n i = 0\\n while i < len(numbers):\\n if i + 1 < len(numbers) and numbers[i] == numbers[i + 1]:\\n new_line[pos] = numbers[i] * 2\\n self.score += numbers[i] * 2\\n i += 2\\n else:\\n new_line[pos] = numbers[i]\\n i += 1\\n pos += 1\\n\\n if reverse:\\n new_line = new_line[::-1]\\n \\n return new_line\\n\\n def is_game_over(self):\\n # Check if there are any empty cells\\n if any(0 in row for row in self.board):\\n return False\\n\\n # Check if there are any possible merges\\n for i in range(4):\\n for j in range(4):\\n current = self.board[i][j]\\n # Check right neighbor\\n if j < 3 and current == self.board[i][j + 1]:\\n return False\\n # Check bottom neighbor\\n if i < 3 and current == self.board[i + 1][j]:\\n return False\\n return True\\n\\n def display(self):\\n print(f\\\"\\\\nScore: {self.score}\\\\n\\\")\\n for row in self.board:\\n print(\\\" \\\".join(f\\\"{num:4}\\\" for num in row))\\n print(\\\"\\\\n\\\")\\n\\ndef main():\\n game = Game2048()\\n \\n print(\\\"Use arrow keys to move. Press 'q' to quit.\\\")\\n \\n while True:\\n game.display()\\n \\n if game.is_game_over():\\n print(\\\"Game Over!\\\")\\n break\\n \\n event = keyboard.read_event(suppress=True)\\n if event.event_type != 'down':\\n continue\\n \\n if event.name == 'q':\\n break\\n elif event.name == 'left':\\n game.move('left')\\n elif event.name == 'right':\\n game.move('right')\\n elif event.name == 'up':\\n game.move('up')\\n elif event.name == 'down':\\n game.move('down')\\n\\nif __name__ == \\\"__main__\\\":\\n main()\"}",
42
- "name": "str_replace_editor"
43
- },
44
- "id": "toolu_017nBp7UfiGqyrA7j77HgtFP",
45
- "type": "function"
46
- }
47
- ],
48
- "function_call": null
49
- }
50
- }
51
- ],
52
- "usage": {
53
- "completion_tokens": 1135,
54
- "prompt_tokens": 3486,
55
- "total_tokens": 4621,
56
- "completion_tokens_details": null,
57
- "prompt_tokens_details": {
58
- "audio_tokens": null,
59
- "cached_tokens": 0
60
- },
61
- "cache_creation_input_tokens": 3482,
62
- "cache_read_input_tokens": 0
63
- },
64
- "service_tier": null
65
- },
66
- "total_calls_in_response": 1
67
- },
68
- "args": {
69
- "path": "/workspace/game_2048.py",
70
- "content": "",
71
- "start": 1,
72
- "end": -1,
73
- "thought": "I'll help you create a simple 2048 game in Python. I'll use the str_replace_editor to create the file.",
74
- "impl_source": "oh_aci",
75
- "translated_ipython_code": "print(file_editor(**{'command': 'create', 'path': '/workspace/game_2048.py', 'file_text': 'import random\\nimport keyboard\\n\\nclass Game2048:\\n def __init__(self):\\n self.board = [[0] * 4 for _ in range(4)]\\n self.score = 0\\n self.spawn_number()\\n self.spawn_number()\\n\\n def spawn_number(self):\\n empty_cells = [(i, j) for i in range(4) for j in range(4) if self.board[i][j] == 0]\\n if empty_cells:\\n i, j = random.choice(empty_cells)\\n self.board[i][j] = 2 if random.random() < 0.9 else 4\\n\\n def move(self, direction):\\n merged = [[False] * 4 for _ in range(4)]\\n moved = False\\n\\n if direction in [\\'left\\', \\'right\\']:\\n for i in range(4):\\n row = self.board[i][:]\\n new_row = self.merge_line(row, direction == \\'right\\')\\n if new_row != row:\\n moved = True\\n self.board[i] = new_row\\n\\n elif direction in [\\'up\\', \\'down\\']:\\n for j in range(4):\\n col = [self.board[i][j] for i in range(4)]\\n new_col = self.merge_line(col, direction == \\'down\\')\\n if new_col != col:\\n moved = True\\n for i in range(4):\\n self.board[i][j] = new_col[i]\\n\\n if moved:\\n self.spawn_number()\\n\\n def merge_line(self, line, reverse=False):\\n if reverse:\\n line = line[::-1]\\n\\n new_line = [0] * 4\\n pos = 0\\n \\n # Remove zeros and merge equal numbers\\n numbers = [x for x in line if x != 0]\\n i = 0\\n while i < len(numbers):\\n if i + 1 < len(numbers) and numbers[i] == numbers[i + 1]:\\n new_line[pos] = numbers[i] * 2\\n self.score += numbers[i] * 2\\n i += 2\\n else:\\n new_line[pos] = numbers[i]\\n i += 1\\n pos += 1\\n\\n if reverse:\\n new_line = new_line[::-1]\\n \\n return new_line\\n\\n def is_game_over(self):\\n # Check if there are any empty cells\\n if any(0 in row for row in self.board):\\n return False\\n\\n # Check if there are any possible merges\\n for i in range(4):\\n for j in range(4):\\n current = self.board[i][j]\\n # Check right neighbor\\n if j < 3 and current == self.board[i][j + 1]:\\n return False\\n # Check bottom neighbor\\n if i < 3 and current == self.board[i + 1][j]:\\n return False\\n return True\\n\\n def display(self):\\n print(f\"\\\\nScore: {self.score}\\\\n\")\\n for row in self.board:\\n print(\" \".join(f\"{num:4}\" for num in row))\\n print(\"\\\\n\")\\n\\ndef main():\\n game = Game2048()\\n \\n print(\"Use arrow keys to move. Press \\'q\\' to quit.\")\\n \\n while True:\\n game.display()\\n \\n if game.is_game_over():\\n print(\"Game Over!\")\\n break\\n \\n event = keyboard.read_event(suppress=True)\\n if event.event_type != \\'down\\':\\n continue\\n \\n if event.name == \\'q\\':\\n break\\n elif event.name == \\'left\\':\\n game.move(\\'left\\')\\n elif event.name == \\'right\\':\\n game.move(\\'right\\')\\n elif event.name == \\'up\\':\\n game.move(\\'up\\')\\n elif event.name == \\'down\\':\\n game.move(\\'down\\')\\n\\nif __name__ == \"__main__\":\\n main()'}))"
76
- },
77
- "timeout": 120
78
- },
79
- {
80
- "id": 4,
81
- "timestamp": "2025-01-27T22:45:13.801384",
82
- "source": "agent",
83
- "message": "I edited the file /workspace/game_2048.py.",
84
- "cause": 3,
85
- "observation": "edit",
86
- "tool_call_metadata": {
87
- "function_name": "str_replace_editor",
88
- "tool_call_id": "toolu_017nBp7UfiGqyrA7j77HgtFP",
89
- "model_response": {
90
- "id": "chatcmpl-6814daca-5c62-431d-8c88-9dafead244f1",
91
- "created": 1738046713,
92
- "model": "claude-3-5-sonnet-20241022",
93
- "object": "chat.completion",
94
- "system_fingerprint": null,
95
- "choices": [
96
- {
97
- "finish_reason": "tool_calls",
98
- "index": 0,
99
- "message": {
100
- "content": "I'll help you create a simple 2048 game in Python. I'll use the str_replace_editor to create the file.",
101
- "role": "assistant",
102
- "tool_calls": [
103
- {
104
- "index": 1,
105
- "function": {
106
- "arguments": "{\"command\": \"create\", \"path\": \"/workspace/game_2048.py\", \"file_text\": \"import random\\nimport keyboard\\n\\nclass Game2048:\\n def __init__(self):\\n self.board = [[0] * 4 for _ in range(4)]\\n self.score = 0\\n self.spawn_number()\\n self.spawn_number()\\n\\n def spawn_number(self):\\n empty_cells = [(i, j) for i in range(4) for j in range(4) if self.board[i][j] == 0]\\n if empty_cells:\\n i, j = random.choice(empty_cells)\\n self.board[i][j] = 2 if random.random() < 0.9 else 4\\n\\n def move(self, direction):\\n merged = [[False] * 4 for _ in range(4)]\\n moved = False\\n\\n if direction in ['left', 'right']:\\n for i in range(4):\\n row = self.board[i][:]\\n new_row = self.merge_line(row, direction == 'right')\\n if new_row != row:\\n moved = True\\n self.board[i] = new_row\\n\\n elif direction in ['up', 'down']:\\n for j in range(4):\\n col = [self.board[i][j] for i in range(4)]\\n new_col = self.merge_line(col, direction == 'down')\\n if new_col != col:\\n moved = True\\n for i in range(4):\\n self.board[i][j] = new_col[i]\\n\\n if moved:\\n self.spawn_number()\\n\\n def merge_line(self, line, reverse=False):\\n if reverse:\\n line = line[::-1]\\n\\n new_line = [0] * 4\\n pos = 0\\n \\n # Remove zeros and merge equal numbers\\n numbers = [x for x in line if x != 0]\\n i = 0\\n while i < len(numbers):\\n if i + 1 < len(numbers) and numbers[i] == numbers[i + 1]:\\n new_line[pos] = numbers[i] * 2\\n self.score += numbers[i] * 2\\n i += 2\\n else:\\n new_line[pos] = numbers[i]\\n i += 1\\n pos += 1\\n\\n if reverse:\\n new_line = new_line[::-1]\\n \\n return new_line\\n\\n def is_game_over(self):\\n # Check if there are any empty cells\\n if any(0 in row for row in self.board):\\n return False\\n\\n # Check if there are any possible merges\\n for i in range(4):\\n for j in range(4):\\n current = self.board[i][j]\\n # Check right neighbor\\n if j < 3 and current == self.board[i][j + 1]:\\n return False\\n # Check bottom neighbor\\n if i < 3 and current == self.board[i + 1][j]:\\n return False\\n return True\\n\\n def display(self):\\n print(f\\\"\\\\nScore: {self.score}\\\\n\\\")\\n for row in self.board:\\n print(\\\" \\\".join(f\\\"{num:4}\\\" for num in row))\\n print(\\\"\\\\n\\\")\\n\\ndef main():\\n game = Game2048()\\n \\n print(\\\"Use arrow keys to move. Press 'q' to quit.\\\")\\n \\n while True:\\n game.display()\\n \\n if game.is_game_over():\\n print(\\\"Game Over!\\\")\\n break\\n \\n event = keyboard.read_event(suppress=True)\\n if event.event_type != 'down':\\n continue\\n \\n if event.name == 'q':\\n break\\n elif event.name == 'left':\\n game.move('left')\\n elif event.name == 'right':\\n game.move('right')\\n elif event.name == 'up':\\n game.move('up')\\n elif event.name == 'down':\\n game.move('down')\\n\\nif __name__ == \\\"__main__\\\":\\n main()\"}",
107
- "name": "str_replace_editor"
108
- },
109
- "id": "toolu_017nBp7UfiGqyrA7j77HgtFP",
110
- "type": "function"
111
- }
112
- ],
113
- "function_call": null
114
- }
115
- }
116
- ],
117
- "usage": {
118
- "completion_tokens": 1135,
119
- "prompt_tokens": 3486,
120
- "total_tokens": 4621,
121
- "completion_tokens_details": null,
122
- "prompt_tokens_details": {
123
- "audio_tokens": null,
124
- "cached_tokens": 0
125
- },
126
- "cache_creation_input_tokens": 3482,
127
- "cache_read_input_tokens": 0
128
- },
129
- "service_tier": null
130
- },
131
- "total_calls_in_response": 1
132
- },
133
- "content": "--- /workspace/game_2048.py\n+++ /workspace/game_2048.py\n@@ -0,0 +1,2 @@\n+import random\n+import keyboard\n@@ -1,0 +4,114 @@\n+class Game2048:\n+ def __init__(self):\n+ self.board = [[0] * 4 for _ in range(4)]\n+ self.score = 0\n+ self.spawn_number()\n+ self.spawn_number()\n+\n+ def spawn_number(self):\n+ empty_cells = [(i, j) for i in range(4) for j in range(4) if self.board[i][j] == 0]\n+ if empty_cells:\n+ i, j = random.choice(empty_cells)\n+ self.board[i][j] = 2 if random.random() < 0.9 else 4\n+\n+ def move(self, direction):\n+ merged = [[False] * 4 for _ in range(4)]\n+ moved = False\n+\n+ if direction in ['left', 'right']:\n+ for i in range(4):\n+ row = self.board[i][:]\n+ new_row = self.merge_line(row, direction == 'right')\n+ if new_row != row:\n+ moved = True\n+ self.board[i] = new_row\n+\n+ elif direction in ['up', 'down']:\n+ for j in range(4):\n+ col = [self.board[i][j] for i in range(4)]\n+ new_col = self.merge_line(col, direction == 'down')\n+ if new_col != col:\n+ moved = True\n+ for i in range(4):\n+ self.board[i][j] = new_col[i]\n+\n+ if moved:\n+ self.spawn_number()\n+\n+ def merge_line(self, line, reverse=False):\n+ if reverse:\n+ line = line[::-1]\n+\n+ new_line = [0] * 4\n+ pos = 0\n+\n+ # Remove zeros and merge equal numbers\n+ numbers = [x for x in line if x != 0]\n+ i = 0\n+ while i < len(numbers):\n+ if i + 1 < len(numbers) and numbers[i] == numbers[i + 1]:\n+ new_line[pos] = numbers[i] * 2\n+ self.score += numbers[i] * 2\n+ i += 2\n+ else:\n+ new_line[pos] = numbers[i]\n+ i += 1\n+ pos += 1\n+\n+ if reverse:\n+ new_line = new_line[::-1]\n+\n+ return new_line\n+\n+ def is_game_over(self):\n+ # Check if there are any empty cells\n+ if any(0 in row for row in self.board):\n+ return False\n+\n+ # Check if there are any possible merges\n+ for i in range(4):\n+ for j in range(4):\n+ current = self.board[i][j]\n+ # Check right neighbor\n+ if j < 3 and current == self.board[i][j + 1]:\n+ return False\n+ # Check bottom neighbor\n+ if i < 3 and current == self.board[i + 1][j]:\n+ return False\n+ return True\n+\n+ def display(self):\n+ print(f\"\\nScore: {self.score}\\n\")\n+ for row in self.board:\n+ print(\" \".join(f\"{num:4}\" for num in row))\n+ print(\"\\n\")\n+\n+def main():\n+ game = Game2048()\n+\n+ print(\"Use arrow keys to move. Press 'q' to quit.\")\n+\n+ while True:\n+ game.display()\n+\n+ if game.is_game_over():\n+ print(\"Game Over!\")\n+ break\n+\n+ event = keyboard.read_event(suppress=True)\n+ if event.event_type != 'down':\n+ continue\n+\n+ if event.name == 'q':\n+ break\n+ elif event.name == 'left':\n+ game.move('left')\n+ elif event.name == 'right':\n+ game.move('right')\n+ elif event.name == 'up':\n+ game.move('up')\n+ elif event.name == 'down':\n+ game.move('down')\n+\n+if __name__ == \"__main__\":\n+ main()",
134
- "extras": {
135
- "path": "/workspace/game_2048.py",
136
- "prev_exist": false,
137
- "old_content": null,
138
- "new_content": "import random\nimport keyboard\n\nclass Game2048:\n def __init__(self):\n self.board = [[0] * 4 for _ in range(4)]\n self.score = 0\n self.spawn_number()\n self.spawn_number()\n\n def spawn_number(self):\n empty_cells = [(i, j) for i in range(4) for j in range(4) if self.board[i][j] == 0]\n if empty_cells:\n i, j = random.choice(empty_cells)\n self.board[i][j] = 2 if random.random() < 0.9 else 4\n\n def move(self, direction):\n merged = [[False] * 4 for _ in range(4)]\n moved = False\n\n if direction in ['left', 'right']:\n for i in range(4):\n row = self.board[i][:]\n new_row = self.merge_line(row, direction == 'right')\n if new_row != row:\n moved = True\n self.board[i] = new_row\n\n elif direction in ['up', 'down']:\n for j in range(4):\n col = [self.board[i][j] for i in range(4)]\n new_col = self.merge_line(col, direction == 'down')\n if new_col != col:\n moved = True\n for i in range(4):\n self.board[i][j] = new_col[i]\n\n if moved:\n self.spawn_number()\n\n def merge_line(self, line, reverse=False):\n if reverse:\n line = line[::-1]\n\n new_line = [0] * 4\n pos = 0\n \n # Remove zeros and merge equal numbers\n numbers = [x for x in line if x != 0]\n i = 0\n while i < len(numbers):\n if i + 1 < len(numbers) and numbers[i] == numbers[i + 1]:\n new_line[pos] = numbers[i] * 2\n self.score += numbers[i] * 2\n i += 2\n else:\n new_line[pos] = numbers[i]\n i += 1\n pos += 1\n\n if reverse:\n new_line = new_line[::-1]\n \n return new_line\n\n def is_game_over(self):\n # Check if there are any empty cells\n if any(0 in row for row in self.board):\n return False\n\n # Check if there are any possible merges\n for i in range(4):\n for j in range(4):\n current = self.board[i][j]\n # Check right neighbor\n if j < 3 and current == self.board[i][j + 1]:\n return False\n # Check bottom neighbor\n if i < 3 and current == self.board[i + 1][j]:\n return False\n return True\n\n def display(self):\n print(f\"\\nScore: {self.score}\\n\")\n for row in self.board:\n print(\" \".join(f\"{num:4}\" for num in row))\n print(\"\\n\")\n\ndef main():\n game = Game2048()\n \n print(\"Use arrow keys to move. Press 'q' to quit.\")\n \n while True:\n game.display()\n \n if game.is_game_over():\n print(\"Game Over!\")\n break\n \n event = keyboard.read_event(suppress=True)\n if event.event_type != 'down':\n continue\n \n if event.name == 'q':\n break\n elif event.name == 'left':\n game.move('left')\n elif event.name == 'right':\n game.move('right')\n elif event.name == 'up':\n game.move('up')\n elif event.name == 'down':\n game.move('down')\n\nif __name__ == \"__main__\":\n main()",
139
- "impl_source": "oh_aci",
140
- "formatted_output_and_error": "File created successfully at: /workspace/game_2048.py"
141
- }
142
- },
143
- {
144
- "id": 5,
145
- "timestamp": "2025-01-27T22:45:18.405685",
146
- "source": "agent",
147
- "message": "I've created a simple implementation of the 2048 game with the following features:\n\n1. A 4x4 game board\n2. Arrow key controls for moving tiles\n3. Score tracking\n4. Game over detection\n5. Random spawning of new numbers (2 or 4)\n6. Merging of equal numbers when moving\n\nTo run the game, you would need to:\n1. Install the keyboard package (`pip install keyboard`)\n2. Run the script with sudo privileges (required for keyboard input)\n\nThe game uses the arrow keys for movement and 'q' to quit. The game ends when no more moves are possible.",
148
- "action": "finish",
149
- "tool_call_metadata": {
150
- "function_name": "finish",
151
- "tool_call_id": "toolu_01BUKu3B7nY574atNECkBiFk",
152
- "model_response": {
153
- "id": "chatcmpl-27cc0ad9-6b86-4f1f-9f72-d9c7ab2282f7",
154
- "created": 1738046718,
155
- "model": "claude-3-5-sonnet-20241022",
156
- "object": "chat.completion",
157
- "system_fingerprint": null,
158
- "choices": [
159
- {
160
- "finish_reason": "tool_calls",
161
- "index": 0,
162
- "message": {
163
- "content": "I've created a simple implementation of the 2048 game with the following features:\n\n1. A 4x4 game board\n2. Arrow key controls for moving tiles\n3. Score tracking\n4. Game over detection\n5. Random spawning of new numbers (2 or 4)\n6. Merging of equal numbers when moving\n\nTo run the game, you would need to:\n1. Install the keyboard package (`pip install keyboard`)\n2. Run the script with sudo privileges (required for keyboard input)\n\nThe game uses the arrow keys for movement and 'q' to quit. The game ends when no more moves are possible.",
164
- "role": "assistant",
165
- "tool_calls": [
166
- {
167
- "index": 1,
168
- "function": {
169
- "arguments": "{}",
170
- "name": "finish"
171
- },
172
- "id": "toolu_01BUKu3B7nY574atNECkBiFk",
173
- "type": "function"
174
- }
175
- ],
176
- "function_call": null
177
- }
178
- }
179
- ],
180
- "usage": {
181
- "completion_tokens": 177,
182
- "prompt_tokens": 4646,
183
- "total_tokens": 4823,
184
- "completion_tokens_details": null,
185
- "prompt_tokens_details": {
186
- "audio_tokens": null,
187
- "cached_tokens": 3482
188
- },
189
- "cache_creation_input_tokens": 1158,
190
- "cache_read_input_tokens": 3482
191
- },
192
- "service_tier": null
193
- },
194
- "total_calls_in_response": 1
195
- },
196
- "args": {
197
- "outputs": {},
198
- "thought": "I've created a simple implementation of the 2048 game with the following features:\n\n1. A 4x4 game board\n2. Arrow key controls for moving tiles\n3. Score tracking\n4. Game over detection\n5. Random spawning of new numbers (2 or 4)\n6. Merging of equal numbers when moving\n\nTo run the game, you would need to:\n1. Install the keyboard package (`pip install keyboard`)\n2. Run the script with sudo privileges (required for keyboard input)\n\nThe game uses the arrow keys for movement and 'q' to quit. The game ends when no more moves are possible."
199
- },
200
- "timeout": 120
201
- }
202
- ]
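The fixture above is a recorded trajectory: a JSON list of events (a user message, agent actions with their `tool_call_metadata`, and the resulting observations) that the deleted replay tests fed back into the runtime. A minimal sketch of inspecting such a fixture with only the standard library, assuming just the structure visible above (the `summarize_trajectory` helper name is hypothetical):

```python
import json
from pathlib import Path


def summarize_trajectory(path: str) -> None:
    """Print a one-line summary per event in a recorded trajectory fixture.

    Sketch only: it assumes the fixture is a JSON list of event dicts with an
    'id', a 'source', and either an 'action' or an 'observation' key, as in
    tests/runtime/trajs/basic.json.
    """
    events = json.loads(Path(path).read_text())
    for event in events:
        kind = event.get('action') or event.get('observation') or 'unknown'
        source = event.get('source', '?')
        print(f"#{event['id']:>3} {source:<11} {kind}")


summarize_trajectory('tests/runtime/trajs/basic.json')
```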
 
tests/runtime/trajs/basic_gui_mode.json DELETED
@@ -1,631 +0,0 @@
1
- [
2
- {
3
- "id": 0,
4
- "timestamp": "2025-01-20T20:29:13.519638",
5
- "source": "environment",
6
- "message": "",
7
- "observation": "agent_state_changed",
8
- "content": "",
9
- "extras": {
10
- "agent_state": "loading"
11
- }
12
- },
13
- {
14
- "id": 1,
15
- "timestamp": "2025-01-20T20:29:32.163218",
16
- "source": "environment",
17
- "message": "Agent state changed to init",
18
- "action": "change_agent_state",
19
- "args": {
20
- "agent_state": "init",
21
- "thought": ""
22
- }
23
- },
24
- {
25
- "id": 2,
26
- "timestamp": "2025-01-20T20:29:32.165837",
27
- "source": "environment",
28
- "message": "No observation",
29
- "cause": 1,
30
- "observation": "null",
31
- "content": "",
32
- "extras": {}
33
- },
34
- {
35
- "id": 3,
36
- "timestamp": "2025-01-20T20:29:32.176175",
37
- "source": "environment",
38
- "message": "",
39
- "observation": "agent_state_changed",
40
- "content": "",
41
- "extras": {
42
- "agent_state": "init"
43
- }
44
- },
45
- {
46
- "id": 4,
47
- "timestamp": "2025-01-20T20:29:32.262843",
48
- "source": "user",
49
- "message": "I want to create a VueJS app that allows me to:\n* See all the items on my todo list\n* add a new item to the list\n* mark an item as done\n* totally remove an item from the list\n* change the text of an item\n* set a due date on the item\n\nThis should be a client-only app with no backend. The list should persist in localStorage.",
50
- "action": "message",
51
- "args": {
52
- "content": "I want to create a VueJS app that allows me to:\n* See all the items on my todo list\n* add a new item to the list\n* mark an item as done\n* totally remove an item from the list\n* change the text of an item\n* set a due date on the item\n\nThis should be a client-only app with no backend. The list should persist in localStorage.",
53
- "image_urls": [],
54
- "wait_for_response": false
55
- }
56
- },
57
- {
58
- "id": 5,
59
- "timestamp": "2025-01-20T20:29:32.286160",
60
- "source": "environment",
61
- "message": "",
62
- "observation": "agent_state_changed",
63
- "content": "",
64
- "extras": {
65
- "agent_state": "running"
66
- }
67
- },
68
- {
69
- "id": 6,
70
- "timestamp": "2025-01-20T20:29:32.289136",
71
- "source": "user",
72
- "message": "No observation",
73
- "cause": 4,
74
- "observation": "null",
75
- "content": "",
76
- "extras": {}
77
- },
78
- {
79
- "id": 7,
80
- "timestamp": "2025-01-20T20:29:35.040676",
81
- "source": "agent",
82
- "message": "Running command: mkdir -p /workspace/todo-app",
83
- "action": "run",
84
- "tool_call_metadata": {
85
- "function_name": "execute_bash",
86
- "tool_call_id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
87
- "model_response": {
88
- "id": "chatcmpl-6bb9f175-c587-44b4-8c8d-f91fc4827590",
89
- "created": 1737433775,
90
- "model": "claude-3-5-sonnet-20241022",
91
- "object": "chat.completion",
92
- "system_fingerprint": null,
93
- "choices": [
94
- {
95
- "finish_reason": "tool_calls",
96
- "index": 0,
97
- "message": {
98
- "content": "I'll help you create a Vue.js application with all the requested todo list functionality. Let's create this step by step.\n\nFirst, let's create a new directory and set up the basic files:",
99
- "role": "assistant",
100
- "tool_calls": [
101
- {
102
- "index": 1,
103
- "function": {
104
- "arguments": "{\"command\": \"mkdir -p /workspace/todo-app\"}",
105
- "name": "execute_bash"
106
- },
107
- "id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
108
- "type": "function"
109
- }
110
- ],
111
- "function_call": null
112
- }
113
- }
114
- ],
115
- "usage": {
116
- "completion_tokens": 104,
117
- "prompt_tokens": 3439,
118
- "total_tokens": 3543,
119
- "completion_tokens_details": null,
120
- "prompt_tokens_details": {
121
- "audio_tokens": null,
122
- "cached_tokens": 0
123
- },
124
- "cache_creation_input_tokens": 3435,
125
- "cache_read_input_tokens": 0
126
- },
127
- "service_tier": null
128
- },
129
- "total_calls_in_response": 1
130
- },
131
- "args": {
132
- "command": "mkdir -p /workspace/todo-app",
133
- "is_input": false,
134
- "thought": "I'll help you create a Vue.js application with all the requested todo list functionality. Let's create this step by step.\n\nFirst, let's create a new directory and set up the basic files:",
135
- "blocking": false,
136
- "hidden": false,
137
- "confirmation_state": "confirmed"
138
- }
139
- },
140
- {
141
- "id": 8,
142
- "timestamp": "2025-01-20T20:29:35.699033",
143
- "source": "agent",
144
- "message": "Command `mkdir -p /workspace/todo-app` executed with exit code 0.",
145
- "cause": 7,
146
- "observation": "run",
147
- "tool_call_metadata": {
148
- "function_name": "execute_bash",
149
- "tool_call_id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
150
- "model_response": {
151
- "id": "chatcmpl-6bb9f175-c587-44b4-8c8d-f91fc4827590",
152
- "created": 1737433775,
153
- "model": "claude-3-5-sonnet-20241022",
154
- "object": "chat.completion",
155
- "system_fingerprint": null,
156
- "choices": [
157
- {
158
- "finish_reason": "tool_calls",
159
- "index": 0,
160
- "message": {
161
- "content": "I'll help you create a Vue.js application with all the requested todo list functionality. Let's create this step by step.\n\nFirst, let's create a new directory and set up the basic files:",
162
- "role": "assistant",
163
- "tool_calls": [
164
- {
165
- "index": 1,
166
- "function": {
167
- "arguments": "{\"command\": \"mkdir -p /workspace/todo-app\"}",
168
- "name": "execute_bash"
169
- },
170
- "id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
171
- "type": "function"
172
- }
173
- ],
174
- "function_call": null
175
- }
176
- }
177
- ],
178
- "usage": {
179
- "completion_tokens": 104,
180
- "prompt_tokens": 3439,
181
- "total_tokens": 3543,
182
- "completion_tokens_details": null,
183
- "prompt_tokens_details": {
184
- "audio_tokens": null,
185
- "cached_tokens": 0
186
- },
187
- "cache_creation_input_tokens": 3435,
188
- "cache_read_input_tokens": 0
189
- },
190
- "service_tier": null
191
- },
192
- "total_calls_in_response": 1
193
- },
194
- "content": "",
195
- "extras": {
196
- "command": "mkdir -p /workspace/todo-app",
197
- "metadata": {
198
- "exit_code": 0,
199
- "pid": -1,
200
- "username": "openhands",
201
- "hostname": "18f4db943d2f",
202
- "working_dir": "/workspace",
203
- "py_interpreter_path": "/openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python",
204
- "prefix": "",
205
- "suffix": "\n[The command completed with exit code 0.]"
206
- },
207
- "hidden": false
208
- },
209
- "success": true
210
- },
211
- {
212
- "id": 9,
213
- "timestamp": "2025-01-20T20:29:48.925379",
214
- "source": "agent",
215
- "message": "",
216
- "action": "edit",
217
- "tool_call_metadata": {
218
- "function_name": "str_replace_editor",
219
- "tool_call_id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
220
- "model_response": {
221
- "id": "chatcmpl-55936480-cbf9-4d24-bd4a-329bd2f1f65c",
222
- "created": 1737433788,
223
- "model": "claude-3-5-sonnet-20241022",
224
- "object": "chat.completion",
225
- "system_fingerprint": null,
226
- "choices": [
227
- {
228
- "finish_reason": "tool_calls",
229
- "index": 0,
230
- "message": {
231
- "content": "Let's create the main HTML file:",
232
- "role": "assistant",
233
- "tool_calls": [
234
- {
235
- "index": 1,
236
- "function": {
237
- "arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/index.html\", \"file_text\": \"<!DOCTYPE html>\\n<html lang=\\\"en\\\">\\n<head>\\n <meta charset=\\\"UTF-8\\\">\\n <meta name=\\\"viewport\\\" content=\\\"width=device-width, initial-scale=1.0\\\">\\n <title>Vue Todo App</title>\\n <script src=\\\"https://unpkg.com/vue@3/dist/vue.global.js\\\"></script>\\n <link href=\\\"https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css\\\" rel=\\\"stylesheet\\\">\\n <style>\\n .completed {\\n text-decoration: line-through;\\n color: #6c757d;\\n }\\n .todo-item {\\n display: flex;\\n align-items: center;\\n gap: 1rem;\\n padding: 0.5rem;\\n border-bottom: 1px solid #dee2e6;\\n }\\n .todo-item:last-child {\\n border-bottom: none;\\n }\\n </style>\\n</head>\\n<body>\\n <div id=\\\"app\\\" class=\\\"container mt-5\\\">\\n <h1 class=\\\"mb-4\\\">Todo List</h1>\\n \\n <!-- Add new todo form -->\\n <div class=\\\"mb-4\\\">\\n <form @submit.prevent=\\\"addTodo\\\" class=\\\"row g-3\\\">\\n <div class=\\\"col-md-6\\\">\\n <input type=\\\"text\\\" v-model=\\\"newTodoText\\\" class=\\\"form-control\\\" placeholder=\\\"Enter new todo\\\" required>\\n </div>\\n <div class=\\\"col-md-4\\\">\\n <input type=\\\"date\\\" v-model=\\\"newTodoDueDate\\\" class=\\\"form-control\\\">\\n </div>\\n <div class=\\\"col-md-2\\\">\\n <button type=\\\"submit\\\" class=\\\"btn btn-primary w-100\\\">Add Todo</button>\\n </div>\\n </form>\\n </div>\\n\\n <!-- Todo list -->\\n <div class=\\\"card\\\">\\n <div class=\\\"card-body\\\">\\n <div v-if=\\\"todos.length === 0\\\" class=\\\"text-center text-muted\\\">\\n No todos yet. Add one above!\\n </div>\\n <div v-for=\\\"todo in todos\\\" :key=\\\"todo.id\\\" class=\\\"todo-item\\\">\\n <input type=\\\"checkbox\\\" \\n :checked=\\\"todo.completed\\\" \\n @change=\\\"toggleTodo(todo)\\\"\\n class=\\\"form-check-input\\\">\\n \\n <div v-if=\\\"editingTodo !== todo.id\\\" \\n :class=\\\"{ completed: todo.completed }\\\"\\n class=\\\"flex-grow-1\\\">\\n {{ todo.text }}\\n <small v-if=\\\"todo.dueDate\\\" class=\\\"text-muted ms-2\\\">\\n Due: {{ formatDate(todo.dueDate) }}\\n </small>\\n </div>\\n \\n <div v-else class=\\\"flex-grow-1 d-flex gap-2\\\">\\n <input type=\\\"text\\\" \\n v-model=\\\"editingText\\\" \\n class=\\\"form-control\\\"\\n @keyup.enter=\\\"saveTodoEdit(todo)\\\"\\n @keyup.esc=\\\"cancelEdit\\\">\\n <input type=\\\"date\\\" \\n v-model=\\\"editingDueDate\\\" \\n class=\\\"form-control\\\">\\n </div>\\n\\n <div class=\\\"btn-group\\\">\\n <button v-if=\\\"editingTodo !== todo.id\\\"\\n @click=\\\"startEdit(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-secondary\\\">\\n Edit\\n </button>\\n <button v-else\\n @click=\\\"saveTodoEdit(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-primary\\\">\\n Save\\n </button>\\n <button @click=\\\"removeTodo(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-danger\\\">\\n Delete\\n </button>\\n </div>\\n </div>\\n </div>\\n </div>\\n </div>\\n\\n <script src=\\\"app.js\\\"></script>\\n</body>\\n</html>\"}",
238
- "name": "str_replace_editor"
239
- },
240
- "id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
241
- "type": "function"
242
- }
243
- ],
244
- "function_call": null
245
- }
246
- }
247
- ],
248
- "usage": {
249
- "completion_tokens": 1142,
250
- "prompt_tokens": 3601,
251
- "total_tokens": 4743,
252
- "completion_tokens_details": null,
253
- "prompt_tokens_details": {
254
- "audio_tokens": null,
255
- "cached_tokens": 3435
256
- },
257
- "cache_creation_input_tokens": 160,
258
- "cache_read_input_tokens": 3435
259
- },
260
- "service_tier": null
261
- },
262
- "total_calls_in_response": 1
263
- },
264
- "args": {
265
- "path": "/workspace/todo-app/index.html",
266
- "content": "",
267
- "start": 1,
268
- "end": -1,
269
- "thought": "Let's create the main HTML file:",
270
- "impl_source": "oh_aci",
271
- "translated_ipython_code": "print(file_editor(**{'command': 'create', 'path': '/workspace/todo-app/index.html', 'file_text': '<!DOCTYPE html>\\n<html lang=\"en\">\\n<head>\\n <meta charset=\"UTF-8\">\\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\\n <title>Vue Todo App</title>\\n <script src=\"https://unpkg.com/vue@3/dist/vue.global.js\"></script>\\n <link href=\"https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css\" rel=\"stylesheet\">\\n <style>\\n .completed {\\n text-decoration: line-through;\\n color: #6c757d;\\n }\\n .todo-item {\\n display: flex;\\n align-items: center;\\n gap: 1rem;\\n padding: 0.5rem;\\n border-bottom: 1px solid #dee2e6;\\n }\\n .todo-item:last-child {\\n border-bottom: none;\\n }\\n </style>\\n</head>\\n<body>\\n <div id=\"app\" class=\"container mt-5\">\\n <h1 class=\"mb-4\">Todo List</h1>\\n \\n <!-- Add new todo form -->\\n <div class=\"mb-4\">\\n <form @submit.prevent=\"addTodo\" class=\"row g-3\">\\n <div class=\"col-md-6\">\\n <input type=\"text\" v-model=\"newTodoText\" class=\"form-control\" placeholder=\"Enter new todo\" required>\\n </div>\\n <div class=\"col-md-4\">\\n <input type=\"date\" v-model=\"newTodoDueDate\" class=\"form-control\">\\n </div>\\n <div class=\"col-md-2\">\\n <button type=\"submit\" class=\"btn btn-primary w-100\">Add Todo</button>\\n </div>\\n </form>\\n </div>\\n\\n <!-- Todo list -->\\n <div class=\"card\">\\n <div class=\"card-body\">\\n <div v-if=\"todos.length === 0\" class=\"text-center text-muted\">\\n No todos yet. Add one above!\\n </div>\\n <div v-for=\"todo in todos\" :key=\"todo.id\" class=\"todo-item\">\\n <input type=\"checkbox\" \\n :checked=\"todo.completed\" \\n @change=\"toggleTodo(todo)\"\\n class=\"form-check-input\">\\n \\n <div v-if=\"editingTodo !== todo.id\" \\n :class=\"{ completed: todo.completed }\"\\n class=\"flex-grow-1\">\\n {{ todo.text }}\\n <small v-if=\"todo.dueDate\" class=\"text-muted ms-2\">\\n Due: {{ formatDate(todo.dueDate) }}\\n </small>\\n </div>\\n \\n <div v-else class=\"flex-grow-1 d-flex gap-2\">\\n <input type=\"text\" \\n v-model=\"editingText\" \\n class=\"form-control\"\\n @keyup.enter=\"saveTodoEdit(todo)\"\\n @keyup.esc=\"cancelEdit\">\\n <input type=\"date\" \\n v-model=\"editingDueDate\" \\n class=\"form-control\">\\n </div>\\n\\n <div class=\"btn-group\">\\n <button v-if=\"editingTodo !== todo.id\"\\n @click=\"startEdit(todo)\" \\n class=\"btn btn-sm btn-outline-secondary\">\\n Edit\\n </button>\\n <button v-else\\n @click=\"saveTodoEdit(todo)\" \\n class=\"btn btn-sm btn-outline-primary\">\\n Save\\n </button>\\n <button @click=\"removeTodo(todo)\" \\n class=\"btn btn-sm btn-outline-danger\">\\n Delete\\n </button>\\n </div>\\n </div>\\n </div>\\n </div>\\n </div>\\n\\n <script src=\"app.js\"></script>\\n</body>\\n</html>'}))"
272
- }
273
- },
274
- {
275
- "id": 10,
276
- "timestamp": "2025-01-20T20:29:48.967655",
277
- "source": "agent",
278
- "message": "I edited the file /workspace/todo-app/index.html.",
279
- "cause": 9,
280
- "observation": "edit",
281
- "tool_call_metadata": {
282
- "function_name": "str_replace_editor",
283
- "tool_call_id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
284
- "model_response": {
285
- "id": "chatcmpl-55936480-cbf9-4d24-bd4a-329bd2f1f65c",
286
- "created": 1737433788,
287
- "model": "claude-3-5-sonnet-20241022",
288
- "object": "chat.completion",
289
- "system_fingerprint": null,
290
- "choices": [
291
- {
292
- "finish_reason": "tool_calls",
293
- "index": 0,
294
- "message": {
295
- "content": "Let's create the main HTML file:",
296
- "role": "assistant",
297
- "tool_calls": [
298
- {
299
- "index": 1,
300
- "function": {
301
- "arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/index.html\", \"file_text\": \"<!DOCTYPE html>\\n<html lang=\\\"en\\\">\\n<head>\\n <meta charset=\\\"UTF-8\\\">\\n <meta name=\\\"viewport\\\" content=\\\"width=device-width, initial-scale=1.0\\\">\\n <title>Vue Todo App</title>\\n <script src=\\\"https://unpkg.com/vue@3/dist/vue.global.js\\\"></script>\\n <link href=\\\"https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css\\\" rel=\\\"stylesheet\\\">\\n <style>\\n .completed {\\n text-decoration: line-through;\\n color: #6c757d;\\n }\\n .todo-item {\\n display: flex;\\n align-items: center;\\n gap: 1rem;\\n padding: 0.5rem;\\n border-bottom: 1px solid #dee2e6;\\n }\\n .todo-item:last-child {\\n border-bottom: none;\\n }\\n </style>\\n</head>\\n<body>\\n <div id=\\\"app\\\" class=\\\"container mt-5\\\">\\n <h1 class=\\\"mb-4\\\">Todo List</h1>\\n \\n <!-- Add new todo form -->\\n <div class=\\\"mb-4\\\">\\n <form @submit.prevent=\\\"addTodo\\\" class=\\\"row g-3\\\">\\n <div class=\\\"col-md-6\\\">\\n <input type=\\\"text\\\" v-model=\\\"newTodoText\\\" class=\\\"form-control\\\" placeholder=\\\"Enter new todo\\\" required>\\n </div>\\n <div class=\\\"col-md-4\\\">\\n <input type=\\\"date\\\" v-model=\\\"newTodoDueDate\\\" class=\\\"form-control\\\">\\n </div>\\n <div class=\\\"col-md-2\\\">\\n <button type=\\\"submit\\\" class=\\\"btn btn-primary w-100\\\">Add Todo</button>\\n </div>\\n </form>\\n </div>\\n\\n <!-- Todo list -->\\n <div class=\\\"card\\\">\\n <div class=\\\"card-body\\\">\\n <div v-if=\\\"todos.length === 0\\\" class=\\\"text-center text-muted\\\">\\n No todos yet. Add one above!\\n </div>\\n <div v-for=\\\"todo in todos\\\" :key=\\\"todo.id\\\" class=\\\"todo-item\\\">\\n <input type=\\\"checkbox\\\" \\n :checked=\\\"todo.completed\\\" \\n @change=\\\"toggleTodo(todo)\\\"\\n class=\\\"form-check-input\\\">\\n \\n <div v-if=\\\"editingTodo !== todo.id\\\" \\n :class=\\\"{ completed: todo.completed }\\\"\\n class=\\\"flex-grow-1\\\">\\n {{ todo.text }}\\n <small v-if=\\\"todo.dueDate\\\" class=\\\"text-muted ms-2\\\">\\n Due: {{ formatDate(todo.dueDate) }}\\n </small>\\n </div>\\n \\n <div v-else class=\\\"flex-grow-1 d-flex gap-2\\\">\\n <input type=\\\"text\\\" \\n v-model=\\\"editingText\\\" \\n class=\\\"form-control\\\"\\n @keyup.enter=\\\"saveTodoEdit(todo)\\\"\\n @keyup.esc=\\\"cancelEdit\\\">\\n <input type=\\\"date\\\" \\n v-model=\\\"editingDueDate\\\" \\n class=\\\"form-control\\\">\\n </div>\\n\\n <div class=\\\"btn-group\\\">\\n <button v-if=\\\"editingTodo !== todo.id\\\"\\n @click=\\\"startEdit(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-secondary\\\">\\n Edit\\n </button>\\n <button v-else\\n @click=\\\"saveTodoEdit(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-primary\\\">\\n Save\\n </button>\\n <button @click=\\\"removeTodo(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-danger\\\">\\n Delete\\n </button>\\n </div>\\n </div>\\n </div>\\n </div>\\n </div>\\n\\n <script src=\\\"app.js\\\"></script>\\n</body>\\n</html>\"}",
302
- "name": "str_replace_editor"
303
- },
304
- "id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
305
- "type": "function"
306
- }
307
- ],
308
- "function_call": null
309
- }
310
- }
311
- ],
312
- "usage": {
313
- "completion_tokens": 1142,
314
- "prompt_tokens": 3601,
315
- "total_tokens": 4743,
316
- "completion_tokens_details": null,
317
- "prompt_tokens_details": {
318
- "audio_tokens": null,
319
- "cached_tokens": 3435
320
- },
321
- "cache_creation_input_tokens": 160,
322
- "cache_read_input_tokens": 3435
323
- },
324
- "service_tier": null
325
- },
326
- "total_calls_in_response": 1
327
- },
328
- "content": "--- /workspace/todo-app/index.html\n+++ /workspace/todo-app/index.html\n@@ -0,0 +1,43 @@\n+<!DOCTYPE html>\n+<html lang=\"en\">\n+<head>\n+ <meta charset=\"UTF-8\">\n+ <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n+ <title>Vue Todo App</title>\n+ <script src=\"https://unpkg.com/vue@3/dist/vue.global.js\"></script>\n+ <link href=\"https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css\" rel=\"stylesheet\">\n+ <style>\n+ .completed {\n+ text-decoration: line-through;\n+ color: #6c757d;\n+ }\n+ .todo-item {\n+ display: flex;\n+ align-items: center;\n+ gap: 1rem;\n+ padding: 0.5rem;\n+ border-bottom: 1px solid #dee2e6;\n+ }\n+ .todo-item:last-child {\n+ border-bottom: none;\n+ }\n+ </style>\n+</head>\n+<body>\n+ <div id=\"app\" class=\"container mt-5\">\n+ <h1 class=\"mb-4\">Todo List</h1>\n+\n+ <!-- Add new todo form -->\n+ <div class=\"mb-4\">\n+ <form @submit.prevent=\"addTodo\" class=\"row g-3\">\n+ <div class=\"col-md-6\">\n+ <input type=\"text\" v-model=\"newTodoText\" class=\"form-control\" placeholder=\"Enter new todo\" required>\n+ </div>\n+ <div class=\"col-md-4\">\n+ <input type=\"date\" v-model=\"newTodoDueDate\" class=\"form-control\">\n+ </div>\n+ <div class=\"col-md-2\">\n+ <button type=\"submit\" class=\"btn btn-primary w-100\">Add Todo</button>\n+ </div>\n+ </form>\n+ </div>\n@@ -1,0 +45,56 @@\n+ <!-- Todo list -->\n+ <div class=\"card\">\n+ <div class=\"card-body\">\n+ <div v-if=\"todos.length === 0\" class=\"text-center text-muted\">\n+ No todos yet. Add one above!\n+ </div>\n+ <div v-for=\"todo in todos\" :key=\"todo.id\" class=\"todo-item\">\n+ <input type=\"checkbox\"\n+ :checked=\"todo.completed\"\n+ @change=\"toggleTodo(todo)\"\n+ class=\"form-check-input\">\n+\n+ <div v-if=\"editingTodo !== todo.id\"\n+ :class=\"{ completed: todo.completed }\"\n+ class=\"flex-grow-1\">\n+ {{ todo.text }}\n+ <small v-if=\"todo.dueDate\" class=\"text-muted ms-2\">\n+ Due: {{ formatDate(todo.dueDate) }}\n+ </small>\n+ </div>\n+\n+ <div v-else class=\"flex-grow-1 d-flex gap-2\">\n+ <input type=\"text\"\n+ v-model=\"editingText\"\n+ class=\"form-control\"\n+ @keyup.enter=\"saveTodoEdit(todo)\"\n+ @keyup.esc=\"cancelEdit\">\n+ <input type=\"date\"\n+ v-model=\"editingDueDate\"\n+ class=\"form-control\">\n+ </div>\n+\n+ <div class=\"btn-group\">\n+ <button v-if=\"editingTodo !== todo.id\"\n+ @click=\"startEdit(todo)\"\n+ class=\"btn btn-sm btn-outline-secondary\">\n+ Edit\n+ </button>\n+ <button v-else\n+ @click=\"saveTodoEdit(todo)\"\n+ class=\"btn btn-sm btn-outline-primary\">\n+ Save\n+ </button>\n+ <button @click=\"removeTodo(todo)\"\n+ class=\"btn btn-sm btn-outline-danger\">\n+ Delete\n+ </button>\n+ </div>\n+ </div>\n+ </div>\n+ </div>\n+ </div>\n+\n+ <script src=\"app.js\"></script>\n+</body>\n+</html>",
329
- "extras": {
330
- "path": "/workspace/todo-app/index.html",
331
- "prev_exist": false,
332
- "old_content": null,
333
- "new_content": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n <meta charset=\"UTF-8\">\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n <title>Vue Todo App</title>\n <script src=\"https://unpkg.com/vue@3/dist/vue.global.js\"></script>\n <link href=\"https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css\" rel=\"stylesheet\">\n <style>\n .completed {\n text-decoration: line-through;\n color: #6c757d;\n }\n .todo-item {\n display: flex;\n align-items: center;\n gap: 1rem;\n padding: 0.5rem;\n border-bottom: 1px solid #dee2e6;\n }\n .todo-item:last-child {\n border-bottom: none;\n }\n </style>\n</head>\n<body>\n <div id=\"app\" class=\"container mt-5\">\n <h1 class=\"mb-4\">Todo List</h1>\n \n <!-- Add new todo form -->\n <div class=\"mb-4\">\n <form @submit.prevent=\"addTodo\" class=\"row g-3\">\n <div class=\"col-md-6\">\n <input type=\"text\" v-model=\"newTodoText\" class=\"form-control\" placeholder=\"Enter new todo\" required>\n </div>\n <div class=\"col-md-4\">\n <input type=\"date\" v-model=\"newTodoDueDate\" class=\"form-control\">\n </div>\n <div class=\"col-md-2\">\n <button type=\"submit\" class=\"btn btn-primary w-100\">Add Todo</button>\n </div>\n </form>\n </div>\n\n <!-- Todo list -->\n <div class=\"card\">\n <div class=\"card-body\">\n <div v-if=\"todos.length === 0\" class=\"text-center text-muted\">\n No todos yet. Add one above!\n </div>\n <div v-for=\"todo in todos\" :key=\"todo.id\" class=\"todo-item\">\n <input type=\"checkbox\" \n :checked=\"todo.completed\" \n @change=\"toggleTodo(todo)\"\n class=\"form-check-input\">\n \n <div v-if=\"editingTodo !== todo.id\" \n :class=\"{ completed: todo.completed }\"\n class=\"flex-grow-1\">\n {{ todo.text }}\n <small v-if=\"todo.dueDate\" class=\"text-muted ms-2\">\n Due: {{ formatDate(todo.dueDate) }}\n </small>\n </div>\n \n <div v-else class=\"flex-grow-1 d-flex gap-2\">\n <input type=\"text\" \n v-model=\"editingText\" \n class=\"form-control\"\n @keyup.enter=\"saveTodoEdit(todo)\"\n @keyup.esc=\"cancelEdit\">\n <input type=\"date\" \n v-model=\"editingDueDate\" \n class=\"form-control\">\n </div>\n\n <div class=\"btn-group\">\n <button v-if=\"editingTodo !== todo.id\"\n @click=\"startEdit(todo)\" \n class=\"btn btn-sm btn-outline-secondary\">\n Edit\n </button>\n <button v-else\n @click=\"saveTodoEdit(todo)\" \n class=\"btn btn-sm btn-outline-primary\">\n Save\n </button>\n <button @click=\"removeTodo(todo)\" \n class=\"btn btn-sm btn-outline-danger\">\n Delete\n </button>\n </div>\n </div>\n </div>\n </div>\n </div>\n\n <script src=\"app.js\"></script>\n</body>\n</html>",
334
- "impl_source": "oh_aci",
335
- "formatted_output_and_error": "File created successfully at: /workspace/todo-app/index.html"
336
- }
337
- },
338
- {
339
- "id": 11,
340
- "timestamp": "2025-01-20T20:29:56.899226",
341
- "source": "agent",
342
- "message": "",
343
- "action": "edit",
344
- "tool_call_metadata": {
345
- "function_name": "str_replace_editor",
346
- "tool_call_id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
347
- "model_response": {
348
- "id": "chatcmpl-34fef176-276b-4a18-892f-688b08f51225",
349
- "created": 1737433796,
350
- "model": "claude-3-5-sonnet-20241022",
351
- "object": "chat.completion",
352
- "system_fingerprint": null,
353
- "choices": [
354
- {
355
- "finish_reason": "tool_calls",
356
- "index": 0,
357
- "message": {
358
- "content": "Now, let's create the JavaScript file with the Vue application logic:",
359
- "role": "assistant",
360
- "tool_calls": [
361
- {
362
- "index": 1,
363
- "function": {
364
- "arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/app.js\", \"file_text\": \"const { createApp } = Vue\\n\\nconst STORAGE_KEY = 'vue-todo-list'\\n\\ncreateApp({\\n data() {\\n return {\\n todos: [],\\n newTodoText: '',\\n newTodoDueDate: '',\\n editingTodo: null,\\n editingText: '',\\n editingDueDate: ''\\n }\\n },\\n methods: {\\n loadTodos() {\\n const savedTodos = localStorage.getItem(STORAGE_KEY)\\n if (savedTodos) {\\n this.todos = JSON.parse(savedTodos)\\n }\\n },\\n saveTodos() {\\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\\n },\\n addTodo() {\\n if (!this.newTodoText.trim()) return\\n\\n const todo = {\\n id: Date.now(),\\n text: this.newTodoText.trim(),\\n completed: false,\\n dueDate: this.newTodoDueDate || null\\n }\\n\\n this.todos.push(todo)\\n this.newTodoText = ''\\n this.newTodoDueDate = ''\\n this.saveTodos()\\n },\\n toggleTodo(todo) {\\n todo.completed = !todo.completed\\n this.saveTodos()\\n },\\n removeTodo(todo) {\\n const index = this.todos.indexOf(todo)\\n if (index > -1) {\\n this.todos.splice(index, 1)\\n this.saveTodos()\\n }\\n },\\n startEdit(todo) {\\n this.editingTodo = todo.id\\n this.editingText = todo.text\\n this.editingDueDate = todo.dueDate || ''\\n },\\n saveTodoEdit(todo) {\\n if (!this.editingText.trim()) return\\n\\n todo.text = this.editingText.trim()\\n todo.dueDate = this.editingDueDate || null\\n this.editingTodo = null\\n this.saveTodos()\\n },\\n cancelEdit() {\\n this.editingTodo = null\\n },\\n formatDate(dateString) {\\n return new Date(dateString).toLocaleDateString()\\n }\\n },\\n mounted() {\\n this.loadTodos()\\n }\\n}).mount('#app')\"}",
365
- "name": "str_replace_editor"
366
- },
367
- "id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
368
- "type": "function"
369
- }
370
- ],
371
- "function_call": null
372
- }
373
- }
374
- ],
375
- "usage": {
376
- "completion_tokens": 693,
377
- "prompt_tokens": 4769,
378
- "total_tokens": 5462,
379
- "completion_tokens_details": null,
380
- "prompt_tokens_details": {
381
- "audio_tokens": null,
382
- "cached_tokens": 3595
383
- },
384
- "cache_creation_input_tokens": 1168,
385
- "cache_read_input_tokens": 3595
386
- },
387
- "service_tier": null
388
- },
389
- "total_calls_in_response": 1
390
- },
391
- "args": {
392
- "path": "/workspace/todo-app/app.js",
393
- "content": "",
394
- "start": 1,
395
- "end": -1,
396
- "thought": "Now, let's create the JavaScript file with the Vue application logic:",
397
- "impl_source": "oh_aci",
398
- "translated_ipython_code": "print(file_editor(**{'command': 'create', 'path': '/workspace/todo-app/app.js', 'file_text': \"const { createApp } = Vue\\n\\nconst STORAGE_KEY = 'vue-todo-list'\\n\\ncreateApp({\\n data() {\\n return {\\n todos: [],\\n newTodoText: '',\\n newTodoDueDate: '',\\n editingTodo: null,\\n editingText: '',\\n editingDueDate: ''\\n }\\n },\\n methods: {\\n loadTodos() {\\n const savedTodos = localStorage.getItem(STORAGE_KEY)\\n if (savedTodos) {\\n this.todos = JSON.parse(savedTodos)\\n }\\n },\\n saveTodos() {\\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\\n },\\n addTodo() {\\n if (!this.newTodoText.trim()) return\\n\\n const todo = {\\n id: Date.now(),\\n text: this.newTodoText.trim(),\\n completed: false,\\n dueDate: this.newTodoDueDate || null\\n }\\n\\n this.todos.push(todo)\\n this.newTodoText = ''\\n this.newTodoDueDate = ''\\n this.saveTodos()\\n },\\n toggleTodo(todo) {\\n todo.completed = !todo.completed\\n this.saveTodos()\\n },\\n removeTodo(todo) {\\n const index = this.todos.indexOf(todo)\\n if (index > -1) {\\n this.todos.splice(index, 1)\\n this.saveTodos()\\n }\\n },\\n startEdit(todo) {\\n this.editingTodo = todo.id\\n this.editingText = todo.text\\n this.editingDueDate = todo.dueDate || ''\\n },\\n saveTodoEdit(todo) {\\n if (!this.editingText.trim()) return\\n\\n todo.text = this.editingText.trim()\\n todo.dueDate = this.editingDueDate || null\\n this.editingTodo = null\\n this.saveTodos()\\n },\\n cancelEdit() {\\n this.editingTodo = null\\n },\\n formatDate(dateString) {\\n return new Date(dateString).toLocaleDateString()\\n }\\n },\\n mounted() {\\n this.loadTodos()\\n }\\n}).mount('#app')\"}))"
399
- }
400
- },
401
- {
402
- "id": 12,
403
- "timestamp": "2025-01-20T20:29:56.933329",
404
- "source": "agent",
405
- "message": "I edited the file /workspace/todo-app/app.js.",
406
- "cause": 11,
407
- "observation": "edit",
408
- "tool_call_metadata": {
409
- "function_name": "str_replace_editor",
410
- "tool_call_id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
411
- "model_response": {
412
- "id": "chatcmpl-34fef176-276b-4a18-892f-688b08f51225",
413
- "created": 1737433796,
414
- "model": "claude-3-5-sonnet-20241022",
415
- "object": "chat.completion",
416
- "system_fingerprint": null,
417
- "choices": [
418
- {
419
- "finish_reason": "tool_calls",
420
- "index": 0,
421
- "message": {
422
- "content": "Now, let's create the JavaScript file with the Vue application logic:",
423
- "role": "assistant",
424
- "tool_calls": [
425
- {
426
- "index": 1,
427
- "function": {
428
- "arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/app.js\", \"file_text\": \"const { createApp } = Vue\\n\\nconst STORAGE_KEY = 'vue-todo-list'\\n\\ncreateApp({\\n data() {\\n return {\\n todos: [],\\n newTodoText: '',\\n newTodoDueDate: '',\\n editingTodo: null,\\n editingText: '',\\n editingDueDate: ''\\n }\\n },\\n methods: {\\n loadTodos() {\\n const savedTodos = localStorage.getItem(STORAGE_KEY)\\n if (savedTodos) {\\n this.todos = JSON.parse(savedTodos)\\n }\\n },\\n saveTodos() {\\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\\n },\\n addTodo() {\\n if (!this.newTodoText.trim()) return\\n\\n const todo = {\\n id: Date.now(),\\n text: this.newTodoText.trim(),\\n completed: false,\\n dueDate: this.newTodoDueDate || null\\n }\\n\\n this.todos.push(todo)\\n this.newTodoText = ''\\n this.newTodoDueDate = ''\\n this.saveTodos()\\n },\\n toggleTodo(todo) {\\n todo.completed = !todo.completed\\n this.saveTodos()\\n },\\n removeTodo(todo) {\\n const index = this.todos.indexOf(todo)\\n if (index > -1) {\\n this.todos.splice(index, 1)\\n this.saveTodos()\\n }\\n },\\n startEdit(todo) {\\n this.editingTodo = todo.id\\n this.editingText = todo.text\\n this.editingDueDate = todo.dueDate || ''\\n },\\n saveTodoEdit(todo) {\\n if (!this.editingText.trim()) return\\n\\n todo.text = this.editingText.trim()\\n todo.dueDate = this.editingDueDate || null\\n this.editingTodo = null\\n this.saveTodos()\\n },\\n cancelEdit() {\\n this.editingTodo = null\\n },\\n formatDate(dateString) {\\n return new Date(dateString).toLocaleDateString()\\n }\\n },\\n mounted() {\\n this.loadTodos()\\n }\\n}).mount('#app')\"}",
429
- "name": "str_replace_editor"
430
- },
431
- "id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
432
- "type": "function"
433
- }
434
- ],
435
- "function_call": null
436
- }
437
- }
438
- ],
439
- "usage": {
440
- "completion_tokens": 693,
441
- "prompt_tokens": 4769,
442
- "total_tokens": 5462,
443
- "completion_tokens_details": null,
444
- "prompt_tokens_details": {
445
- "audio_tokens": null,
446
- "cached_tokens": 3595
447
- },
448
- "cache_creation_input_tokens": 1168,
449
- "cache_read_input_tokens": 3595
450
- },
451
- "service_tier": null
452
- },
453
- "total_calls_in_response": 1
454
- },
455
- "content": "--- /workspace/todo-app/app.js\n+++ /workspace/todo-app/app.js\n@@ -0,0 +1 @@\n+const { createApp } = Vue\n@@ -1,0 +3,73 @@\n+const STORAGE_KEY = 'vue-todo-list'\n+\n+createApp({\n+ data() {\n+ return {\n+ todos: [],\n+ newTodoText: '',\n+ newTodoDueDate: '',\n+ editingTodo: null,\n+ editingText: '',\n+ editingDueDate: ''\n+ }\n+ },\n+ methods: {\n+ loadTodos() {\n+ const savedTodos = localStorage.getItem(STORAGE_KEY)\n+ if (savedTodos) {\n+ this.todos = JSON.parse(savedTodos)\n+ }\n+ },\n+ saveTodos() {\n+ localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\n+ },\n+ addTodo() {\n+ if (!this.newTodoText.trim()) return\n+\n+ const todo = {\n+ id: Date.now(),\n+ text: this.newTodoText.trim(),\n+ completed: false,\n+ dueDate: this.newTodoDueDate || null\n+ }\n+\n+ this.todos.push(todo)\n+ this.newTodoText = ''\n+ this.newTodoDueDate = ''\n+ this.saveTodos()\n+ },\n+ toggleTodo(todo) {\n+ todo.completed = !todo.completed\n+ this.saveTodos()\n+ },\n+ removeTodo(todo) {\n+ const index = this.todos.indexOf(todo)\n+ if (index > -1) {\n+ this.todos.splice(index, 1)\n+ this.saveTodos()\n+ }\n+ },\n+ startEdit(todo) {\n+ this.editingTodo = todo.id\n+ this.editingText = todo.text\n+ this.editingDueDate = todo.dueDate || ''\n+ },\n+ saveTodoEdit(todo) {\n+ if (!this.editingText.trim()) return\n+\n+ todo.text = this.editingText.trim()\n+ todo.dueDate = this.editingDueDate || null\n+ this.editingTodo = null\n+ this.saveTodos()\n+ },\n+ cancelEdit() {\n+ this.editingTodo = null\n+ },\n+ formatDate(dateString) {\n+ return new Date(dateString).toLocaleDateString()\n+ }\n+ },\n+ mounted() {\n+ this.loadTodos()\n+ }\n+}).mount('#app')",
456
- "extras": {
457
- "path": "/workspace/todo-app/app.js",
458
- "prev_exist": false,
459
- "old_content": null,
460
- "new_content": "const { createApp } = Vue\n\nconst STORAGE_KEY = 'vue-todo-list'\n\ncreateApp({\n data() {\n return {\n todos: [],\n newTodoText: '',\n newTodoDueDate: '',\n editingTodo: null,\n editingText: '',\n editingDueDate: ''\n }\n },\n methods: {\n loadTodos() {\n const savedTodos = localStorage.getItem(STORAGE_KEY)\n if (savedTodos) {\n this.todos = JSON.parse(savedTodos)\n }\n },\n saveTodos() {\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\n },\n addTodo() {\n if (!this.newTodoText.trim()) return\n\n const todo = {\n id: Date.now(),\n text: this.newTodoText.trim(),\n completed: false,\n dueDate: this.newTodoDueDate || null\n }\n\n this.todos.push(todo)\n this.newTodoText = ''\n this.newTodoDueDate = ''\n this.saveTodos()\n },\n toggleTodo(todo) {\n todo.completed = !todo.completed\n this.saveTodos()\n },\n removeTodo(todo) {\n const index = this.todos.indexOf(todo)\n if (index > -1) {\n this.todos.splice(index, 1)\n this.saveTodos()\n }\n },\n startEdit(todo) {\n this.editingTodo = todo.id\n this.editingText = todo.text\n this.editingDueDate = todo.dueDate || ''\n },\n saveTodoEdit(todo) {\n if (!this.editingText.trim()) return\n\n todo.text = this.editingText.trim()\n todo.dueDate = this.editingDueDate || null\n this.editingTodo = null\n this.saveTodos()\n },\n cancelEdit() {\n this.editingTodo = null\n },\n formatDate(dateString) {\n return new Date(dateString).toLocaleDateString()\n }\n },\n mounted() {\n this.loadTodos()\n }\n}).mount('#app')",
461
- "impl_source": "oh_aci",
462
- "formatted_output_and_error": "File created successfully at: /workspace/todo-app/app.js"
463
- }
464
- },
465
- {
466
- "id": 13,
467
- "timestamp": "2025-01-20T20:29:59.282034",
468
- "source": "agent",
469
- "message": "Running command: cd /workspace/todo-app && python3 -m http.server 8000",
470
- "action": "run",
471
- "tool_call_metadata": {
472
- "function_name": "execute_bash",
473
- "tool_call_id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
474
- "model_response": {
475
- "id": "chatcmpl-51a85fbb-dafc-4519-a7d2-df369e38d76f",
476
- "created": 1737433799,
477
- "model": "claude-3-5-sonnet-20241022",
478
- "object": "chat.completion",
479
- "system_fingerprint": null,
480
- "choices": [
481
- {
482
- "finish_reason": "tool_calls",
483
- "index": 0,
484
- "message": {
485
- "content": "Let's serve this application using a simple Python HTTP server:",
486
- "role": "assistant",
487
- "tool_calls": [
488
- {
489
- "index": 1,
490
- "function": {
491
- "arguments": "{\"command\": \"cd /workspace/todo-app && python3 -m http.server 8000\"}",
492
- "name": "execute_bash"
493
- },
494
- "id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
495
- "type": "function"
496
- }
497
- ],
498
- "function_call": null
499
- }
500
- }
501
- ],
502
- "usage": {
503
- "completion_tokens": 85,
504
- "prompt_tokens": 5488,
505
- "total_tokens": 5573,
506
- "completion_tokens_details": null,
507
- "prompt_tokens_details": {
508
- "audio_tokens": null,
509
- "cached_tokens": 4763
510
- },
511
- "cache_creation_input_tokens": 719,
512
- "cache_read_input_tokens": 4763
513
- },
514
- "service_tier": null
515
- },
516
- "total_calls_in_response": 1
517
- },
518
- "args": {
519
- "command": "cd /workspace/todo-app && python3 -m http.server 8000",
520
- "is_input": false,
521
- "thought": "Let's serve this application using a simple Python HTTP server:",
522
- "blocking": false,
523
- "hidden": false,
524
- "confirmation_state": "confirmed"
525
- }
526
- },
527
- {
528
- "id": 14,
529
- "timestamp": "2025-01-20T20:30:29.862264",
530
- "source": "agent",
531
- "message": "Command `cd /workspace/todo-app && python3 -m http.server 8000` executed with exit code -1.",
532
- "cause": 13,
533
- "observation": "run",
534
- "tool_call_metadata": {
535
- "function_name": "execute_bash",
536
- "tool_call_id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
537
- "model_response": {
538
- "id": "chatcmpl-51a85fbb-dafc-4519-a7d2-df369e38d76f",
539
- "created": 1737433799,
540
- "model": "claude-3-5-sonnet-20241022",
541
- "object": "chat.completion",
542
- "system_fingerprint": null,
543
- "choices": [
544
- {
545
- "finish_reason": "tool_calls",
546
- "index": 0,
547
- "message": {
548
- "content": "Let's serve this application using a simple Python HTTP server:",
549
- "role": "assistant",
550
- "tool_calls": [
551
- {
552
- "index": 1,
553
- "function": {
554
- "arguments": "{\"command\": \"cd /workspace/todo-app && python3 -m http.server 8000\"}",
555
- "name": "execute_bash"
556
- },
557
- "id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
558
- "type": "function"
559
- }
560
- ],
561
- "function_call": null
562
- }
563
- }
564
- ],
565
- "usage": {
566
- "completion_tokens": 85,
567
- "prompt_tokens": 5488,
568
- "total_tokens": 5573,
569
- "completion_tokens_details": null,
570
- "prompt_tokens_details": {
571
- "audio_tokens": null,
572
- "cached_tokens": 4763
573
- },
574
- "cache_creation_input_tokens": 719,
575
- "cache_read_input_tokens": 4763
576
- },
577
- "service_tier": null
578
- },
579
- "total_calls_in_response": 1
580
- },
581
- "content": "Serving HTTP on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ...",
582
- "extras": {
583
- "command": "cd /workspace/todo-app && python3 -m http.server 8000",
584
- "metadata": {
585
- "exit_code": -1,
586
- "pid": -1,
587
- "username": null,
588
- "hostname": null,
589
- "working_dir": null,
590
- "py_interpreter_path": null,
591
- "prefix": "",
592
- "suffix": "\n[The command has no new output after 30 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, send keys to interrupt/kill the command, or use the timeout parameter in execute_bash for future commands.]"
593
- },
594
- "hidden": false
595
- },
596
- "success": false
597
- },
598
- {
599
- "id": 15,
600
- "timestamp": "2025-01-20T20:30:35.414886",
601
- "source": "agent",
602
- "message": "I've created a complete Vue.js todo application with all the requested features. Here's what the application includes:\n\n1. Todo List Features:\n - View all todos\n - Add new todos with optional due dates\n - Mark todos as complete/incomplete\n - Delete todos\n - Edit todo text and due date\n - Persistent storage using localStorage\n\n2. UI Features:\n - Clean, responsive design using Bootstrap\n - Completed todos are shown with strikethrough\n - Due dates are displayed in a readable format\n - Edit mode with save/cancel functionality\n - Form validation to prevent empty todos\n\nThe application is now running on port 8000. You can access it by opening a web browser and navigating to http://localhost:8000\n\nTo use the application:\n1. Add a new todo by typing in the text field and optionally selecting a due date\n2. Click the checkbox to mark a todo as complete\n3. Click the \"Edit\" button to modify a todo's text or due date\n4. Click the \"Delete\" button to remove a todo\n5. All changes are automatically saved to localStorage\n\nThe data will persist even if you close and reopen your browser. Would you like me to explain any specific part of the implementation or make any adjustments to the functionality?",
603
- "action": "message",
604
- "args": {
605
- "content": "I've created a complete Vue.js todo application with all the requested features. Here's what the application includes:\n\n1. Todo List Features:\n - View all todos\n - Add new todos with optional due dates\n - Mark todos as complete/incomplete\n - Delete todos\n - Edit todo text and due date\n - Persistent storage using localStorage\n\n2. UI Features:\n - Clean, responsive design using Bootstrap\n - Completed todos are shown with strikethrough\n - Due dates are displayed in a readable format\n - Edit mode with save/cancel functionality\n - Form validation to prevent empty todos\n\nThe application is now running on port 8000. You can access it by opening a web browser and navigating to http://localhost:8000\n\nTo use the application:\n1. Add a new todo by typing in the text field and optionally selecting a due date\n2. Click the checkbox to mark a todo as complete\n3. Click the \"Edit\" button to modify a todo's text or due date\n4. Click the \"Delete\" button to remove a todo\n5. All changes are automatically saved to localStorage\n\nThe data will persist even if you close and reopen your browser. Would you like me to explain any specific part of the implementation or make any adjustments to the functionality?",
606
- "image_urls": null,
607
- "wait_for_response": true
608
- }
609
- },
610
- {
611
- "id": 16,
612
- "timestamp": "2025-01-20T20:30:35.421080",
613
- "source": "agent",
614
- "message": "No observation",
615
- "cause": 15,
616
- "observation": "null",
617
- "content": "",
618
- "extras": {}
619
- },
620
- {
621
- "id": 17,
622
- "timestamp": "2025-01-20T20:30:35.429675",
623
- "source": "environment",
624
- "message": "",
625
- "observation": "agent_state_changed",
626
- "content": "",
627
- "extras": {
628
- "agent_state": "awaiting_user_input"
629
- }
630
- }
631
- ]
 
tests/runtime/trajs/basic_interactions.json DELETED
@@ -1,128 +0,0 @@
1
- [
2
- {
3
- "id": 0,
4
- "timestamp": "2025-02-01T00:14:10.787622",
5
- "source": "user",
6
- "message": "what's 1+1?",
7
- "action": "message",
8
- "args": {
9
- "content": "what's 1+1?",
10
- "image_urls": null,
11
- "wait_for_response": false
12
- },
13
- "timeout": 120
14
- },
15
- {
16
- "id": 3,
17
- "timestamp": "2025-02-01T00:14:12.686046",
18
- "source": "agent",
19
- "message": "1 + 1 equals 2.",
20
- "action": "message",
21
- "args": {
22
- "content": "1 + 1 equals 2.",
23
- "image_urls": null,
24
- "wait_for_response": true
25
- },
26
- "timeout": 120
27
- },
28
- {
29
- "id": 6,
30
- "timestamp": "2025-02-01T00:17:23.228442",
31
- "source": "user",
32
- "message": "No, I mean by Goldbach's conjecture!",
33
- "action": "message",
34
- "args": {
35
- "content": "No, I mean by Goldbach's conjecture!",
36
- "image_urls": null,
37
- "wait_for_response": false
38
- },
39
- "timeout": 120
40
- },
41
- {
42
- "id": 9,
43
- "timestamp": "2025-02-01T00:17:25.586532",
44
- "source": "agent",
45
- "message": "Goldbach's conjecture is an unsolved problem in number theory that suggests every even integer greater than 2 can be expressed as the sum of two prime numbers. However, it doesn't directly apply to the sum of 1 + 1, as 1 is not considered a prime number. The conjecture is more relevant to larger even numbers.",
46
- "action": "message",
47
- "args": {
48
- "content": "Goldbach's conjecture is an unsolved problem in number theory that suggests every even integer greater than 2 can be expressed as the sum of two prime numbers. However, it doesn't directly apply to the sum of 1 + 1, as 1 is not considered a prime number. The conjecture is more relevant to larger even numbers.",
49
- "image_urls": null,
50
- "wait_for_response": true
51
- },
52
- "timeout": 120
53
- },
54
- {
55
- "id": 12,
56
- "timestamp": "2025-02-01T00:18:59.847433",
57
- "source": "user",
58
- "message": "Finish please",
59
- "action": "message",
60
- "args": {
61
- "content": "Finish please",
62
- "image_urls": null,
63
- "wait_for_response": false
64
- },
65
- "timeout": 120
66
- },
67
- {
68
- "id": 15,
69
- "timestamp": "2025-02-01T00:19:01.168854",
70
- "source": "agent",
71
- "message": "All done! What's next on the agenda?",
72
- "action": "finish",
73
- "tool_call_metadata": {
74
- "function_name": "finish",
75
- "tool_call_id": "call_IpyczPgEAwtpRWFZ2RNokTdH",
76
- "model_response": {
77
- "id": "chatcmpl-Aw30O6Ka8n1X3F6F7YUh91NxXBE0y",
78
- "created": 1738397940,
79
- "model": "gpt-4o-2024-08-06",
80
- "object": "chat.completion",
81
- "system_fingerprint": "fp_4691090a87",
82
- "choices": [
83
- {
84
- "finish_reason": "tool_calls",
85
- "index": 0,
86
- "message": {
87
- "content": null,
88
- "role": "assistant",
89
- "tool_calls": [
90
- {
91
- "function": {
92
- "arguments": "{}",
93
- "name": "finish"
94
- },
95
- "id": "call_IpyczPgEAwtpRWFZ2RNokTdH",
96
- "type": "function"
97
- }
98
- ],
99
- "function_call": null
100
- }
101
- }
102
- ],
103
- "usage": {
104
- "completion_tokens": 10,
105
- "prompt_tokens": 2473,
106
- "total_tokens": 2483,
107
- "completion_tokens_details": {
108
- "accepted_prediction_tokens": 0,
109
- "audio_tokens": 0,
110
- "reasoning_tokens": 0,
111
- "rejected_prediction_tokens": 0
112
- },
113
- "prompt_tokens_details": {
114
- "audio_tokens": 0,
115
- "cached_tokens": 2304
116
- }
117
- },
118
- "service_tier": "default"
119
- },
120
- "total_calls_in_response": 1
121
- },
122
- "args": {
123
- "outputs": {},
124
- "thought": ""
125
- },
126
- "timeout": 120
127
- }
128
- ]
 
tests/runtime/trajs/wrong_initial_state.json DELETED
@@ -1,454 +0,0 @@
1
- [
2
- {
3
- "id": 0,
4
- "timestamp": "2025-01-27T23:16:50.239610",
5
- "source": "user",
6
- "message": "Please rename game_2048.py to 2048.py",
7
- "action": "message",
8
- "args": {
9
- "content": "Please rename game_2048.py to 2048.py",
10
- "image_urls": null,
11
- "wait_for_response": false
12
- },
13
- "timeout": 120
14
- },
15
- {
16
- "id": 3,
17
- "timestamp": "2025-01-27T23:16:54.105108",
18
- "source": "agent",
19
- "message": "Reading file: /workspace",
20
- "action": "read",
21
- "tool_call_metadata": {
22
- "function_name": "str_replace_editor",
23
- "tool_call_id": "toolu_01USom24cuiyvAembLhEryrn",
24
- "model_response": {
25
- "id": "chatcmpl-90c35dea-eb25-484c-aaa7-d9d00048f64a",
26
- "created": 1738048613,
27
- "model": "claude-3-5-sonnet-20241022",
28
- "object": "chat.completion",
29
- "system_fingerprint": null,
30
- "choices": [
31
- {
32
- "finish_reason": "tool_calls",
33
- "index": 0,
34
- "message": {
35
- "content": "I'll help you rename the file. First, let me check if the file exists and its location.",
36
- "role": "assistant",
37
- "tool_calls": [
38
- {
39
- "index": 1,
40
- "function": {
41
- "arguments": "{\"command\": \"view\", \"path\": \"/workspace\"}",
42
- "name": "str_replace_editor"
43
- },
44
- "id": "toolu_01USom24cuiyvAembLhEryrn",
45
- "type": "function"
46
- }
47
- ],
48
- "function_call": null
49
- }
50
- }
51
- ],
52
- "usage": {
53
- "completion_tokens": 95,
54
- "prompt_tokens": 3475,
55
- "total_tokens": 3570,
56
- "completion_tokens_details": null,
57
- "prompt_tokens_details": {
58
- "audio_tokens": null,
59
- "cached_tokens": 0
60
- },
61
- "cache_creation_input_tokens": 3471,
62
- "cache_read_input_tokens": 0
63
- },
64
- "service_tier": null
65
- },
66
- "total_calls_in_response": 1
67
- },
68
- "args": {
69
- "path": "/workspace",
70
- "start": 0,
71
- "end": -1,
72
- "thought": "I'll help you rename the file. First, let me check if the file exists and its location.",
73
- "impl_source": "oh_aci",
74
- "translated_ipython_code": "print(file_editor(**{'command': 'view', 'path': '/workspace'}))"
75
- },
76
- "timeout": 120
77
- },
78
- {
79
- "id": 4,
80
- "timestamp": "2025-01-27T23:16:54.140581",
81
- "source": "agent",
82
- "message": "I read the file /workspace.",
83
- "cause": 3,
84
- "observation": "read",
85
- "tool_call_metadata": {
86
- "function_name": "str_replace_editor",
87
- "tool_call_id": "toolu_01USom24cuiyvAembLhEryrn",
88
- "model_response": {
89
- "id": "chatcmpl-90c35dea-eb25-484c-aaa7-d9d00048f64a",
90
- "created": 1738048613,
91
- "model": "claude-3-5-sonnet-20241022",
92
- "object": "chat.completion",
93
- "system_fingerprint": null,
94
- "choices": [
95
- {
96
- "finish_reason": "tool_calls",
97
- "index": 0,
98
- "message": {
99
- "content": "I'll help you rename the file. First, let me check if the file exists and its location.",
100
- "role": "assistant",
101
- "tool_calls": [
102
- {
103
- "index": 1,
104
- "function": {
105
- "arguments": "{\"command\": \"view\", \"path\": \"/workspace\"}",
106
- "name": "str_replace_editor"
107
- },
108
- "id": "toolu_01USom24cuiyvAembLhEryrn",
109
- "type": "function"
110
- }
111
- ],
112
- "function_call": null
113
- }
114
- }
115
- ],
116
- "usage": {
117
- "completion_tokens": 95,
118
- "prompt_tokens": 3475,
119
- "total_tokens": 3570,
120
- "completion_tokens_details": null,
121
- "prompt_tokens_details": {
122
- "audio_tokens": null,
123
- "cached_tokens": 0
124
- },
125
- "cache_creation_input_tokens": 3471,
126
- "cache_read_input_tokens": 0
127
- },
128
- "service_tier": null
129
- },
130
- "total_calls_in_response": 1
131
- },
132
- "content": "Here's the files and directories up to 2 levels deep in /workspace, excluding hidden items:\n/workspace/\n/workspace/game_2048.py",
133
- "extras": {
134
- "path": "/workspace",
135
- "impl_source": "oh_aci"
136
- }
137
- },
138
- {
139
- "id": 5,
140
- "timestamp": "2025-01-27T23:16:56.951967",
141
- "source": "agent",
142
- "message": "Running command: mv /workspace/game_2048.py /workspace/2048.py",
143
- "action": "run",
144
- "tool_call_metadata": {
145
- "function_name": "execute_bash",
146
- "tool_call_id": "toolu_01PuY3mXrE6fm3j52GscR4rj",
147
- "model_response": {
148
- "id": "chatcmpl-1f2bc7b4-848a-45ec-a2d8-aff83e5adbd6",
149
- "created": 1738048616,
150
- "model": "claude-3-5-sonnet-20241022",
151
- "object": "chat.completion",
152
- "system_fingerprint": null,
153
- "choices": [
154
- {
155
- "finish_reason": "tool_calls",
156
- "index": 0,
157
- "message": {
158
- "content": "Great, I found the file. Now I'll use the bash command `mv` to rename it.",
159
- "role": "assistant",
160
- "tool_calls": [
161
- {
162
- "index": 1,
163
- "function": {
164
- "arguments": "{\"command\": \"mv /workspace/game_2048.py /workspace/2048.py\", \"is_input\": \"false\"}",
165
- "name": "execute_bash"
166
- },
167
- "id": "toolu_01PuY3mXrE6fm3j52GscR4rj",
168
- "type": "function"
169
- }
170
- ],
171
- "function_call": null
172
- }
173
- }
174
- ],
175
- "usage": {
176
- "completion_tokens": 111,
177
- "prompt_tokens": 3616,
178
- "total_tokens": 3727,
179
- "completion_tokens_details": null,
180
- "prompt_tokens_details": {
181
- "audio_tokens": null,
182
- "cached_tokens": 3471
183
- },
184
- "cache_creation_input_tokens": 139,
185
- "cache_read_input_tokens": 3471
186
- },
187
- "service_tier": null
188
- },
189
- "total_calls_in_response": 1
190
- },
191
- "args": {
192
- "command": "mv /workspace/game_2048.py /workspace/2048.py",
193
- "is_input": false,
194
- "thought": "Great, I found the file. Now I'll use the bash command `mv` to rename it.",
195
- "blocking": false,
196
- "hidden": false,
197
- "confirmation_state": "confirmed"
198
- },
199
- "timeout": 120
200
- },
201
- {
202
- "id": 6,
203
- "timestamp": "2025-01-27T23:16:57.600730",
204
- "source": "agent",
205
- "message": "Command `mv /workspace/game_2048.py /workspace/2048.py` executed with exit code 0.",
206
- "cause": 5,
207
- "observation": "run",
208
- "tool_call_metadata": {
209
- "function_name": "execute_bash",
210
- "tool_call_id": "toolu_01PuY3mXrE6fm3j52GscR4rj",
211
- "model_response": {
212
- "id": "chatcmpl-1f2bc7b4-848a-45ec-a2d8-aff83e5adbd6",
213
- "created": 1738048616,
214
- "model": "claude-3-5-sonnet-20241022",
215
- "object": "chat.completion",
216
- "system_fingerprint": null,
217
- "choices": [
218
- {
219
- "finish_reason": "tool_calls",
220
- "index": 0,
221
- "message": {
222
- "content": "Great, I found the file. Now I'll use the bash command `mv` to rename it.",
223
- "role": "assistant",
224
- "tool_calls": [
225
- {
226
- "index": 1,
227
- "function": {
228
- "arguments": "{\"command\": \"mv /workspace/game_2048.py /workspace/2048.py\", \"is_input\": \"false\"}",
229
- "name": "execute_bash"
230
- },
231
- "id": "toolu_01PuY3mXrE6fm3j52GscR4rj",
232
- "type": "function"
233
- }
234
- ],
235
- "function_call": null
236
- }
237
- }
238
- ],
239
- "usage": {
240
- "completion_tokens": 111,
241
- "prompt_tokens": 3616,
242
- "total_tokens": 3727,
243
- "completion_tokens_details": null,
244
- "prompt_tokens_details": {
245
- "audio_tokens": null,
246
- "cached_tokens": 3471
247
- },
248
- "cache_creation_input_tokens": 139,
249
- "cache_read_input_tokens": 3471
250
- },
251
- "service_tier": null
252
- },
253
- "total_calls_in_response": 1
254
- },
255
- "content": "",
256
- "extras": {
257
- "command": "mv /workspace/game_2048.py /workspace/2048.py",
258
- "metadata": {
259
- "exit_code": 0,
260
- "pid": -1,
261
- "username": "openhands",
262
- "hostname": "0301ebbced1e",
263
- "working_dir": "/workspace",
264
- "py_interpreter_path": "/openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python",
265
- "prefix": "",
266
- "suffix": "\n[The command completed with exit code 0.]"
267
- },
268
- "hidden": false
269
- },
270
- "success": true
271
- },
272
- {
273
- "id": 7,
274
- "timestamp": "2025-01-27T23:16:59.803261",
275
- "source": "agent",
276
- "message": "Reading file: /workspace",
277
- "action": "read",
278
- "tool_call_metadata": {
279
- "function_name": "str_replace_editor",
280
- "tool_call_id": "toolu_01DodVjoeVmEY3gU5ALbqmGj",
281
- "model_response": {
282
- "id": "chatcmpl-ea298ce5-bc1c-435d-b5f9-6eb6e6e864df",
283
- "created": 1738048619,
284
- "model": "claude-3-5-sonnet-20241022",
285
- "object": "chat.completion",
286
- "system_fingerprint": null,
287
- "choices": [
288
- {
289
- "finish_reason": "tool_calls",
290
- "index": 0,
291
- "message": {
292
- "content": "Let me verify that the file was renamed successfully:",
293
- "role": "assistant",
294
- "tool_calls": [
295
- {
296
- "index": 1,
297
- "function": {
298
- "arguments": "{\"command\": \"view\", \"path\": \"/workspace\"}",
299
- "name": "str_replace_editor"
300
- },
301
- "id": "toolu_01DodVjoeVmEY3gU5ALbqmGj",
302
- "type": "function"
303
- }
304
- ],
305
- "function_call": null
306
- }
307
- }
308
- ],
309
- "usage": {
310
- "completion_tokens": 85,
311
- "prompt_tokens": 3804,
312
- "total_tokens": 3889,
313
- "completion_tokens_details": null,
314
- "prompt_tokens_details": {
315
- "audio_tokens": null,
316
- "cached_tokens": 3610
317
- },
318
- "cache_creation_input_tokens": 188,
319
- "cache_read_input_tokens": 3610
320
- },
321
- "service_tier": null
322
- },
323
- "total_calls_in_response": 1
324
- },
325
- "args": {
326
- "path": "/workspace",
327
- "start": 0,
328
- "end": -1,
329
- "thought": "Let me verify that the file was renamed successfully:",
330
- "impl_source": "oh_aci",
331
- "translated_ipython_code": "print(file_editor(**{'command': 'view', 'path': '/workspace'}))"
332
- },
333
- "timeout": 120
334
- },
335
- {
336
- "id": 8,
337
- "timestamp": "2025-01-27T23:16:59.824084",
338
- "source": "agent",
339
- "message": "I read the file /workspace.",
340
- "cause": 7,
341
- "observation": "read",
342
- "tool_call_metadata": {
343
- "function_name": "str_replace_editor",
344
- "tool_call_id": "toolu_01DodVjoeVmEY3gU5ALbqmGj",
345
- "model_response": {
346
- "id": "chatcmpl-ea298ce5-bc1c-435d-b5f9-6eb6e6e864df",
347
- "created": 1738048619,
348
- "model": "claude-3-5-sonnet-20241022",
349
- "object": "chat.completion",
350
- "system_fingerprint": null,
351
- "choices": [
352
- {
353
- "finish_reason": "tool_calls",
354
- "index": 0,
355
- "message": {
356
- "content": "Let me verify that the file was renamed successfully:",
357
- "role": "assistant",
358
- "tool_calls": [
359
- {
360
- "index": 1,
361
- "function": {
362
- "arguments": "{\"command\": \"view\", \"path\": \"/workspace\"}",
363
- "name": "str_replace_editor"
364
- },
365
- "id": "toolu_01DodVjoeVmEY3gU5ALbqmGj",
366
- "type": "function"
367
- }
368
- ],
369
- "function_call": null
370
- }
371
- }
372
- ],
373
- "usage": {
374
- "completion_tokens": 85,
375
- "prompt_tokens": 3804,
376
- "total_tokens": 3889,
377
- "completion_tokens_details": null,
378
- "prompt_tokens_details": {
379
- "audio_tokens": null,
380
- "cached_tokens": 3610
381
- },
382
- "cache_creation_input_tokens": 188,
383
- "cache_read_input_tokens": 3610
384
- },
385
- "service_tier": null
386
- },
387
- "total_calls_in_response": 1
388
- },
389
- "content": "Here's the files and directories up to 2 levels deep in /workspace, excluding hidden items:\n/workspace/\n/workspace/2048.py",
390
- "extras": {
391
- "path": "/workspace",
392
- "impl_source": "oh_aci"
393
- }
394
- },
395
- {
396
- "id": 9,
397
- "timestamp": "2025-01-27T23:17:02.182015",
398
- "source": "agent",
399
- "message": "The file has been successfully renamed from `game_2048.py` to `2048.py`. Is there anything else you need help with?",
400
- "action": "finish",
401
- "tool_call_metadata": {
402
- "function_name": "finish",
403
- "tool_call_id": "toolu_01G98EmHEC8HgktRjJBYXqEa",
404
- "model_response": {
405
- "id": "chatcmpl-fcf8f59b-beb3-4412-94d5-fbe4c02d64a1",
406
- "created": 1738048621,
407
- "model": "claude-3-5-sonnet-20241022",
408
- "object": "chat.completion",
409
- "system_fingerprint": null,
410
- "choices": [
411
- {
412
- "finish_reason": "tool_calls",
413
- "index": 0,
414
- "message": {
415
- "content": "The file has been successfully renamed from `game_2048.py` to `2048.py`. Is there anything else you need help with?",
416
- "role": "assistant",
417
- "tool_calls": [
418
- {
419
- "index": 1,
420
- "function": {
421
- "arguments": "{}",
422
- "name": "finish"
423
- },
424
- "id": "toolu_01G98EmHEC8HgktRjJBYXqEa",
425
- "type": "function"
426
- }
427
- ],
428
- "function_call": null
429
- }
430
- }
431
- ],
432
- "usage": {
433
- "completion_tokens": 67,
434
- "prompt_tokens": 3933,
435
- "total_tokens": 4000,
436
- "completion_tokens_details": null,
437
- "prompt_tokens_details": {
438
- "audio_tokens": null,
439
- "cached_tokens": 3798
440
- },
441
- "cache_creation_input_tokens": 129,
442
- "cache_read_input_tokens": 3798
443
- },
444
- "service_tier": null
445
- },
446
- "total_calls_in_response": 1
447
- },
448
- "args": {
449
- "outputs": {},
450
- "thought": "The file has been successfully renamed from `game_2048.py` to `2048.py`. Is there anything else you need help with?"
451
- },
452
- "timeout": 120
453
- }
454
- ]
 
tests/runtime/utils/test_system_stats.py DELETED
@@ -1,60 +0,0 @@
1
- """Tests for system stats utilities."""
2
-
3
- import psutil
4
-
5
- from openhands.runtime.utils.system_stats import get_system_stats
6
-
7
-
8
- def test_get_system_stats():
9
- """Test that get_system_stats returns valid system statistics."""
10
- stats = get_system_stats()
11
-
12
- # Test structure
13
- assert isinstance(stats, dict)
14
- assert set(stats.keys()) == {'cpu_percent', 'memory', 'disk', 'io'}
15
-
16
- # Test CPU stats
17
- assert isinstance(stats['cpu_percent'], float)
18
- assert 0 <= stats['cpu_percent'] <= 100 * psutil.cpu_count()
19
-
20
- # Test memory stats
21
- assert isinstance(stats['memory'], dict)
22
- assert set(stats['memory'].keys()) == {'rss', 'vms', 'percent'}
23
- assert isinstance(stats['memory']['rss'], int)
24
- assert isinstance(stats['memory']['vms'], int)
25
- assert isinstance(stats['memory']['percent'], float)
26
- assert stats['memory']['rss'] > 0
27
- assert stats['memory']['vms'] > 0
28
- assert 0 <= stats['memory']['percent'] <= 100
29
-
30
- # Test disk stats
31
- assert isinstance(stats['disk'], dict)
32
- assert set(stats['disk'].keys()) == {'total', 'used', 'free', 'percent'}
33
- assert isinstance(stats['disk']['total'], int)
34
- assert isinstance(stats['disk']['used'], int)
35
- assert isinstance(stats['disk']['free'], int)
36
- assert isinstance(stats['disk']['percent'], float)
37
- assert stats['disk']['total'] > 0
38
- assert stats['disk']['used'] >= 0
39
- assert stats['disk']['free'] >= 0
40
- assert 0 <= stats['disk']['percent'] <= 100
41
- # Verify that used + free is less than or equal to total
42
- # (might not be exactly equal due to filesystem overhead)
43
- assert stats['disk']['used'] + stats['disk']['free'] <= stats['disk']['total']
44
-
45
- # Test I/O stats
46
- assert isinstance(stats['io'], dict)
47
- assert set(stats['io'].keys()) == {'read_bytes', 'write_bytes'}
48
- assert isinstance(stats['io']['read_bytes'], int)
49
- assert isinstance(stats['io']['write_bytes'], int)
50
- assert stats['io']['read_bytes'] >= 0
51
- assert stats['io']['write_bytes'] >= 0
52
-
53
-
54
- def test_get_system_stats_stability():
55
- """Test that get_system_stats can be called multiple times without errors."""
56
- # Call multiple times to ensure stability
57
- for _ in range(3):
58
- stats = get_system_stats()
59
- assert isinstance(stats, dict)
60
- assert stats['cpu_percent'] >= 0
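
The deleted test above pins down the shape of `get_system_stats()`: a dict with `cpu_percent`, a `memory` block (`rss`, `vms`, `percent`), a `disk` block (`total`, `used`, `free`, `percent`), and an `io` block (`read_bytes`, `write_bytes`). For context, here is a minimal sketch that would satisfy those assertions using `psutil`; it is not the actual `openhands.runtime.utils.system_stats` implementation, and the `/` disk path and the sampling interval are assumptions made purely for illustration.

```python
# Minimal sketch of a get_system_stats() matching the deleted assertions.
# NOT the OpenHands implementation; the '/' disk path and the 0.1s CPU
# sampling interval are illustrative assumptions.
import psutil

_PROCESS = psutil.Process()


def get_system_stats() -> dict:
    memory_info = _PROCESS.memory_info()
    disk = psutil.disk_usage('/')
    io = _PROCESS.io_counters()  # note: not available on macOS

    return {
        'cpu_percent': _PROCESS.cpu_percent(interval=0.1),
        'memory': {
            'rss': memory_info.rss,
            'vms': memory_info.vms,
            'percent': _PROCESS.memory_percent(),
        },
        'disk': {
            'total': disk.total,
            'used': disk.used,
            'free': disk.free,
            'percent': disk.percent,
        },
        'io': {
            'read_bytes': io.read_bytes,
            'write_bytes': io.write_bytes,
        },
    }
```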
 
tests/test_fileops.py DELETED
@@ -1,66 +0,0 @@
1
- from pathlib import Path
2
-
3
- import pytest
4
-
5
- from openhands.runtime.utils import files
6
-
7
- SANDBOX_PATH_PREFIX = '/workspace'
8
- CONTAINER_PATH = '/workspace'
9
- HOST_PATH = 'workspace'
10
-
11
-
12
- def test_resolve_path():
13
- assert (
14
- files.resolve_path('test.txt', '/workspace', HOST_PATH, CONTAINER_PATH)
15
- == Path(HOST_PATH) / 'test.txt'
16
- )
17
- assert (
18
- files.resolve_path('subdir/test.txt', '/workspace', HOST_PATH, CONTAINER_PATH)
19
- == Path(HOST_PATH) / 'subdir' / 'test.txt'
20
- )
21
- assert (
22
- files.resolve_path(
23
- Path(SANDBOX_PATH_PREFIX) / 'test.txt',
24
- '/workspace',
25
- HOST_PATH,
26
- CONTAINER_PATH,
27
- )
28
- == Path(HOST_PATH) / 'test.txt'
29
- )
30
- assert (
31
- files.resolve_path(
32
- Path(SANDBOX_PATH_PREFIX) / 'subdir' / 'test.txt',
33
- '/workspace',
34
- HOST_PATH,
35
- CONTAINER_PATH,
36
- )
37
- == Path(HOST_PATH) / 'subdir' / 'test.txt'
38
- )
39
- assert (
40
- files.resolve_path(
41
- Path(SANDBOX_PATH_PREFIX) / 'subdir' / '..' / 'test.txt',
42
- '/workspace',
43
- HOST_PATH,
44
- CONTAINER_PATH,
45
- )
46
- == Path(HOST_PATH) / 'test.txt'
47
- )
48
- with pytest.raises(PermissionError):
49
- files.resolve_path(
50
- Path(SANDBOX_PATH_PREFIX) / '..' / 'test.txt',
51
- '/workspace',
52
- HOST_PATH,
53
- CONTAINER_PATH,
54
- )
55
- with pytest.raises(PermissionError):
56
- files.resolve_path(
57
- Path('..') / 'test.txt', '/workspace', HOST_PATH, CONTAINER_PATH
58
- )
59
- with pytest.raises(PermissionError):
60
- files.resolve_path(
61
- Path('/') / 'test.txt', '/workspace', HOST_PATH, CONTAINER_PATH
62
- )
63
- assert (
64
- files.resolve_path('test.txt', '/workspace/test', HOST_PATH, CONTAINER_PATH)
65
- == Path(HOST_PATH) / 'test' / 'test.txt'
66
- )
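
The deleted test encodes the contract of `files.resolve_path`: relative paths resolve against the sandbox working directory, absolute sandbox paths are rebased onto the host workspace, `..` segments are normalized, and anything that escapes `/workspace` raises `PermissionError`. A rough sketch of that contract is below; it is an illustration under assumed argument names, not the real `openhands.runtime.utils.files` code.

```python
# Rough sketch of the resolve_path contract exercised by the deleted test.
# Not the actual OpenHands implementation; argument names are assumptions.
import posixpath
from pathlib import Path


def resolve_path(file_path, working_dir, workspace_host_path, workspace_sandbox_path):
    # Relative paths resolve against the working directory inside the sandbox.
    path = posixpath.join(working_dir, str(file_path))
    # Collapse '.' and '..' segments without touching the filesystem.
    path = posixpath.normpath(path)
    prefix = workspace_sandbox_path.rstrip('/') + '/'
    if not (path + '/').startswith(prefix):
        raise PermissionError(f'{file_path} is outside the sandbox workspace')
    # Rebase the part below the sandbox prefix onto the host workspace path.
    relative = path[len(prefix):]
    return Path(workspace_host_path) / relative
```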
 
tests/unit/README.md DELETED
@@ -1,29 +0,0 @@
1
- ## Introduction
2
-
3
- This folder contains unit tests that could be run locally.
4
-
5
- Run all test:
6
-
7
- ```bash
8
- poetry run pytest ./tests/unit
9
- ```
10
-
11
- Run specific test file:
12
-
13
- ```bash
14
- poetry run pytest ./tests/unit/test_llm_fncall_converter.py
15
- ```
16
-
17
- Run specific unit test
18
-
19
- ```bash
20
- poetry run pytest ./tests/unit/test_llm_fncall_converter.py::test_convert_tool_call_to_string
21
- ```
22
-
23
- For a more verbose output, to above calls the `-v` flag can be used (even more verbose: `-vv` and `-vvv`):
24
-
25
- ```bash
26
- poetry run pytest -v ./tests/unit/test_llm_fncall_converter.py
27
- ```
28
-
29
- More details see [pytest doc](https://docs.pytest.org/en/latest/contents.html)
 
tests/unit/core/config/test_config_utils.py DELETED
@@ -1,168 +0,0 @@
1
- import pytest
2
-
3
- from openhands.core.config.agent_config import AgentConfig
4
- from openhands.core.config.openhands_config import OpenHandsConfig
5
- from openhands.core.config.utils import finalize_config
6
-
7
- # Define a dummy agent name often used in tests or as a default
8
- DEFAULT_AGENT_NAME = 'CodeActAgent'
9
-
10
-
11
- def test_finalize_config_cli_disables_jupyter_and_browsing_when_true():
12
- """
13
- Test that finalize_config sets enable_jupyter and enable_browsing to False
14
- when runtime is 'cli' and they were initially True.
15
- """
16
- app_config = OpenHandsConfig()
17
- app_config.runtime = 'cli'
18
-
19
- agent_config = AgentConfig(enable_jupyter=True, enable_browsing=True)
20
- app_config.agents[DEFAULT_AGENT_NAME] = agent_config
21
-
22
- finalize_config(app_config)
23
-
24
- assert not app_config.agents[DEFAULT_AGENT_NAME].enable_jupyter, (
25
- "enable_jupyter should be False when runtime is 'cli'"
26
- )
27
- assert not app_config.agents[DEFAULT_AGENT_NAME].enable_browsing, (
28
- "enable_browsing should be False when runtime is 'cli'"
29
- )
30
-
31
-
32
- def test_finalize_config_cli_keeps_jupyter_and_browsing_false_when_false():
33
- """
34
- Test that finalize_config keeps enable_jupyter and enable_browsing as False
35
- when runtime is 'cli' and they were initially False.
36
- """
37
- app_config = OpenHandsConfig()
38
- app_config.runtime = 'cli'
39
-
40
- agent_config = AgentConfig(enable_jupyter=False, enable_browsing=False)
41
- app_config.agents[DEFAULT_AGENT_NAME] = agent_config
42
-
43
- finalize_config(app_config)
44
-
45
- assert not app_config.agents[DEFAULT_AGENT_NAME].enable_jupyter, (
46
- "enable_jupyter should remain False when runtime is 'cli' and initially False"
47
- )
48
- assert not app_config.agents[DEFAULT_AGENT_NAME].enable_browsing, (
49
- "enable_browsing should remain False when runtime is 'cli' and initially False"
50
- )
51
-
52
-
53
- def test_finalize_config_other_runtime_keeps_jupyter_and_browsing_true_by_default():
54
- """
55
- Test that finalize_config keeps enable_jupyter and enable_browsing as True (default)
56
- when runtime is not 'cli'.
57
- """
58
- app_config = OpenHandsConfig()
59
- app_config.runtime = 'docker' # A non-cli runtime
60
-
61
- # AgentConfig defaults enable_jupyter and enable_browsing to True
62
- agent_config = AgentConfig()
63
- app_config.agents[DEFAULT_AGENT_NAME] = agent_config
64
-
65
- finalize_config(app_config)
66
-
67
- assert app_config.agents[DEFAULT_AGENT_NAME].enable_jupyter, (
68
- 'enable_jupyter should remain True by default for non-cli runtimes'
69
- )
70
- assert app_config.agents[DEFAULT_AGENT_NAME].enable_browsing, (
71
- 'enable_browsing should remain True by default for non-cli runtimes'
72
- )
73
-
74
-
75
- def test_finalize_config_other_runtime_keeps_jupyter_and_browsing_false_if_set():
76
- """
77
- Test that finalize_config keeps enable_jupyter and enable_browsing as False
78
- when runtime is not 'cli' but they were explicitly set to False.
79
- """
80
- app_config = OpenHandsConfig()
81
- app_config.runtime = 'docker' # A non-cli runtime
82
-
83
- agent_config = AgentConfig(enable_jupyter=False, enable_browsing=False)
84
- app_config.agents[DEFAULT_AGENT_NAME] = agent_config
85
-
86
- finalize_config(app_config)
87
-
88
- assert not app_config.agents[DEFAULT_AGENT_NAME].enable_jupyter, (
89
- 'enable_jupyter should remain False for non-cli runtimes if explicitly set to False'
90
- )
91
- assert not app_config.agents[DEFAULT_AGENT_NAME].enable_browsing, (
92
- 'enable_browsing should remain False for non-cli runtimes if explicitly set to False'
93
- )
94
-
95
-
96
- def test_finalize_config_no_agents_defined():
97
- """
98
- Test that finalize_config runs without error if no agents are defined in the config,
99
- even when runtime is 'cli'.
100
- """
101
- app_config = OpenHandsConfig()
102
- app_config.runtime = 'cli'
103
- # No agents are added to app_config.agents
104
-
105
- try:
106
- finalize_config(app_config)
107
- except Exception as e:
108
- pytest.fail(f'finalize_config raised an exception with no agents defined: {e}')
109
-
110
-
111
- def test_finalize_config_multiple_agents_cli_runtime():
112
- """
113
- Test that finalize_config correctly disables jupyter and browsing for multiple agents
114
- when runtime is 'cli'.
115
- """
116
- app_config = OpenHandsConfig()
117
- app_config.runtime = 'cli'
118
-
119
- agent_config1 = AgentConfig(enable_jupyter=True, enable_browsing=True)
120
- agent_config2 = AgentConfig(enable_jupyter=True, enable_browsing=True)
121
- app_config.agents['Agent1'] = agent_config1
122
- app_config.agents['Agent2'] = agent_config2
123
-
124
- finalize_config(app_config)
125
-
126
- assert not app_config.agents['Agent1'].enable_jupyter, (
127
- 'Jupyter should be disabled for Agent1'
128
- )
129
- assert not app_config.agents['Agent1'].enable_browsing, (
130
- 'Browsing should be disabled for Agent1'
131
- )
132
- assert not app_config.agents['Agent2'].enable_jupyter, (
133
- 'Jupyter should be disabled for Agent2'
134
- )
135
- assert not app_config.agents['Agent2'].enable_browsing, (
136
- 'Browsing should be disabled for Agent2'
137
- )
138
-
139
-
140
- def test_finalize_config_multiple_agents_other_runtime():
141
- """
142
- Test that finalize_config correctly keeps jupyter and browsing enabled (or as set)
143
- for multiple agents when runtime is not 'cli'.
144
- """
145
- app_config = OpenHandsConfig()
146
- app_config.runtime = 'docker'
147
-
148
- agent_config1 = AgentConfig(enable_jupyter=True, enable_browsing=True) # Defaults
149
- agent_config2 = AgentConfig(
150
- enable_jupyter=False, enable_browsing=False
151
- ) # Explicitly false
152
- app_config.agents['Agent1'] = agent_config1
153
- app_config.agents['Agent2'] = agent_config2
154
-
155
- finalize_config(app_config)
156
-
157
- assert app_config.agents['Agent1'].enable_jupyter, (
158
- 'Jupyter should be True for Agent1'
159
- )
160
- assert app_config.agents['Agent1'].enable_browsing, (
161
- 'Browsing should be True for Agent1'
162
- )
163
- assert not app_config.agents['Agent2'].enable_jupyter, (
164
- 'Jupyter should be False for Agent2'
165
- )
166
- assert not app_config.agents['Agent2'].enable_browsing, (
167
- 'Browsing should be False for Agent2'
168
- )
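
These tests pin one piece of `finalize_config` behavior: when `runtime` is `'cli'`, Jupyter and browsing are forced off for every configured agent, while any other runtime leaves the flags untouched. The core of that rule presumably reduces to a short loop like the sketch below; the real `finalize_config` does considerably more, so treat this as illustrative only.

```python
# Illustrative fragment of the behavior the deleted tests assert: a CLI
# runtime cannot host Jupyter or a browser, so both capabilities are
# switched off. Sketch only, not openhands.core.config.utils.finalize_config.
def _disable_interactive_tools_for_cli(app_config) -> None:
    if app_config.runtime == 'cli':
        for agent_config in app_config.agents.values():
            agent_config.enable_jupyter = False
            agent_config.enable_browsing = False
```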
 
tests/unit/frontend/test_translation_completeness.py DELETED
@@ -1,33 +0,0 @@
1
- """Test that the translation completeness check works correctly."""
2
-
3
- import os
4
- import subprocess
5
- import unittest
6
-
7
-
8
- class TestTranslationCompleteness(unittest.TestCase):
9
- """Test that the translation completeness check works correctly."""
10
-
11
- def test_translation_completeness_check_runs(self):
12
- """Test that the translation completeness check script can be executed."""
13
- frontend_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), "frontend")
14
- script_path = os.path.join(frontend_dir, "scripts", "check-translation-completeness.cjs")
15
-
16
- # Verify the script exists
17
- self.assertTrue(os.path.exists(script_path), f"Script not found at {script_path}")
18
-
19
- # Verify the script is executable
20
- self.assertTrue(os.access(script_path, os.X_OK), f"Script at {script_path} is not executable")
21
-
22
- # Run the script (it may fail due to missing translations, but we just want to verify it runs)
23
- try:
24
- subprocess.run(
25
- ["node", script_path],
26
- cwd=frontend_dir,
27
- check=False,
28
- capture_output=True,
29
- text=True
30
- )
31
- # We don't assert on the return code because it might fail due to missing translations
32
- except Exception as e:
33
- self.fail(f"Failed to run translation completeness check: {e}")
 
tests/unit/resolver/github/test_guess_success.py DELETED
@@ -1,202 +0,0 @@
1
- import json
2
- from unittest.mock import MagicMock, patch
3
-
4
- from openhands.core.config import LLMConfig
5
- from openhands.events.action.message import MessageAction
6
- from openhands.llm import LLM
7
- from openhands.resolver.interfaces.github import GithubIssueHandler, GithubPRHandler
8
- from openhands.resolver.interfaces.issue import Issue
9
- from openhands.resolver.interfaces.issue_definitions import (
10
- ServiceContextIssue,
11
- ServiceContextPR,
12
- )
13
-
14
-
15
- def test_guess_success_multiline_explanation():
16
- # Mock data
17
- issue = Issue(
18
- owner='test',
19
- repo='test',
20
- number=1,
21
- title='Test Issue',
22
- body='Test body',
23
- thread_comments=None,
24
- review_comments=None,
25
- )
26
- history = [MessageAction(content='Test message')]
27
- llm_config = LLMConfig(model='test', api_key='test')
28
-
29
- # Create a mock response with multi-line explanation
30
- mock_response = MagicMock()
31
- mock_response.choices = [
32
- MagicMock(
33
- message=MagicMock(
34
- content="""--- success
35
- true
36
-
37
- --- explanation
38
- The PR successfully addressed the issue by:
39
- - Fixed bug A
40
- - Added test B
41
- - Updated documentation C
42
-
43
- Automatic fix generated by OpenHands 🙌"""
44
- )
45
- )
46
- ]
47
-
48
- # Use patch to mock the LLM completion call
49
- with patch.object(LLM, 'completion', return_value=mock_response) as mock_completion:
50
- # Create a handler instance
51
- handler = ServiceContextIssue(
52
- GithubIssueHandler('test', 'test', 'test'), llm_config
53
- )
54
-
55
- # Call guess_success
56
- success, _, explanation = handler.guess_success(issue, history)
57
-
58
- # Verify the results
59
- assert success is True
60
- assert 'The PR successfully addressed the issue by:' in explanation
61
- assert 'Fixed bug A' in explanation
62
- assert 'Added test B' in explanation
63
- assert 'Updated documentation C' in explanation
64
- assert 'Automatic fix generated by OpenHands' in explanation
65
-
66
- # Verify that LLM completion was called exactly once
67
- mock_completion.assert_called_once()
68
-
69
-
70
- def test_pr_handler_guess_success_with_thread_comments():
71
- # Create a PR handler instance
72
- llm_config = LLMConfig(model='test', api_key='test')
73
- handler = ServiceContextPR(GithubPRHandler('test', 'test', 'test'), llm_config)
74
-
75
- # Create a mock issue with thread comments but no review comments
76
- issue = Issue(
77
- owner='test-owner',
78
- repo='test-repo',
79
- number=1,
80
- title='Test PR',
81
- body='Test Body',
82
- thread_comments=['First comment', 'Second comment'],
83
- closing_issues=['Issue description'],
84
- review_comments=None,
85
- thread_ids=None,
86
- head_branch='test-branch',
87
- )
88
-
89
- # Create mock history
90
- history = [MessageAction(content='Fixed the issue by implementing X and Y')]
91
-
92
- # Create mock LLM config
93
- llm_config = LLMConfig(model='test-model', api_key='test-key')
94
-
95
- # Mock the LLM response
96
- mock_response = MagicMock()
97
- mock_response.choices = [
98
- MagicMock(
99
- message=MagicMock(
100
- content="""--- success
101
- true
102
-
103
- --- explanation
104
- The changes successfully address the feedback."""
105
- )
106
- )
107
- ]
108
-
109
- # Test the guess_success method
110
- with patch.object(LLM, 'completion', return_value=mock_response):
111
- success, success_list, explanation = handler.guess_success(issue, history)
112
-
113
- # Verify the results
114
- assert success is True
115
- assert success_list == [True]
116
- assert 'successfully address' in explanation
117
- assert len(json.loads(explanation)) == 1
118
-
119
-
120
- def test_pr_handler_guess_success_only_review_comments():
121
- # Create a PR handler instance
122
- llm_config = LLMConfig(model='test', api_key='test')
123
- handler = ServiceContextPR(
124
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
125
- )
126
-
127
- # Create a mock issue with only review comments
128
- issue = Issue(
129
- owner='test-owner',
130
- repo='test-repo',
131
- number=1,
132
- title='Test PR',
133
- body='Test Body',
134
- thread_comments=None,
135
- closing_issues=['Issue description'],
136
- review_comments=['Please fix the formatting', 'Add more tests'],
137
- thread_ids=None,
138
- head_branch='test-branch',
139
- )
140
-
141
- # Create mock history
142
- history = [MessageAction(content='Fixed the formatting and added more tests')]
143
-
144
- # Create mock LLM config
145
- llm_config = LLMConfig(model='test-model', api_key='test-key')
146
-
147
- # Mock the LLM response
148
- mock_response = MagicMock()
149
- mock_response.choices = [
150
- MagicMock(
151
- message=MagicMock(
152
- content="""--- success
153
- true
154
-
155
- --- explanation
156
- The changes successfully address the review comments."""
157
- )
158
- )
159
- ]
160
-
161
- # Test the guess_success method
162
- with patch.object(LLM, 'completion', return_value=mock_response):
163
- success, success_list, explanation = handler.guess_success(issue, history)
164
-
165
- # Verify the results
166
- assert success is True
167
- assert success_list == [True]
168
- assert (
169
- '["The changes successfully address the review comments."]' in explanation
170
- )
171
-
172
-
173
- def test_pr_handler_guess_success_no_comments():
174
- # Create a PR handler instance
175
- llm_config = LLMConfig(model='test', api_key='test')
176
- handler = ServiceContextPR(GithubPRHandler('test', 'test', 'test'), llm_config)
177
-
178
- # Create a mock issue with no comments
179
- issue = Issue(
180
- owner='test-owner',
181
- repo='test-repo',
182
- number=1,
183
- title='Test PR',
184
- body='Test Body',
185
- thread_comments=None,
186
- closing_issues=['Issue description'],
187
- review_comments=None,
188
- thread_ids=None,
189
- head_branch='test-branch',
190
- )
191
-
192
- # Create mock history
193
- history = [MessageAction(content='Fixed the issue')]
194
-
195
- # Create mock LLM config
196
- llm_config = LLMConfig(model='test-model', api_key='test-key')
197
-
198
- # Test that it returns appropriate message when no comments are present
199
- success, success_list, explanation = handler.guess_success(issue, history)
200
- assert success is False
201
- assert success_list is None
202
- assert explanation == 'No feedback was found to process'
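
The mocked LLM replies in these tests share a small plain-text protocol: a `--- success` section holding `true`/`false` and a `--- explanation` section holding free-form, possibly multi-line text. A tolerant parser for that format might look like the sketch below; this is an assumption about how such a reply could be split, not the verbatim OpenHands resolver code.

```python
# Sketch of a parser for the "--- success / --- explanation" reply format
# used in the mocked responses above. Hypothetical helper, not OpenHands code.
import re


def parse_guess_success_reply(raw: str) -> tuple[bool, str]:
    match = re.search(
        r'---\s*success\s*\n(.*?)\n\s*---\s*explanation\s*\n(.*)',
        raw,
        re.DOTALL | re.IGNORECASE,
    )
    if match is None:
        return False, 'No feedback was found to process'
    success = match.group(1).strip().lower() == 'true'
    explanation = match.group(2).strip()
    return success, explanation
```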
 
tests/unit/resolver/github/test_issue_handler.py DELETED
@@ -1,645 +0,0 @@
1
- from unittest.mock import MagicMock, patch
2
-
3
- from openhands.core.config import LLMConfig
4
- from openhands.resolver.interfaces.github import GithubIssueHandler, GithubPRHandler
5
- from openhands.resolver.interfaces.issue import ReviewThread
6
- from openhands.resolver.interfaces.issue_definitions import (
7
- ServiceContextIssue,
8
- ServiceContextPR,
9
- )
10
-
11
-
12
- def test_get_converted_issues_initializes_review_comments():
13
- # Mock the necessary dependencies
14
- with patch('httpx.get') as mock_get:
15
- # Mock the response for issues
16
- mock_issues_response = MagicMock()
17
- mock_issues_response.json.return_value = [
18
- {'number': 1, 'title': 'Test Issue', 'body': 'Test Body'}
19
- ]
20
- # Mock the response for comments
21
- mock_comments_response = MagicMock()
22
- mock_comments_response.json.return_value = []
23
-
24
- # Set up the mock to return different responses for different calls
25
- # First call is for issues, second call is for comments
26
- mock_get.side_effect = [
27
- mock_issues_response,
28
- mock_comments_response,
29
- mock_comments_response,
30
- ] # Need two comment responses because we make two API calls
31
-
32
- # Create an instance of IssueHandler
33
- llm_config = LLMConfig(model='test', api_key='test')
34
- handler = ServiceContextIssue(
35
- GithubIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
36
- )
37
-
38
- # Get converted issues
39
- issues = handler.get_converted_issues(issue_numbers=[1])
40
-
41
- # Verify that we got exactly one issue
42
- assert len(issues) == 1
43
-
44
- # Verify that review_comments is initialized as None
45
- assert issues[0].review_comments is None
46
-
47
- # Verify other fields are set correctly
48
- assert issues[0].number == 1
49
- assert issues[0].title == 'Test Issue'
50
- assert issues[0].body == 'Test Body'
51
- assert issues[0].owner == 'test-owner'
52
- assert issues[0].repo == 'test-repo'
53
-
54
-
55
- def test_get_converted_issues_handles_empty_body():
56
- # Mock the necessary dependencies
57
- with patch('httpx.get') as mock_get:
58
- # Mock the response for issues
59
- mock_issues_response = MagicMock()
60
- mock_issues_response.json.return_value = [
61
- {'number': 1, 'title': 'Test Issue', 'body': None}
62
- ]
63
- # Mock the response for comments
64
- mock_comments_response = MagicMock()
65
- mock_comments_response.json.return_value = []
66
- # Set up the mock to return different responses
67
- mock_get.side_effect = [
68
- mock_issues_response,
69
- mock_comments_response,
70
- mock_comments_response,
71
- ]
72
-
73
- # Create an instance of IssueHandler
74
- llm_config = LLMConfig(model='test', api_key='test')
75
- handler = ServiceContextIssue(
76
- GithubIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
77
- )
78
-
79
- # Get converted issues
80
- issues = handler.get_converted_issues(issue_numbers=[1])
81
-
82
- # Verify that we got exactly one issue
83
- assert len(issues) == 1
84
-
85
- # Verify that body is empty string when None
86
- assert issues[0].body == ''
87
-
88
- # Verify other fields are set correctly
89
- assert issues[0].number == 1
90
- assert issues[0].title == 'Test Issue'
91
- assert issues[0].owner == 'test-owner'
92
- assert issues[0].repo == 'test-repo'
93
-
94
- # Verify that review_comments is initialized as None
95
- assert issues[0].review_comments is None
96
-
97
-
98
- def test_pr_handler_get_converted_issues_with_comments():
99
- # Mock the necessary dependencies
100
- with patch('httpx.get') as mock_get:
101
- # Mock the response for PRs
102
- mock_prs_response = MagicMock()
103
- mock_prs_response.json.return_value = [
104
- {
105
- 'number': 1,
106
- 'title': 'Test PR',
107
- 'body': 'Test Body fixes #1',
108
- 'head': {'ref': 'test-branch'},
109
- }
110
- ]
111
-
112
- # Mock the response for PR comments
113
- mock_comments_response = MagicMock()
114
- mock_comments_response.json.return_value = [
115
- {'body': 'First comment'},
116
- {'body': 'Second comment'},
117
- ]
118
-
119
- # Mock the response for PR metadata (GraphQL)
120
- mock_graphql_response = MagicMock()
121
- mock_graphql_response.json.return_value = {
122
- 'data': {
123
- 'repository': {
124
- 'pullRequest': {
125
- 'closingIssuesReferences': {'edges': []},
126
- 'reviews': {'nodes': []},
127
- 'reviewThreads': {'edges': []},
128
- }
129
- }
130
- }
131
- }
132
-
133
- # Set up the mock to return different responses
134
- # We need to return empty responses for subsequent pages
135
- mock_empty_response = MagicMock()
136
- mock_empty_response.json.return_value = []
137
-
138
- # Mock the response for fetching the external issue referenced in PR body
139
- mock_external_issue_response = MagicMock()
140
- mock_external_issue_response.json.return_value = {
141
- 'body': 'This is additional context from an externally referenced issue.'
142
- }
143
-
144
- mock_get.side_effect = [
145
- mock_prs_response, # First call for PRs
146
- mock_empty_response, # Second call for PRs (empty page)
147
- mock_comments_response, # Third call for PR comments
148
- mock_empty_response, # Fourth call for PR comments (empty page)
149
- mock_external_issue_response, # Mock response for the external issue reference #1
150
- ]
151
-
152
- # Mock the post request for GraphQL
153
- with patch('httpx.post') as mock_post:
154
- mock_post.return_value = mock_graphql_response
155
-
156
- # Create an instance of PRHandler
157
- llm_config = LLMConfig(model='test', api_key='test')
158
- handler = ServiceContextPR(
159
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
160
- )
161
-
162
- # Get converted issues
163
- prs = handler.get_converted_issues(issue_numbers=[1])
164
-
165
- # Verify that we got exactly one PR
166
- assert len(prs) == 1
167
-
168
- # Verify that thread_comments are set correctly
169
- assert prs[0].thread_comments == ['First comment', 'Second comment']
170
-
171
- # Verify other fields are set correctly
172
- assert prs[0].number == 1
173
- assert prs[0].title == 'Test PR'
174
- assert prs[0].body == 'Test Body fixes #1'
175
- assert prs[0].owner == 'test-owner'
176
- assert prs[0].repo == 'test-repo'
177
- assert prs[0].head_branch == 'test-branch'
178
- assert prs[0].closing_issues == [
179
- 'This is additional context from an externally referenced issue.'
180
- ]
181
-
182
-
183
- def test_get_issue_comments_with_specific_comment_id():
184
- # Mock the necessary dependencies
185
- with patch('httpx.get') as mock_get:
186
- # Mock the response for comments
187
- mock_comments_response = MagicMock()
188
- mock_comments_response.json.return_value = [
189
- {'id': 123, 'body': 'First comment'},
190
- {'id': 456, 'body': 'Second comment'},
191
- ]
192
-
193
- mock_get.return_value = mock_comments_response
194
-
195
- # Create an instance of IssueHandler
196
- llm_config = LLMConfig(model='test', api_key='test')
197
- handler = ServiceContextIssue(
198
- GithubIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
199
- )
200
-
201
- # Get comments with a specific comment_id
202
- specific_comment = handler.get_issue_comments(issue_number=1, comment_id=123)
203
-
204
- # Verify only the specific comment is returned
205
- assert specific_comment == ['First comment']
206
-
207
-
208
- def test_pr_handler_get_converted_issues_with_specific_thread_comment():
209
- # Define the specific comment_id to filter
210
- specific_comment_id = 123
211
-
212
- # Mock GraphQL response for review threads
213
- with patch('httpx.get') as mock_get:
214
- # Mock the response for PRs
215
- mock_prs_response = MagicMock()
216
- mock_prs_response.json.return_value = [
217
- {
218
- 'number': 1,
219
- 'title': 'Test PR',
220
- 'body': 'Test Body',
221
- 'head': {'ref': 'test-branch'},
222
- }
223
- ]
224
-
225
- # Mock the response for PR comments
226
- mock_comments_response = MagicMock()
227
- mock_comments_response.json.return_value = [
228
- {'body': 'First comment', 'id': 123},
229
- {'body': 'Second comment', 'id': 124},
230
- ]
231
-
232
- # Mock the response for PR metadata (GraphQL)
233
- mock_graphql_response = MagicMock()
234
- mock_graphql_response.json.return_value = {
235
- 'data': {
236
- 'repository': {
237
- 'pullRequest': {
238
- 'closingIssuesReferences': {'edges': []},
239
- 'reviews': {'nodes': []},
240
- 'reviewThreads': {
241
- 'edges': [
242
- {
243
- 'node': {
244
- 'id': 'review-thread-1',
245
- 'isResolved': False,
246
- 'comments': {
247
- 'nodes': [
248
- {
249
- 'fullDatabaseId': 121,
250
- 'body': 'Specific review comment',
251
- 'path': 'file1.txt',
252
- },
253
- {
254
- 'fullDatabaseId': 456,
255
- 'body': 'Another review comment',
256
- 'path': 'file2.txt',
257
- },
258
- ]
259
- },
260
- }
261
- }
262
- ]
263
- },
264
- }
265
- }
266
- }
267
- }
268
-
269
- # Set up the mock to return different responses
270
- # We need to return empty responses for subsequent pages
271
- mock_empty_response = MagicMock()
272
- mock_empty_response.json.return_value = []
273
-
274
- mock_get.side_effect = [
275
- mock_prs_response, # First call for PRs
276
- mock_empty_response, # Second call for PRs (empty page)
277
- mock_comments_response, # Third call for PR comments
278
- mock_empty_response, # Fourth call for PR comments (empty page)
279
- ]
280
-
281
- # Mock the post request for GraphQL
282
- with patch('httpx.post') as mock_post:
283
- mock_post.return_value = mock_graphql_response
284
-
285
- # Create an instance of PRHandler
286
- llm_config = LLMConfig(model='test', api_key='test')
287
- handler = ServiceContextPR(
288
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
289
- )
290
-
291
- # Get converted issues
292
- prs = handler.get_converted_issues(
293
- issue_numbers=[1], comment_id=specific_comment_id
294
- )
295
-
296
- # Verify that we got exactly one PR
297
- assert len(prs) == 1
298
-
299
- # Verify that thread_comments are set correctly
300
- assert prs[0].thread_comments == ['First comment']
301
- assert prs[0].review_comments == []
302
- assert prs[0].review_threads == []
303
-
304
- # Verify other fields are set correctly
305
- assert prs[0].number == 1
306
- assert prs[0].title == 'Test PR'
307
- assert prs[0].body == 'Test Body'
308
- assert prs[0].owner == 'test-owner'
309
- assert prs[0].repo == 'test-repo'
310
- assert prs[0].head_branch == 'test-branch'
311
-
312
-
313
- def test_pr_handler_get_converted_issues_with_specific_review_thread_comment():
314
- # Define the specific comment_id to filter
315
- specific_comment_id = 123
316
-
317
- # Mock GraphQL response for review threads
318
- with patch('httpx.get') as mock_get:
319
- # Mock the response for PRs
320
- mock_prs_response = MagicMock()
321
- mock_prs_response.json.return_value = [
322
- {
323
- 'number': 1,
324
- 'title': 'Test PR',
325
- 'body': 'Test Body',
326
- 'head': {'ref': 'test-branch'},
327
- }
328
- ]
329
-
330
- # Mock the response for PR comments
331
- mock_comments_response = MagicMock()
332
- mock_comments_response.json.return_value = [
333
- {'body': 'First comment', 'id': 120},
334
- {'body': 'Second comment', 'id': 124},
335
- ]
336
-
337
- # Mock the response for PR metadata (GraphQL)
338
- mock_graphql_response = MagicMock()
339
- mock_graphql_response.json.return_value = {
340
- 'data': {
341
- 'repository': {
342
- 'pullRequest': {
343
- 'closingIssuesReferences': {'edges': []},
344
- 'reviews': {'nodes': []},
345
- 'reviewThreads': {
346
- 'edges': [
347
- {
348
- 'node': {
349
- 'id': 'review-thread-1',
350
- 'isResolved': False,
351
- 'comments': {
352
- 'nodes': [
353
- {
354
- 'fullDatabaseId': specific_comment_id,
355
- 'body': 'Specific review comment',
356
- 'path': 'file1.txt',
357
- },
358
- {
359
- 'fullDatabaseId': 456,
360
- 'body': 'Another review comment',
361
- 'path': 'file1.txt',
362
- },
363
- ]
364
- },
365
- }
366
- }
367
- ]
368
- },
369
- }
370
- }
371
- }
372
- }
373
-
374
- # Set up the mock to return different responses
375
- # We need to return empty responses for subsequent pages
376
- mock_empty_response = MagicMock()
377
- mock_empty_response.json.return_value = []
378
-
379
- mock_get.side_effect = [
380
- mock_prs_response, # First call for PRs
381
- mock_empty_response, # Second call for PRs (empty page)
382
- mock_comments_response, # Third call for PR comments
383
- mock_empty_response, # Fourth call for PR comments (empty page)
384
- ]
385
-
386
- # Mock the post request for GraphQL
387
- with patch('httpx.post') as mock_post:
388
- mock_post.return_value = mock_graphql_response
389
-
390
- # Create an instance of PRHandler
391
- llm_config = LLMConfig(model='test', api_key='test')
392
- handler = ServiceContextPR(
393
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
394
- )
395
-
396
- # Get converted issues
397
- prs = handler.get_converted_issues(
398
- issue_numbers=[1], comment_id=specific_comment_id
399
- )
400
-
401
- # Verify that we got exactly one PR
402
- assert len(prs) == 1
403
-
404
- # Verify that thread_comments are set correctly
405
- assert prs[0].thread_comments is None
406
- assert prs[0].review_comments == []
407
- assert len(prs[0].review_threads) == 1
408
- assert isinstance(prs[0].review_threads[0], ReviewThread)
409
- assert (
410
- prs[0].review_threads[0].comment
411
- == 'Specific review comment\n---\nlatest feedback:\nAnother review comment\n'
412
- )
413
- assert prs[0].review_threads[0].files == ['file1.txt']
414
-
415
- # Verify other fields are set correctly
416
- assert prs[0].number == 1
417
- assert prs[0].title == 'Test PR'
418
- assert prs[0].body == 'Test Body'
419
- assert prs[0].owner == 'test-owner'
420
- assert prs[0].repo == 'test-repo'
421
- assert prs[0].head_branch == 'test-branch'
422
-
423
-
424
- def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
425
- # Define the specific comment_id to filter
426
- specific_comment_id = 123
427
-
428
- # Mock GraphQL response for review threads
429
- with patch('httpx.get') as mock_get:
430
- # Mock the response for PRs
431
- mock_prs_response = MagicMock()
432
- mock_prs_response.json.return_value = [
433
- {
434
- 'number': 1,
435
- 'title': 'Test PR fixes #3',
436
- 'body': 'Test Body',
437
- 'head': {'ref': 'test-branch'},
438
- }
439
- ]
440
-
441
- # Mock the response for PR comments
442
- mock_comments_response = MagicMock()
443
- mock_comments_response.json.return_value = [
444
- {'body': 'First comment', 'id': 120},
445
- {'body': 'Second comment', 'id': 124},
446
- ]
447
-
448
- # Mock the response for PR metadata (GraphQL)
449
- mock_graphql_response = MagicMock()
450
- mock_graphql_response.json.return_value = {
451
- 'data': {
452
- 'repository': {
453
- 'pullRequest': {
454
- 'closingIssuesReferences': {'edges': []},
455
- 'reviews': {'nodes': []},
456
- 'reviewThreads': {
457
- 'edges': [
458
- {
459
- 'node': {
460
- 'id': 'review-thread-1',
461
- 'isResolved': False,
462
- 'comments': {
463
- 'nodes': [
464
- {
465
- 'fullDatabaseId': specific_comment_id,
466
- 'body': 'Specific review comment that references #6',
467
- 'path': 'file1.txt',
468
- },
469
- {
470
- 'fullDatabaseId': 456,
471
- 'body': 'Another review comment referencing #7',
472
- 'path': 'file2.txt',
473
- },
474
- ]
475
- },
476
- }
477
- }
478
- ]
479
- },
480
- }
481
- }
482
- }
483
- }
484
-
485
- # Set up the mock to return different responses
486
- # We need to return empty responses for subsequent pages
487
- mock_empty_response = MagicMock()
488
- mock_empty_response.json.return_value = []
489
-
490
- # Mock the response for fetching the external issue referenced in PR body
491
- mock_external_issue_response_in_body = MagicMock()
492
- mock_external_issue_response_in_body.json.return_value = {
493
- 'body': 'External context #1.'
494
- }
495
-
496
- # Mock the response for fetching the external issue referenced in review thread
497
- mock_external_issue_response_review_thread = MagicMock()
498
- mock_external_issue_response_review_thread.json.return_value = {
499
- 'body': 'External context #2.'
500
- }
501
-
502
- mock_get.side_effect = [
503
- mock_prs_response, # First call for PRs
504
- mock_empty_response, # Second call for PRs (empty page)
505
- mock_comments_response, # Third call for PR comments
506
- mock_empty_response, # Fourth call for PR comments (empty page)
507
- mock_external_issue_response_in_body,
508
- mock_external_issue_response_review_thread,
509
- ]
510
-
511
- # Mock the post request for GraphQL
512
- with patch('httpx.post') as mock_post:
513
- mock_post.return_value = mock_graphql_response
514
-
515
- # Create an instance of PRHandler
516
- llm_config = LLMConfig(model='test', api_key='test')
517
- handler = ServiceContextPR(
518
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
519
- )
520
-
521
- # Get converted issues
522
- prs = handler.get_converted_issues(
523
- issue_numbers=[1], comment_id=specific_comment_id
524
- )
525
-
526
- # Verify that we got exactly one PR
527
- assert len(prs) == 1
528
-
529
- # Verify that thread_comments are set correctly
530
- assert prs[0].thread_comments is None
531
- assert prs[0].review_comments == []
532
- assert len(prs[0].review_threads) == 1
533
- assert isinstance(prs[0].review_threads[0], ReviewThread)
534
- assert (
535
- prs[0].review_threads[0].comment
536
- == 'Specific review comment that references #6\n---\nlatest feedback:\nAnother review comment referencing #7\n'
537
- )
538
- assert prs[0].closing_issues == [
539
- 'External context #1.',
540
- 'External context #2.',
541
- ] # Only includes references from the specified comment and the PR body
542
-
543
- # Verify other fields are set correctly
544
- assert prs[0].number == 1
545
- assert prs[0].title == 'Test PR fixes #3'
546
- assert prs[0].body == 'Test Body'
547
- assert prs[0].owner == 'test-owner'
548
- assert prs[0].repo == 'test-repo'
549
- assert prs[0].head_branch == 'test-branch'
550
-
551
-
552
- def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
553
- # Mock the necessary dependencies
554
- with patch('httpx.get') as mock_get:
555
- # Mock the response for PRs
556
- mock_prs_response = MagicMock()
557
- mock_prs_response.json.return_value = [
558
- {
559
- 'number': 1,
560
- 'title': 'Test PR',
561
- 'body': 'Test Body fixes #1',
562
- 'head': {'ref': 'test-branch'},
563
- }
564
- ]
565
-
566
- # Mock the response for PR comments
567
- mock_comments_response = MagicMock()
568
- mock_comments_response.json.return_value = [
569
- {'body': 'First comment addressing #1'},
570
- {'body': 'Second comment addressing #2'},
571
- ]
572
-
573
- # Mock the response for PR metadata (GraphQL)
574
- mock_graphql_response = MagicMock()
575
- mock_graphql_response.json.return_value = {
576
- 'data': {
577
- 'repository': {
578
- 'pullRequest': {
579
- 'closingIssuesReferences': {'edges': []},
580
- 'reviews': {'nodes': []},
581
- 'reviewThreads': {'edges': []},
582
- }
583
- }
584
- }
585
- }
586
-
587
- # Set up the mock to return different responses
588
- # We need to return empty responses for subsequent pages
589
- mock_empty_response = MagicMock()
590
- mock_empty_response.json.return_value = []
591
-
592
- # Mock the response for fetching the external issue referenced in PR body
593
- mock_external_issue_response_in_body = MagicMock()
594
- mock_external_issue_response_in_body.json.return_value = {
595
- 'body': 'External context #1.'
596
- }
597
-
598
- # Mock the response for fetching the external issue referenced in review thread
599
- mock_external_issue_response_in_comment = MagicMock()
600
- mock_external_issue_response_in_comment.json.return_value = {
601
- 'body': 'External context #2.'
602
- }
603
-
604
- mock_get.side_effect = [
605
- mock_prs_response, # First call for PRs
606
- mock_empty_response, # Second call for PRs (empty page)
607
- mock_comments_response, # Third call for PR comments
608
- mock_empty_response, # Fourth call for PR comments (empty page)
609
- mock_external_issue_response_in_body, # Mock response for the external issue reference #1
610
- mock_external_issue_response_in_comment,
611
- ]
612
-
613
- # Mock the post request for GraphQL
614
- with patch('httpx.post') as mock_post:
615
- mock_post.return_value = mock_graphql_response
616
-
617
- # Create an instance of PRHandler
618
- llm_config = LLMConfig(model='test', api_key='test')
619
- handler = ServiceContextPR(
620
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
621
- )
622
-
623
- # Get converted issues
624
- prs = handler.get_converted_issues(issue_numbers=[1])
625
-
626
- # Verify that we got exactly one PR
627
- assert len(prs) == 1
628
-
629
- # Verify that thread_comments are set correctly
630
- assert prs[0].thread_comments == [
631
- 'First comment addressing #1',
632
- 'Second comment addressing #2',
633
- ]
634
-
635
- # Verify other fields are set correctly
636
- assert prs[0].number == 1
637
- assert prs[0].title == 'Test PR'
638
- assert prs[0].body == 'Test Body fixes #1'
639
- assert prs[0].owner == 'test-owner'
640
- assert prs[0].repo == 'test-repo'
641
- assert prs[0].head_branch == 'test-branch'
642
- assert prs[0].closing_issues == [
643
- 'External context #1.',
644
- 'External context #2.',
645
- ]
tests/unit/resolver/github/test_issue_handler_error_handling.py DELETED
@@ -1,281 +0,0 @@
1
- from unittest.mock import MagicMock, patch
2
-
3
- import httpx
4
- import pytest
5
- from litellm.exceptions import RateLimitError
6
-
7
- from openhands.core.config import LLMConfig
8
- from openhands.events.action.message import MessageAction
9
- from openhands.llm.llm import LLM
10
- from openhands.resolver.interfaces.github import GithubIssueHandler, GithubPRHandler
11
- from openhands.resolver.interfaces.issue import Issue
12
- from openhands.resolver.interfaces.issue_definitions import (
13
- ServiceContextIssue,
14
- ServiceContextPR,
15
- )
16
-
17
-
18
- @pytest.fixture(autouse=True)
19
- def mock_logger(monkeypatch):
20
- # suppress logging of completion data to file
21
- mock_logger = MagicMock()
22
- monkeypatch.setattr('openhands.llm.debug_mixin.llm_prompt_logger', mock_logger)
23
- monkeypatch.setattr('openhands.llm.debug_mixin.llm_response_logger', mock_logger)
24
- return mock_logger
25
-
26
-
27
- @pytest.fixture
28
- def default_config():
29
- return LLMConfig(
30
- model='gpt-4o',
31
- api_key='test_key',
32
- num_retries=2,
33
- retry_min_wait=1,
34
- retry_max_wait=2,
35
- )
36
-
37
-
38
- def test_handle_nonexistent_issue_reference():
39
- llm_config = LLMConfig(model='test', api_key='test')
40
- handler = ServiceContextPR(
41
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
42
- )
43
-
44
- # Mock httpx.get to simulate a 404 error
45
- mock_response = MagicMock()
46
- mock_response.raise_for_status.side_effect = httpx.HTTPError(
47
- '404 Client Error: Not Found'
48
- )
49
-
50
- with patch('httpx.get', return_value=mock_response):
51
- # Call the method with a non-existent issue reference
52
- result = handler._strategy.get_context_from_external_issues_references(
53
- closing_issues=[],
54
- closing_issue_numbers=[],
55
- issue_body='This references #999999', # Non-existent issue
56
- review_comments=[],
57
- review_threads=[],
58
- thread_comments=None,
59
- )
60
-
61
- # The method should return an empty list since the referenced issue couldn't be fetched
62
- assert result == []
63
-
64
-
65
- def test_handle_rate_limit_error():
66
- llm_config = LLMConfig(model='test', api_key='test')
67
- handler = ServiceContextPR(
68
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
69
- )
70
-
71
- # Mock httpx.get to simulate a rate limit error
72
- mock_response = MagicMock()
73
- mock_response.raise_for_status.side_effect = httpx.HTTPError(
74
- '403 Client Error: Rate Limit Exceeded'
75
- )
76
-
77
- with patch('httpx.get', return_value=mock_response):
78
- # Call the method with an issue reference
79
- result = handler._strategy.get_context_from_external_issues_references(
80
- closing_issues=[],
81
- closing_issue_numbers=[],
82
- issue_body='This references #123',
83
- review_comments=[],
84
- review_threads=[],
85
- thread_comments=None,
86
- )
87
-
88
- # The method should return an empty list since the request was rate limited
89
- assert result == []
90
-
91
-
92
- def test_handle_network_error():
93
- llm_config = LLMConfig(model='test', api_key='test')
94
- handler = ServiceContextPR(
95
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
96
- )
97
-
98
- # Mock httpx.get to simulate a network error
99
- with patch('httpx.get', side_effect=httpx.NetworkError('Network Error')):
100
- # Call the method with an issue reference
101
- result = handler._strategy.get_context_from_external_issues_references(
102
- closing_issues=[],
103
- closing_issue_numbers=[],
104
- issue_body='This references #123',
105
- review_comments=[],
106
- review_threads=[],
107
- thread_comments=None,
108
- )
109
-
110
- # The method should return an empty list since the network request failed
111
- assert result == []
112
-
113
-
114
- def test_successful_issue_reference():
115
- llm_config = LLMConfig(model='test', api_key='test')
116
- handler = ServiceContextPR(
117
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
118
- )
119
-
120
- # Mock a successful response
121
- mock_response = MagicMock()
122
- mock_response.raise_for_status.return_value = None
123
- mock_response.json.return_value = {'body': 'This is the referenced issue body'}
124
-
125
- with patch('httpx.get', return_value=mock_response):
126
- # Call the method with an issue reference
127
- result = handler._strategy.get_context_from_external_issues_references(
128
- closing_issues=[],
129
- closing_issue_numbers=[],
130
- issue_body='This references #123',
131
- review_comments=[],
132
- review_threads=[],
133
- thread_comments=None,
134
- )
135
-
136
- # The method should return a list with the referenced issue body
137
- assert result == ['This is the referenced issue body']
138
-
139
-
140
- class MockLLMResponse:
141
- """Mock LLM Response class to mimic the actual LLM response structure."""
142
-
143
- class Choice:
144
- class Message:
145
- def __init__(self, content):
146
- self.content = content
147
-
148
- def __init__(self, content):
149
- self.message = self.Message(content)
150
-
151
- def __init__(self, content):
152
- self.choices = [self.Choice(content)]
153
-
154
-
155
- class DotDict(dict):
156
- """
157
- A dictionary that supports dot notation access.
158
- """
159
-
160
- def __init__(self, *args, **kwargs):
161
- super().__init__(*args, **kwargs)
162
- for key, value in self.items():
163
- if isinstance(value, dict):
164
- self[key] = DotDict(value)
165
- elif isinstance(value, list):
166
- self[key] = [
167
- DotDict(item) if isinstance(item, dict) else item for item in value
168
- ]
169
-
170
- def __getattr__(self, key):
171
- if key in self:
172
- return self[key]
173
- else:
174
- raise AttributeError(
175
- f"'{self.__class__.__name__}' object has no attribute '{key}'"
176
- )
177
-
178
- def __setattr__(self, key, value):
179
- self[key] = value
180
-
181
- def __delattr__(self, key):
182
- if key in self:
183
- del self[key]
184
- else:
185
- raise AttributeError(
186
- f"'{self.__class__.__name__}' object has no attribute '{key}'"
187
- )
188
-
189
-
190
- @patch('openhands.llm.llm.litellm_completion')
191
- def test_guess_success_rate_limit_wait_time(mock_litellm_completion, default_config):
192
- """Test that the retry mechanism in guess_success respects wait time between retries."""
193
-
194
- with patch('time.sleep') as mock_sleep:
195
- # Simulate a rate limit error followed by a successful response
196
- mock_litellm_completion.side_effect = [
197
- RateLimitError(
198
- 'Rate limit exceeded', llm_provider='test_provider', model='test_model'
199
- ),
200
- DotDict(
201
- {
202
- 'choices': [
203
- {
204
- 'message': {
205
- 'content': '--- success\ntrue\n--- explanation\nRetry successful'
206
- }
207
- }
208
- ]
209
- }
210
- ),
211
- ]
212
-
213
- llm = LLM(config=default_config)
214
- handler = ServiceContextIssue(
215
- GithubIssueHandler('test-owner', 'test-repo', 'test-token'), default_config
216
- )
217
- handler.llm = llm
218
-
219
- # Mock issue and history
220
- issue = Issue(
221
- owner='test-owner',
222
- repo='test-repo',
223
- number=1,
224
- title='Test Issue',
225
- body='This is a test issue.',
226
- thread_comments=['Please improve error handling'],
227
- )
228
- history = [MessageAction(content='Fixed error handling.')]
229
-
230
- # Call guess_success
231
- success, _, explanation = handler.guess_success(issue, history)
232
-
233
- # Assertions
234
- assert success is True
235
- assert explanation == 'Retry successful'
236
- assert mock_litellm_completion.call_count == 2 # Two attempts made
237
- mock_sleep.assert_called_once() # Sleep called once between retries
238
-
239
- # Validate wait time
240
- wait_time = mock_sleep.call_args[0][0]
241
- assert (
242
- default_config.retry_min_wait <= wait_time <= default_config.retry_max_wait
243
- ), (
244
- f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'
245
- )
246
-
247
-
248
- @patch('openhands.llm.llm.litellm_completion')
249
- def test_guess_success_exhausts_retries(mock_completion, default_config):
250
- """Test the retry mechanism in guess_success exhausts retries and raises an error."""
251
- # Simulate persistent rate limit errors by always raising RateLimitError
252
- mock_completion.side_effect = RateLimitError(
253
- 'Rate limit exceeded', llm_provider='test_provider', model='test_model'
254
- )
255
-
256
- # Initialize LLM and handler
257
- llm = LLM(config=default_config)
258
- handler = ServiceContextPR(
259
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), default_config
260
- )
261
- handler.llm = llm
262
-
263
- # Mock issue and history
264
- issue = Issue(
265
- owner='test-owner',
266
- repo='test-repo',
267
- number=1,
268
- title='Test Issue',
269
- body='This is a test issue.',
270
- thread_comments=['Please improve error handling'],
271
- )
272
- history = [MessageAction(content='Fixed error handling.')]
273
-
274
- # Call guess_success and expect it to raise an error after retries
275
- with pytest.raises(RateLimitError):
276
- handler.guess_success(issue, history)
277
-
278
- # Assertions
279
- assert (
280
- mock_completion.call_count == default_config.num_retries
281
- ) # Initial call + retries
tests/unit/resolver/github/test_pr_handler_guess_success.py DELETED
@@ -1,672 +0,0 @@
1
- import json
2
- from unittest.mock import MagicMock, patch
3
-
4
- import pytest
5
-
6
- from openhands.core.config import LLMConfig
7
- from openhands.events.action.message import MessageAction
8
- from openhands.llm.llm import LLM
9
- from openhands.resolver.interfaces.github import GithubPRHandler
10
- from openhands.resolver.interfaces.issue import Issue, ReviewThread
11
- from openhands.resolver.interfaces.issue_definitions import ServiceContextPR
12
-
13
-
14
- @pytest.fixture
15
- def pr_handler():
16
- llm_config = LLMConfig(model='test-model')
17
- handler = ServiceContextPR(
18
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
19
- )
20
- return handler
21
-
22
-
23
- @pytest.fixture
24
- def mock_llm_success_response():
25
- return MagicMock(
26
- choices=[
27
- MagicMock(
28
- message=MagicMock(
29
- content="""--- success
30
- true
31
-
32
- --- explanation
33
- The changes look good"""
34
- )
35
- )
36
- ]
37
- )
38
-
39
-
40
- def test_guess_success_review_threads_litellm_call():
41
- """Test that the completion() call for review threads contains the expected content."""
42
- # Create a PR handler instance
43
- llm_config = LLMConfig(model='test', api_key='test')
44
- handler = ServiceContextPR(
45
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
46
- )
47
-
48
- # Create a mock issue with review threads
49
- issue = Issue(
50
- owner='test-owner',
51
- repo='test-repo',
52
- number=1,
53
- title='Test PR',
54
- body='Test Body',
55
- thread_comments=None,
56
- closing_issues=['Issue 1 description', 'Issue 2 description'],
57
- review_comments=None,
58
- review_threads=[
59
- ReviewThread(
60
- comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
61
- files=['/src/file1.py', '/src/file2.py'],
62
- ),
63
- ReviewThread(
64
- comment='Add more tests\n---\nlatest feedback:\nAdd test cases',
65
- files=['/tests/test_file.py'],
66
- ),
67
- ],
68
- thread_ids=['1', '2'],
69
- head_branch='test-branch',
70
- )
71
-
72
- # Create mock history with a detailed response
73
- history = [
74
- MessageAction(
75
- content="""I have made the following changes:
76
- 1. Fixed formatting in file1.py and file2.py
77
- 2. Added docstrings to all functions
78
- 3. Added test cases in test_file.py"""
79
- )
80
- ]
81
-
82
- # Create mock LLM config
83
- llm_config = LLMConfig(model='test-model', api_key='test-key')
84
-
85
- # Mock the LLM response
86
- mock_response = MagicMock()
87
- mock_response.choices = [
88
- MagicMock(
89
- message=MagicMock(
90
- content="""--- success
91
- true
92
-
93
- --- explanation
94
- The changes successfully address the feedback."""
95
- )
96
- )
97
- ]
98
-
99
- # Test the guess_success method
100
- with patch.object(LLM, 'completion') as mock_completion:
101
- mock_completion.return_value = mock_response
102
- success, success_list, explanation = handler.guess_success(issue, history)
103
-
104
- # Verify the completion() calls
105
- assert mock_completion.call_count == 2 # One call per review thread
106
-
107
- # Check first call
108
- first_call = mock_completion.call_args_list[0]
109
- first_prompt = first_call[1]['messages'][0]['content']
110
- assert (
111
- 'Issue descriptions:\n'
112
- + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
113
- in first_prompt
114
- )
115
- assert (
116
- 'Feedback:\nPlease fix the formatting\n---\nlatest feedback:\nAdd docstrings'
117
- in first_prompt
118
- )
119
- assert (
120
- 'Files locations:\n'
121
- + json.dumps(['/src/file1.py', '/src/file2.py'], indent=4)
122
- in first_prompt
123
- )
124
- assert 'Last message from AI agent:\n' + history[0].content in first_prompt
125
-
126
- # Check second call
127
- second_call = mock_completion.call_args_list[1]
128
- second_prompt = second_call[1]['messages'][0]['content']
129
- assert (
130
- 'Issue descriptions:\n'
131
- + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
132
- in second_prompt
133
- )
134
- assert (
135
- 'Feedback:\nAdd more tests\n---\nlatest feedback:\nAdd test cases'
136
- in second_prompt
137
- )
138
- assert (
139
- 'Files locations:\n' + json.dumps(['/tests/test_file.py'], indent=4)
140
- in second_prompt
141
- )
142
- assert 'Last message from AI agent:\n' + history[0].content in second_prompt
143
-
144
- assert len(json.loads(explanation)) == 2
145
-
146
-
147
- def test_guess_success_thread_comments_litellm_call():
148
- """Test that the completion() call for thread comments contains the expected content."""
149
- # Create a PR handler instance
150
- llm_config = LLMConfig(model='test', api_key='test')
151
- handler = ServiceContextPR(
152
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
153
- )
154
-
155
- # Create a mock issue with thread comments
156
- issue = Issue(
157
- owner='test-owner',
158
- repo='test-repo',
159
- number=1,
160
- title='Test PR',
161
- body='Test Body',
162
- thread_comments=[
163
- 'Please improve error handling',
164
- 'Add input validation',
165
- 'latest feedback:\nHandle edge cases',
166
- ],
167
- closing_issues=['Issue 1 description', 'Issue 2 description'],
168
- review_comments=None,
169
- thread_ids=None,
170
- head_branch='test-branch',
171
- )
172
-
173
- # Create mock history with a detailed response
174
- history = [
175
- MessageAction(
176
- content="""I have made the following changes:
177
- 1. Added try/catch blocks for error handling
178
- 2. Added input validation checks
179
- 3. Added handling for edge cases"""
180
- )
181
- ]
182
-
183
- # Create mock LLM config
184
- llm_config = LLMConfig(model='test-model', api_key='test-key')
185
-
186
- # Mock the LLM response
187
- mock_response = MagicMock()
188
- mock_response.choices = [
189
- MagicMock(
190
- message=MagicMock(
191
- content="""--- success
192
- true
193
-
194
- --- explanation
195
- The changes successfully address the feedback."""
196
- )
197
- )
198
- ]
199
-
200
- # Test the guess_success method
201
- with patch.object(LLM, 'completion') as mock_completion:
202
- mock_completion.return_value = mock_response
203
- success, success_list, explanation = handler.guess_success(issue, history)
204
-
205
- # Verify the completion() call
206
- mock_completion.assert_called_once()
207
- call_args = mock_completion.call_args
208
- prompt = call_args[1]['messages'][0]['content']
209
-
210
- # Check prompt content
211
- assert (
212
- 'Issue descriptions:\n'
213
- + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
214
- in prompt
215
- )
216
- assert 'PR Thread Comments:\n' + '\n---\n'.join(issue.thread_comments) in prompt
217
- assert 'Last message from AI agent:\n' + history[0].content in prompt
218
-
219
- assert len(json.loads(explanation)) == 1
220
-
221
-
222
- def test_check_feedback_with_llm():
223
- """Test the _check_feedback_with_llm helper function."""
224
- # Create a PR handler instance
225
- llm_config = LLMConfig(model='test', api_key='test')
226
- handler = ServiceContextPR(
227
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
228
- )
229
-
230
- # Test cases for different LLM responses
231
- test_cases = [
232
- {
233
- 'response': '--- success\ntrue\n--- explanation\nChanges look good',
234
- 'expected': (True, 'Changes look good'),
235
- },
236
- {
237
- 'response': '--- success\nfalse\n--- explanation\nNot all issues fixed',
238
- 'expected': (False, 'Not all issues fixed'),
239
- },
240
- {
241
- 'response': 'Invalid response format',
242
- 'expected': (
243
- False,
244
- 'Failed to decode answer from LLM response: Invalid response format',
245
- ),
246
- },
247
- {
248
- 'response': '--- success\ntrue\n--- explanation\nMultiline\nexplanation\nhere',
249
- 'expected': (True, 'Multiline\nexplanation\nhere'),
250
- },
251
- ]
252
-
253
- for case in test_cases:
254
- # Mock the LLM response
255
- mock_response = MagicMock()
256
- mock_response.choices = [MagicMock(message=MagicMock(content=case['response']))]
257
-
258
- # Test the function
259
- with patch.object(LLM, 'completion', return_value=mock_response):
260
- success, explanation = handler._check_feedback_with_llm('test prompt')
261
- assert (success, explanation) == case['expected']
262
-
263
-
264
- def test_check_review_thread_with_git_patch():
265
- """Test that git patch from complete_runtime is included in the prompt."""
266
- # Create a PR handler instance
267
- llm_config = LLMConfig(model='test', api_key='test')
268
- handler = ServiceContextPR(
269
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
270
- )
271
-
272
- # Create test data
273
- review_thread = ReviewThread(
274
- comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
275
- files=['/src/file1.py', '/src/file2.py'],
276
- )
277
- issues_context = json.dumps(
278
- ['Issue 1 description', 'Issue 2 description'], indent=4
279
- )
280
- last_message = 'I have fixed the formatting and added docstrings'
281
- git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""Added docstring."""\n'
282
-
283
- # Mock the LLM response
284
- mock_response = MagicMock()
285
- mock_response.choices = [
286
- MagicMock(
287
- message=MagicMock(
288
- content="""--- success
289
- true
290
-
291
- --- explanation
292
- Changes look good"""
293
- )
294
- )
295
- ]
296
-
297
- # Test the function
298
- with patch.object(LLM, 'completion') as mock_completion:
299
- mock_completion.return_value = mock_response
300
- success, explanation = handler._check_review_thread(
301
- review_thread, issues_context, last_message, git_patch
302
- )
303
-
304
- # Verify the completion() call
305
- mock_completion.assert_called_once()
306
- call_args = mock_completion.call_args
307
- prompt = call_args[1]['messages'][0]['content']
308
-
309
- # Check prompt content
310
- assert 'Issue descriptions:\n' + issues_context in prompt
311
- assert 'Feedback:\n' + review_thread.comment in prompt
312
- assert (
313
- 'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
314
- )
315
- assert 'Last message from AI agent:\n' + last_message in prompt
316
- assert 'Changes made (git patch):\n' + git_patch in prompt
317
-
318
- # Check result
319
- assert success is True
320
- assert explanation == 'Changes look good'
321
-
322
-
323
- def test_check_review_thread():
324
- """Test the _check_review_thread helper function."""
325
- # Create a PR handler instance
326
- llm_config = LLMConfig(model='test', api_key='test')
327
- handler = ServiceContextPR(
328
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
329
- )
330
-
331
- # Create test data
332
- review_thread = ReviewThread(
333
- comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
334
- files=['/src/file1.py', '/src/file2.py'],
335
- )
336
- issues_context = json.dumps(
337
- ['Issue 1 description', 'Issue 2 description'], indent=4
338
- )
339
- last_message = 'I have fixed the formatting and added docstrings'
340
-
341
- # Mock the LLM response
342
- mock_response = MagicMock()
343
- mock_response.choices = [
344
- MagicMock(
345
- message=MagicMock(
346
- content="""--- success
347
- true
348
-
349
- --- explanation
350
- Changes look good"""
351
- )
352
- )
353
- ]
354
-
355
- # Test the function
356
- with patch.object(LLM, 'completion') as mock_completion:
357
- mock_completion.return_value = mock_response
358
- success, explanation = handler._check_review_thread(
359
- review_thread, issues_context, last_message
360
- )
361
-
362
- # Verify the completion() call
363
- mock_completion.assert_called_once()
364
- call_args = mock_completion.call_args
365
- prompt = call_args[1]['messages'][0]['content']
366
-
367
- # Check prompt content
368
- assert 'Issue descriptions:\n' + issues_context in prompt
369
- assert 'Feedback:\n' + review_thread.comment in prompt
370
- assert (
371
- 'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
372
- )
373
- assert 'Last message from AI agent:\n' + last_message in prompt
374
-
375
- # Check result
376
- assert success is True
377
- assert explanation == 'Changes look good'
378
-
379
-
380
- def test_check_thread_comments_with_git_patch():
381
- """Test that git patch from complete_runtime is included in the prompt."""
382
- # Create a PR handler instance
383
- llm_config = LLMConfig(model='test', api_key='test')
384
- handler = ServiceContextPR(
385
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
386
- )
387
-
388
- # Create test data
389
- thread_comments = [
390
- 'Please improve error handling',
391
- 'Add input validation',
392
- 'latest feedback:\nHandle edge cases',
393
- ]
394
- issues_context = json.dumps(
395
- ['Issue 1 description', 'Issue 2 description'], indent=4
396
- )
397
- last_message = 'I have added error handling and input validation'
398
- git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+try:\n+ validate_input()\n+except ValueError:\n+ handle_error()\n'
399
-
400
- # Mock the LLM response
401
- mock_response = MagicMock()
402
- mock_response.choices = [
403
- MagicMock(
404
- message=MagicMock(
405
- content="""--- success
406
- true
407
-
408
- --- explanation
409
- Changes look good"""
410
- )
411
- )
412
- ]
413
-
414
- # Test the function
415
- with patch.object(LLM, 'completion') as mock_completion:
416
- mock_completion.return_value = mock_response
417
- success, explanation = handler._check_thread_comments(
418
- thread_comments, issues_context, last_message, git_patch
419
- )
420
-
421
- # Verify the completion() call
422
- mock_completion.assert_called_once()
423
- call_args = mock_completion.call_args
424
- prompt = call_args[1]['messages'][0]['content']
425
-
426
- # Check prompt content
427
- assert 'Issue descriptions:\n' + issues_context in prompt
428
- assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
429
- assert 'Last message from AI agent:\n' + last_message in prompt
430
- assert 'Changes made (git patch):\n' + git_patch in prompt
431
-
432
- # Check result
433
- assert success is True
434
- assert explanation == 'Changes look good'
435
-
436
-
437
- def test_check_thread_comments():
438
- """Test the _check_thread_comments helper function."""
439
- # Create a PR handler instance
440
- llm_config = LLMConfig(model='test', api_key='test')
441
- handler = ServiceContextPR(
442
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
443
- )
444
-
445
- # Create test data
446
- thread_comments = [
447
- 'Please improve error handling',
448
- 'Add input validation',
449
- 'latest feedback:\nHandle edge cases',
450
- ]
451
- issues_context = json.dumps(
452
- ['Issue 1 description', 'Issue 2 description'], indent=4
453
- )
454
- last_message = 'I have added error handling and input validation'
455
-
456
- # Mock the LLM response
457
- mock_response = MagicMock()
458
- mock_response.choices = [
459
- MagicMock(
460
- message=MagicMock(
461
- content="""--- success
462
- true
463
-
464
- --- explanation
465
- Changes look good"""
466
- )
467
- )
468
- ]
469
-
470
- # Test the function
471
- with patch.object(LLM, 'completion') as mock_completion:
472
- mock_completion.return_value = mock_response
473
- success, explanation = handler._check_thread_comments(
474
- thread_comments, issues_context, last_message
475
- )
476
-
477
- # Verify the completion() call
478
- mock_completion.assert_called_once()
479
- call_args = mock_completion.call_args
480
- prompt = call_args[1]['messages'][0]['content']
481
-
482
- # Check prompt content
483
- assert 'Issue descriptions:\n' + issues_context in prompt
484
- assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
485
- assert 'Last message from AI agent:\n' + last_message in prompt
486
-
487
- # Check result
488
- assert success is True
489
- assert explanation == 'Changes look good'
490
-
491
-
492
- def test_check_review_comments_with_git_patch():
493
- """Test that git patch from complete_runtime is included in the prompt."""
494
- # Create a PR handler instance
495
- llm_config = LLMConfig(model='test', api_key='test')
496
- handler = ServiceContextPR(
497
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
498
- )
499
-
500
- # Create test data
501
- review_comments = [
502
- 'Please fix the code style',
503
- 'Add more test cases',
504
- 'latest feedback:\nImprove documentation',
505
- ]
506
- issues_context = json.dumps(
507
- ['Issue 1 description', 'Issue 2 description'], indent=4
508
- )
509
- last_message = 'I have fixed the code style and added tests'
510
- git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""This module does X."""\n+def func():\n+ """Do Y."""\n'
511
-
512
- # Mock the LLM response
513
- mock_response = MagicMock()
514
- mock_response.choices = [
515
- MagicMock(
516
- message=MagicMock(
517
- content="""--- success
518
- true
519
-
520
- --- explanation
521
- Changes look good"""
522
- )
523
- )
524
- ]
525
-
526
- # Test the function
527
- with patch.object(LLM, 'completion') as mock_completion:
528
- mock_completion.return_value = mock_response
529
- success, explanation = handler._check_review_comments(
530
- review_comments, issues_context, last_message, git_patch
531
- )
532
-
533
- # Verify the completion() call
534
- mock_completion.assert_called_once()
535
- call_args = mock_completion.call_args
536
- prompt = call_args[1]['messages'][0]['content']
537
-
538
- # Check prompt content
539
- assert 'Issue descriptions:\n' + issues_context in prompt
540
- assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
541
- assert 'Last message from AI agent:\n' + last_message in prompt
542
- assert 'Changes made (git patch):\n' + git_patch in prompt
543
-
544
- # Check result
545
- assert success is True
546
- assert explanation == 'Changes look good'
547
-
548
-
549
- def test_check_review_comments():
550
- """Test the _check_review_comments helper function."""
551
- # Create a PR handler instance
552
- llm_config = LLMConfig(model='test', api_key='test')
553
- handler = ServiceContextPR(
554
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
555
- )
556
-
557
- # Create test data
558
- review_comments = [
559
- 'Please improve code readability',
560
- 'Add comments to complex functions',
561
- 'Follow PEP 8 style guide',
562
- ]
563
- issues_context = json.dumps(
564
- ['Issue 1 description', 'Issue 2 description'], indent=4
565
- )
566
- last_message = 'I have improved code readability and added comments'
567
-
568
- # Mock the LLM response
569
- mock_response = MagicMock()
570
- mock_response.choices = [
571
- MagicMock(
572
- message=MagicMock(
573
- content="""--- success
574
- true
575
-
576
- --- explanation
577
- Changes look good"""
578
- )
579
- )
580
- ]
581
-
582
- # Test the function
583
- with patch.object(LLM, 'completion') as mock_completion:
584
- mock_completion.return_value = mock_response
585
- success, explanation = handler._check_review_comments(
586
- review_comments, issues_context, last_message
587
- )
588
-
589
- # Verify the completion() call
590
- mock_completion.assert_called_once()
591
- call_args = mock_completion.call_args
592
- prompt = call_args[1]['messages'][0]['content']
593
-
594
- # Check prompt content
595
- assert 'Issue descriptions:\n' + issues_context in prompt
596
- assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
597
- assert 'Last message from AI agent:\n' + last_message in prompt
598
-
599
- # Check result
600
- assert success is True
601
- assert explanation == 'Changes look good'
602
-
603
-
604
- def test_guess_success_review_comments_litellm_call():
605
- """Test that the completion() call for review comments contains the expected content."""
606
- # Create a PR handler instance
607
- llm_config = LLMConfig(model='test', api_key='test')
608
- handler = ServiceContextPR(
609
- GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
610
- )
611
-
612
- # Create a mock issue with review comments
613
- issue = Issue(
614
- owner='test-owner',
615
- repo='test-repo',
616
- number=1,
617
- title='Test PR',
618
- body='Test Body',
619
- thread_comments=None,
620
- closing_issues=['Issue 1 description', 'Issue 2 description'],
621
- review_comments=[
622
- 'Please improve code readability',
623
- 'Add comments to complex functions',
624
- 'Follow PEP 8 style guide',
625
- ],
626
- thread_ids=None,
627
- head_branch='test-branch',
628
- )
629
-
630
- # Create mock history with a detailed response
631
- history = [
632
- MessageAction(
633
- content="""I have made the following changes:
634
- 1. Improved code readability by breaking down complex functions
635
- 2. Added detailed comments to all complex functions
636
- 3. Fixed code style to follow PEP 8"""
637
- )
638
- ]
639
-
640
- # Mock the LLM response
641
- mock_response = MagicMock()
642
- mock_response.choices = [
643
- MagicMock(
644
- message=MagicMock(
645
- content="""--- success
646
- true
647
-
648
- --- explanation
649
- The changes successfully address the feedback."""
650
- )
651
- )
652
- ]
653
-
654
- with patch.object(LLM, 'completion') as mock_completion:
655
- mock_completion.return_value = mock_response
656
- success, success_list, explanation = handler.guess_success(issue, history)
657
-
658
- # Verify the completion() call
659
- mock_completion.assert_called_once()
660
- call_args = mock_completion.call_args
661
- prompt = call_args[1]['messages'][0]['content']
662
-
663
- # Check prompt content
664
- assert (
665
- 'Issue descriptions:\n'
666
- + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
667
- in prompt
668
- )
669
- assert 'PR Review Comments:\n' + '\n---\n'.join(issue.review_comments) in prompt
670
- assert 'Last message from AI agent:\n' + history[0].content in prompt
671
-
672
- assert len(json.loads(explanation)) == 1
tests/unit/resolver/github/test_pr_title_escaping.py DELETED
@@ -1,166 +0,0 @@
1
- import os
2
- import subprocess
3
- import tempfile
4
-
5
- from openhands.integrations.service_types import ProviderType
6
- from openhands.resolver.interfaces.issue import Issue
7
- from openhands.resolver.send_pull_request import make_commit
8
-
9
-
10
- def test_commit_message_with_quotes():
11
- # Create a temporary directory and initialize git repo
12
- with tempfile.TemporaryDirectory() as temp_dir:
13
- subprocess.run(['git', 'init', temp_dir], check=True)
14
-
15
- # Create a test file and add it to git
16
- test_file = os.path.join(temp_dir, 'test.txt')
17
- with open(test_file, 'w') as f:
18
- f.write('test content')
19
-
20
- subprocess.run(['git', '-C', temp_dir, 'add', 'test.txt'], check=True)
21
-
22
- # Create a test issue with problematic title
23
- issue = Issue(
24
- owner='test-owner',
25
- repo='test-repo',
26
- number=123,
27
- title="Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>",
28
- body='Test body',
29
- labels=[],
30
- assignees=[],
31
- state='open',
32
- created_at='2024-01-01T00:00:00Z',
33
- updated_at='2024-01-01T00:00:00Z',
34
- closed_at=None,
35
- head_branch=None,
36
- thread_ids=None,
37
- )
38
-
39
- # Make the commit
40
- make_commit(temp_dir, issue, 'issue')
41
-
42
- # Get the commit message
43
- result = subprocess.run(
44
- ['git', '-C', temp_dir, 'log', '-1', '--pretty=%B'],
45
- capture_output=True,
46
- text=True,
47
- check=True,
48
- )
49
- commit_msg = result.stdout.strip()
50
-
51
- # The commit message should contain the quotes without excessive escaping
52
- expected = "Fix issue #123: Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>"
53
- assert commit_msg == expected, f'Expected: {expected}\nGot: {commit_msg}'
54
-
55
-
56
- def test_pr_title_with_quotes(monkeypatch):
57
- # Mock requests.post to avoid actual API calls
58
- class MockResponse:
59
- def __init__(self, status_code=201):
60
- self.status_code = status_code
61
- self.text = ''
62
-
63
- def json(self):
64
- return {'html_url': 'https://github.com/test/test/pull/1'}
65
-
66
- def raise_for_status(self):
67
- pass
68
-
69
- def mock_post(*args, **kwargs):
70
- # Verify that the PR title is not over-escaped
71
- data = kwargs.get('json', {})
72
- title = data.get('title', '')
73
- expected = "Fix issue #123: Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>"
74
- assert title == expected, (
75
- f'PR title was incorrectly escaped.\nExpected: {expected}\nGot: {title}'
76
- )
77
- return MockResponse()
78
-
79
- class MockGetResponse:
80
- def __init__(self, status_code=200):
81
- self.status_code = status_code
82
- self.text = ''
83
-
84
- def json(self):
85
- return {'default_branch': 'main'}
86
-
87
- def raise_for_status(self):
88
- pass
89
-
90
- monkeypatch.setattr('httpx.post', mock_post)
91
- monkeypatch.setattr('httpx.get', lambda *args, **kwargs: MockGetResponse())
92
- monkeypatch.setattr(
93
- 'openhands.resolver.interfaces.github.GithubIssueHandler.branch_exists',
94
- lambda *args, **kwargs: False,
95
- )
96
-
97
- # Mock subprocess.run to avoid actual git commands
98
- original_run = subprocess.run
99
-
100
- def mock_run(*args, **kwargs):
101
- print(f'Running command: {args[0] if args else kwargs.get("args", [])}')
102
- if isinstance(args[0], list) and args[0][0] == 'git':
103
- if 'push' in args[0]:
104
- return subprocess.CompletedProcess(
105
- args[0], returncode=0, stdout='', stderr=''
106
- )
107
- return original_run(*args, **kwargs)
108
- return original_run(*args, **kwargs)
109
-
110
- monkeypatch.setattr('subprocess.run', mock_run)
111
-
112
- # Create a temporary directory and initialize git repo
113
- with tempfile.TemporaryDirectory() as temp_dir:
114
- print('Initializing git repo...')
115
- subprocess.run(['git', 'init', temp_dir], check=True)
116
-
117
- # Configure the git user identity so commits succeed
118
- subprocess.run(
119
- ['git', '-C', temp_dir, 'config', 'user.name', 'Test User'], check=True
120
- )
121
- subprocess.run(
122
- ['git', '-C', temp_dir, 'config', 'user.email', '[email protected]'],
123
- check=True,
124
- )
125
-
126
- # Create a test file and add it to git
127
- test_file = os.path.join(temp_dir, 'test.txt')
128
- with open(test_file, 'w') as f:
129
- f.write('test content')
130
-
131
- print('Adding and committing test file...')
132
- subprocess.run(['git', '-C', temp_dir, 'add', 'test.txt'], check=True)
133
- subprocess.run(
134
- ['git', '-C', temp_dir, 'commit', '-m', 'Initial commit'], check=True
135
- )
136
-
137
- # Create a test issue with problematic title
138
- print('Creating test issue...')
139
- issue = Issue(
140
- owner='test-owner',
141
- repo='test-repo',
142
- number=123,
143
- title="Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>",
144
- body='Test body',
145
- labels=[],
146
- assignees=[],
147
- state='open',
148
- created_at='2024-01-01T00:00:00Z',
149
- updated_at='2024-01-01T00:00:00Z',
150
- closed_at=None,
151
- head_branch=None,
152
- thread_ids=None,
153
- )
154
-
155
- # Try to send a PR - this will fail if the title is incorrectly escaped
156
- print('Sending PR...')
157
- from openhands.resolver.send_pull_request import send_pull_request
158
-
159
- send_pull_request(
160
- issue=issue,
161
- token='dummy-token',
162
- username='test-user',
163
- platform=ProviderType.GITHUB,
164
- patch_dir=temp_dir,
165
- pr_type='ready',
166
- )
tests/unit/resolver/github/test_resolve_issues.py DELETED
@@ -1,1035 +0,0 @@
1
- import os
2
- import tempfile
3
- from unittest.mock import AsyncMock, MagicMock, patch
4
-
5
- import pytest
6
-
7
- from openhands.core.config import LLMConfig
8
- from openhands.events.action import CmdRunAction, MessageAction
9
- from openhands.events.observation import (
10
- CmdOutputMetadata,
11
- CmdOutputObservation,
12
- NullObservation,
13
- )
14
- from openhands.integrations.service_types import ProviderType
15
- from openhands.llm.llm import LLM
16
- from openhands.resolver.interfaces.github import GithubIssueHandler, GithubPRHandler
17
- from openhands.resolver.interfaces.issue import Issue, ReviewThread
18
- from openhands.resolver.interfaces.issue_definitions import (
19
- ServiceContextIssue,
20
- ServiceContextPR,
21
- )
22
- from openhands.resolver.issue_resolver import IssueResolver
23
- from openhands.resolver.resolver_output import ResolverOutput
24
-
25
-
26
- @pytest.fixture
27
- def default_mock_args():
28
- """Fixture that provides a default mock args object with common values.
29
-
30
- Tests can override specific attributes as needed.
31
- """
32
- mock_args = MagicMock()
33
- mock_args.selected_repo = 'test-owner/test-repo'
34
- mock_args.token = 'test-token'
35
- mock_args.username = 'test-user'
36
- mock_args.max_iterations = 5
37
- mock_args.output_dir = '/tmp'
38
- mock_args.llm_model = 'test'
39
- mock_args.llm_api_key = 'test'
40
- mock_args.llm_base_url = None
41
- mock_args.base_domain = None
42
- mock_args.runtime_container_image = None
43
- mock_args.base_container_image = None
44
- mock_args.is_experimental = False
45
- mock_args.issue_number = None
46
- mock_args.comment_id = None
47
- mock_args.repo_instruction_file = None
48
- mock_args.issue_type = 'issue'
49
- mock_args.prompt_file = None
50
- return mock_args
51
-
52
-
53
- @pytest.fixture
54
- def mock_github_token():
55
- """Fixture that patches the identify_token function to return GitHub provider type.
56
-
57
- This eliminates the need for repeated patching in each test function.
58
- """
59
- with patch(
60
- 'openhands.resolver.issue_resolver.identify_token',
61
- return_value=ProviderType.GITHUB,
62
- ) as patched:
63
- yield patched
64
-
65
-
66
- @pytest.fixture
67
- def mock_output_dir():
68
- with tempfile.TemporaryDirectory() as temp_dir:
69
- repo_path = os.path.join(temp_dir, 'repo')
70
- # Initialize a GitHub repo in "repo" and add a commit with "README.md"
71
- os.makedirs(repo_path)
72
- os.system(f'git init {repo_path}')
73
- readme_path = os.path.join(repo_path, 'README.md')
74
- with open(readme_path, 'w') as f:
75
- f.write('hello world')
76
- os.system(f'git -C {repo_path} add README.md')
77
- os.system(f"git -C {repo_path} commit -m 'Initial commit'")
78
- yield temp_dir
79
-
80
-
81
- @pytest.fixture
82
- def mock_subprocess():
83
- with patch('subprocess.check_output') as mock_check_output:
84
- yield mock_check_output
85
-
86
-
87
- @pytest.fixture
88
- def mock_os():
89
- with patch('os.system') as mock_system, patch('os.path.join') as mock_join:
90
- yield mock_system, mock_join
91
-
92
-
93
- @pytest.fixture
94
- def mock_user_instructions_template():
95
- return 'Issue: {{ body }}\n\nPlease fix this issue.'
96
-
97
-
98
- @pytest.fixture
99
- def mock_conversation_instructions_template():
100
- return 'Instructions: {{ repo_instruction }}'
101
-
102
-
103
-
104
- @pytest.fixture
105
- def mock_followup_prompt_template():
106
- return 'Issue context: {{ issues }}\n\nReview comments: {{ review_comments }}\n\nReview threads: {{ review_threads }}\n\nFiles: {{ files }}\n\nThread comments: {{ thread_context }}\n\nPlease fix this issue.'
107
-
108
-
109
- def create_cmd_output(exit_code: int, content: str, command: str):
110
- return CmdOutputObservation(
111
- content=content,
112
- command=command,
113
- metadata=CmdOutputMetadata(exit_code=exit_code),
114
- )
115
-
116
-
117
- def test_initialize_runtime(default_mock_args, mock_github_token):
118
- mock_runtime = MagicMock()
119
- mock_runtime.run_action.side_effect = [
120
- create_cmd_output(exit_code=0, content='', command='cd /workspace'),
121
- create_cmd_output(
122
- exit_code=0, content='', command='git config --global core.pager ""'
123
- ),
124
- ]
125
-
126
- # Create resolver with mocked token identification
127
- resolver = IssueResolver(default_mock_args)
128
-
129
- resolver.initialize_runtime(mock_runtime)
130
-
131
- assert mock_runtime.run_action.call_count == 2
132
- mock_runtime.run_action.assert_any_call(CmdRunAction(command='cd /workspace'))
133
- mock_runtime.run_action.assert_any_call(
134
- CmdRunAction(command='git config --global core.pager ""')
135
- )
136
-
137
-
138
- @pytest.mark.asyncio
139
- async def test_resolve_issue_no_issues_found(default_mock_args, mock_github_token):
140
- """Test the resolve_issue method when no issues are found."""
141
- # Mock dependencies
142
- mock_handler = MagicMock()
143
- mock_handler.get_converted_issues.return_value = [] # Return empty list
144
-
145
- # Customize the mock args for this test
146
- default_mock_args.issue_number = 5432
147
-
148
- # Create a resolver instance with mocked token identification
149
- resolver = IssueResolver(default_mock_args)
150
-
151
- # Mock the issue handler
152
- resolver.issue_handler = mock_handler
153
-
154
- # Test that the correct exception is raised
155
- with pytest.raises(ValueError) as exc_info:
156
- await resolver.resolve_issue()
157
-
158
- # Verify the error message
159
- assert 'No issues found for issue number 5432' in str(exc_info.value)
160
- assert 'test-owner/test-repo' in str(exc_info.value)
161
-
162
- mock_handler.get_converted_issues.assert_called_once_with(
163
- issue_numbers=[5432], comment_id=None
164
- )
165
-
166
-
167
- def test_download_issues_from_github():
168
- llm_config = LLMConfig(model='test', api_key='test')
169
- handler = ServiceContextIssue(
170
- GithubIssueHandler('owner', 'repo', 'token'), llm_config
171
- )
172
-
173
- mock_issues_response = MagicMock()
174
- mock_issues_response.json.side_effect = [
175
- [
176
- {'number': 1, 'title': 'Issue 1', 'body': 'This is an issue'},
177
- {
178
- 'number': 2,
179
- 'title': 'PR 1',
180
- 'body': 'This is a pull request',
181
- 'pull_request': {},
182
- },
183
- {'number': 3, 'title': 'Issue 2', 'body': 'This is another issue'},
184
- ],
185
- None,
186
- ]
187
- mock_issues_response.raise_for_status = MagicMock()
188
-
189
- mock_comments_response = MagicMock()
190
- mock_comments_response.json.return_value = []
191
- mock_comments_response.raise_for_status = MagicMock()
192
-
193
- def get_mock_response(url, *args, **kwargs):
194
- if '/comments' in url:
195
- return mock_comments_response
196
- return mock_issues_response
197
-
198
- with patch('httpx.get', side_effect=get_mock_response):
199
- issues = handler.get_converted_issues(issue_numbers=[1, 3])
200
-
201
- assert len(issues) == 2
202
- assert handler.issue_type == 'issue'
203
- assert all(isinstance(issue, Issue) for issue in issues)
204
- assert [issue.number for issue in issues] == [1, 3]
205
- assert [issue.title for issue in issues] == ['Issue 1', 'Issue 2']
206
- assert [issue.review_comments for issue in issues] == [None, None]
207
- assert [issue.closing_issues for issue in issues] == [None, None]
208
- assert [issue.thread_ids for issue in issues] == [None, None]
209
-
210
-
211
- def test_download_pr_from_github():
212
- llm_config = LLMConfig(model='test', api_key='test')
213
- handler = ServiceContextPR(GithubPRHandler('owner', 'repo', 'token'), llm_config)
214
- mock_pr_response = MagicMock()
215
- mock_pr_response.json.side_effect = [
216
- [
217
- {
218
- 'number': 1,
219
- 'title': 'PR 1',
220
- 'body': 'This is a pull request',
221
- 'head': {'ref': 'b1'},
222
- },
223
- {
224
- 'number': 2,
225
- 'title': 'My PR',
226
- 'body': 'This is another pull request',
227
- 'head': {'ref': 'b2'},
228
- },
229
- {'number': 3, 'title': 'PR 3', 'body': 'Final PR', 'head': {'ref': 'b3'}},
230
- ],
231
- None,
232
- ]
233
- mock_pr_response.raise_for_status = MagicMock()
234
-
235
- # Mock for PR comments response
236
- mock_comments_response = MagicMock()
237
- mock_comments_response.json.return_value = [] # No PR comments
238
- mock_comments_response.raise_for_status = MagicMock()
239
-
240
- # Mock for GraphQL request (for download_pr_metadata)
241
- mock_graphql_response = MagicMock()
242
- mock_graphql_response.json.side_effect = lambda: {
243
- 'data': {
244
- 'repository': {
245
- 'pullRequest': {
246
- 'closingIssuesReferences': {
247
- 'edges': [
248
- {'node': {'body': 'Issue 1 body', 'number': 1}},
249
- {'node': {'body': 'Issue 2 body', 'number': 2}},
250
- ]
251
- },
252
- 'reviewThreads': {
253
- 'edges': [
254
- {
255
- 'node': {
256
- 'isResolved': False,
257
- 'id': '1',
258
- 'comments': {
259
- 'nodes': [
260
- {
261
- 'body': 'Unresolved comment 1',
262
- 'path': '/frontend/header.tsx',
263
- },
264
- {'body': 'Follow up thread'},
265
- ]
266
- },
267
- }
268
- },
269
- {
270
- 'node': {
271
- 'isResolved': True,
272
- 'id': '2',
273
- 'comments': {
274
- 'nodes': [
275
- {
276
- 'body': 'Resolved comment 1',
277
- 'path': '/some/file.py',
278
- }
279
- ]
280
- },
281
- }
282
- },
283
- {
284
- 'node': {
285
- 'isResolved': False,
286
- 'id': '3',
287
- 'comments': {
288
- 'nodes': [
289
- {
290
- 'body': 'Unresolved comment 3',
291
- 'path': '/another/file.py',
292
- }
293
- ]
294
- },
295
- }
296
- },
297
- ]
298
- },
299
- }
300
- }
301
- }
302
- }
303
-
304
- mock_graphql_response.raise_for_status = MagicMock()
305
-
306
- def get_mock_response(url, *args, **kwargs):
307
- if '/comments' in url:
308
- return mock_comments_response
309
- return mock_pr_response
310
-
311
- with patch('httpx.get', side_effect=get_mock_response):
312
- with patch('httpx.post', return_value=mock_graphql_response):
313
- issues = handler.get_converted_issues(issue_numbers=[1, 2, 3])
314
-
315
- assert len(issues) == 3
316
- assert handler.issue_type == 'pr'
317
- assert all(isinstance(issue, Issue) for issue in issues)
318
- assert [issue.number for issue in issues] == [1, 2, 3]
319
- assert [issue.title for issue in issues] == ['PR 1', 'My PR', 'PR 3']
320
- assert [issue.head_branch for issue in issues] == ['b1', 'b2', 'b3']
321
-
322
- assert len(issues[0].review_threads) == 2 # Only unresolved threads
323
- assert (
324
- issues[0].review_threads[0].comment
325
- == 'Unresolved comment 1\n---\nlatest feedback:\nFollow up thread\n'
326
- )
327
- assert issues[0].review_threads[0].files == ['/frontend/header.tsx']
328
- assert (
329
- issues[0].review_threads[1].comment
330
- == 'latest feedback:\nUnresolved comment 3\n'
331
- )
332
- assert issues[0].review_threads[1].files == ['/another/file.py']
333
- assert issues[0].closing_issues == ['Issue 1 body', 'Issue 2 body']
334
- assert issues[0].thread_ids == ['1', '3']
335
-
336
-
337
- @pytest.mark.asyncio
338
- async def test_complete_runtime(default_mock_args, mock_github_token):
339
- """Test the complete_runtime method."""
340
- mock_runtime = MagicMock()
341
- mock_runtime.run_action.side_effect = [
342
- create_cmd_output(exit_code=0, content='', command='cd /workspace'),
343
- create_cmd_output(
344
- exit_code=0, content='', command='git config --global core.pager ""'
345
- ),
346
- create_cmd_output(
347
- exit_code=0,
348
- content='',
349
- command='git config --global --add safe.directory /workspace',
350
- ),
351
- create_cmd_output(
352
- exit_code=0, content='', command='git diff base_commit_hash fix'
353
- ),
354
- create_cmd_output(exit_code=0, content='git diff content', command='git apply'),
355
- ]
356
-
357
- # Create resolver with mocked token identification
358
- resolver = IssueResolver(default_mock_args)
359
-
360
- result = await resolver.complete_runtime(mock_runtime, 'base_commit_hash')
361
-
362
- assert result == {'git_patch': 'git diff content'}
363
- assert mock_runtime.run_action.call_count == 5
364
-
365
-
366
- @pytest.mark.asyncio
367
- @pytest.mark.parametrize(
368
- 'test_case',
369
- [
370
- {
371
- 'name': 'successful_run',
372
- 'run_controller_return': MagicMock(
373
- history=[NullObservation(content='')],
374
- metrics=MagicMock(
375
- get=MagicMock(return_value={'test_result': 'passed'})
376
- ),
377
- last_error=None,
378
- ),
379
- 'run_controller_raises': None,
380
- 'expected_success': True,
381
- 'expected_error': None,
382
- 'expected_explanation': 'Issue resolved successfully',
383
- 'is_pr': False,
384
- 'comment_success': None,
385
- },
386
- {
387
- 'name': 'value_error',
388
- 'run_controller_return': None,
389
- 'run_controller_raises': ValueError('Test value error'),
390
- 'expected_success': False,
391
- 'expected_error': 'Agent failed to run or crashed',
392
- 'expected_explanation': 'Agent failed to run',
393
- 'is_pr': False,
394
- 'comment_success': None,
395
- },
396
- {
397
- 'name': 'runtime_error',
398
- 'run_controller_return': None,
399
- 'run_controller_raises': RuntimeError('Test runtime error'),
400
- 'expected_success': False,
401
- 'expected_error': 'Agent failed to run or crashed',
402
- 'expected_explanation': 'Agent failed to run',
403
- 'is_pr': False,
404
- 'comment_success': None,
405
- },
406
- {
407
- 'name': 'json_decode_error',
408
- 'run_controller_return': MagicMock(
409
- history=[NullObservation(content='')],
410
- metrics=MagicMock(
411
- get=MagicMock(return_value={'test_result': 'passed'})
412
- ),
413
- last_error=None,
414
- ),
415
- 'run_controller_raises': None,
416
- 'expected_success': True,
417
- 'expected_error': None,
418
- 'expected_explanation': 'Non-JSON explanation',
419
- 'is_pr': True,
420
- 'comment_success': [
421
- True,
422
- False,
423
- ], # To trigger the PR success logging code path
424
- },
425
- ],
426
- )
427
- async def test_process_issue(
428
- default_mock_args,
429
- mock_github_token,
430
- mock_output_dir,
431
- mock_user_instructions_template,
432
- test_case,
433
- ):
434
- """Test the process_issue method with different scenarios."""
435
-
436
- # Set up test data
437
- issue = Issue(
438
- owner='test_owner',
439
- repo='test_repo',
440
- number=1,
441
- title='Test Issue',
442
- body='This is a test issue',
443
- )
444
- base_commit = 'abcdef1234567890'
445
-
446
- # Customize the mock args for this test
447
- default_mock_args.output_dir = mock_output_dir
448
- default_mock_args.issue_type = 'pr' if test_case.get('is_pr', False) else 'issue'
449
-
450
- # Create a resolver instance with mocked token identification
451
- resolver = IssueResolver(default_mock_args)
452
- resolver.user_instructions_prompt_template = mock_user_instructions_template
453
-
454
- # Mock the handler with LLM config
455
- llm_config = LLMConfig(model='test', api_key='test')
456
- handler_instance = MagicMock()
457
- handler_instance.guess_success.return_value = (
458
- test_case['expected_success'],
459
- test_case.get('comment_success', None),
460
- test_case['expected_explanation'],
461
- )
462
- handler_instance.get_instruction.return_value = (
463
- 'Test instruction',
464
- 'Test conversation instructions',
465
- [],
466
- )
467
- handler_instance.issue_type = 'pr' if test_case.get('is_pr', False) else 'issue'
468
- handler_instance.llm = LLM(llm_config)
469
-
470
- # Mock the runtime and its methods
471
- mock_runtime = MagicMock()
472
- mock_runtime.connect = AsyncMock()
473
- mock_runtime.run_action.return_value = CmdOutputObservation(
474
- content='test patch',
475
- command='git diff',
476
- metadata=CmdOutputMetadata(exit_code=0),
477
- )
478
- mock_runtime.event_stream.subscribe = MagicMock()
479
-
480
- # Mock the create_runtime function
481
- mock_create_runtime = MagicMock(return_value=mock_runtime)
482
-
483
- # Mock the run_controller function
484
- mock_run_controller = AsyncMock()
485
- if test_case['run_controller_raises']:
486
- mock_run_controller.side_effect = test_case['run_controller_raises']
487
- else:
488
- mock_run_controller.return_value = test_case['run_controller_return']
489
-
490
- # Patch the necessary functions and methods
491
- with (
492
- patch('openhands.resolver.issue_resolver.create_runtime', mock_create_runtime),
493
- patch('openhands.resolver.issue_resolver.run_controller', mock_run_controller),
494
- patch.object(
495
- resolver, 'complete_runtime', return_value={'git_patch': 'test patch'}
496
- ),
497
- patch.object(resolver, 'initialize_runtime') as mock_initialize_runtime,
498
- ):
499
- # Call the process_issue method
500
- result = await resolver.process_issue(issue, base_commit, handler_instance)
501
-
502
- # Assert the result matches our expectations
503
- assert isinstance(result, ResolverOutput)
504
- assert result.issue == issue
505
- assert result.base_commit == base_commit
506
- assert result.git_patch == 'test patch'
507
- assert result.success == test_case['expected_success']
508
- assert result.result_explanation == test_case['expected_explanation']
509
- assert result.error == test_case['expected_error']
510
-
511
- # Assert that the mocked functions were called
512
- mock_create_runtime.assert_called_once()
513
- mock_runtime.connect.assert_called_once()
514
- mock_initialize_runtime.assert_called_once()
515
- mock_run_controller.assert_called_once()
516
- resolver.complete_runtime.assert_awaited_once_with(mock_runtime, base_commit)
517
-
518
- # Assert run_controller was called with the right parameters
519
- if not test_case['run_controller_raises']:
520
- # Check that the first positional argument is a config
521
- assert 'config' in mock_run_controller.call_args[1]
522
- # Check that initial_user_action is a MessageAction with the right content
523
- assert isinstance(
524
- mock_run_controller.call_args[1]['initial_user_action'], MessageAction
525
- )
526
- assert mock_run_controller.call_args[1]['runtime'] == mock_runtime
527
-
528
- # Assert that guess_success was called only for successful runs
529
- if test_case['expected_success']:
530
- handler_instance.guess_success.assert_called_once()
531
- else:
532
- handler_instance.guess_success.assert_not_called()
533
-
534
-
535
- def test_get_instruction(mock_user_instructions_template, mock_conversation_instructions_template, mock_followup_prompt_template):
536
- issue = Issue(
537
- owner='test_owner',
538
- repo='test_repo',
539
- number=123,
540
- title='Test Issue',
541
- body='This is a test issue refer to image ![First Image](https://sampleimage.com/image1.png)',
542
- )
543
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
544
- issue_handler = ServiceContextIssue(
545
- GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
546
- )
547
- instruction, conversation_instructions, images_urls = issue_handler.get_instruction(
548
- issue, mock_user_instructions_template, mock_conversation_instructions_template, None
549
- )
550
- expected_instruction = 'Issue: Test Issue\n\nThis is a test issue refer to image ![First Image](https://sampleimage.com/image1.png)\n\nPlease fix this issue.'
551
-
552
- assert images_urls == ['https://sampleimage.com/image1.png']
553
- assert issue_handler.issue_type == 'issue'
554
- assert instruction == expected_instruction
555
- assert conversation_instructions is not None
556
-
557
- issue = Issue(
558
- owner='test_owner',
559
- repo='test_repo',
560
- number=123,
561
- title='Test Issue',
562
- body='This is a test issue',
563
- closing_issues=['Issue 1 fix the type'],
564
- review_threads=[
565
- ReviewThread(
566
- comment="There is still a typo 'pthon' instead of 'python'", files=[]
567
- )
568
- ],
569
- thread_comments=[
570
- "I've left review comments, please address them",
571
- 'This is a valid concern.',
572
- ],
573
- )
574
-
575
- pr_handler = ServiceContextPR(
576
- GithubPRHandler('owner', 'repo', 'token'), mock_llm_config
577
- )
578
- instruction, conversation_instructions, images_urls = pr_handler.get_instruction(
579
- issue, mock_followup_prompt_template, mock_conversation_instructions_template, None
580
- )
581
- expected_instruction = "Issue context: [\n \"Issue 1 fix the type\"\n]\n\nReview comments: None\n\nReview threads: [\n \"There is still a typo 'pthon' instead of 'python'\"\n]\n\nFiles: []\n\nThread comments: I've left review comments, please address them\n---\nThis is a valid concern.\n\nPlease fix this issue."
582
-
583
- assert images_urls == []
584
- assert pr_handler.issue_type == 'pr'
585
- # Compare content ignoring exact formatting
586
- assert "There is still a typo 'pthon' instead of 'python'" in instruction
587
- assert "I've left review comments, please address them" in instruction
588
- assert 'This is a valid concern' in instruction
589
- assert conversation_instructions is not None
590
-
591
-
592
- def test_file_instruction():
593
- issue = Issue(
594
- owner='test_owner',
595
- repo='test_repo',
596
- number=123,
597
- title='Test Issue',
598
- body='This is a test issue ![image](https://sampleimage.com/sample.png)',
599
- )
600
- # load prompt from openhands/resolver/prompts/resolve/basic.jinja
601
- with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
602
- prompt = f.read()
603
-
604
- with open('openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r') as f:
605
- conversation_instructions_template = f.read()
606
-
607
- # Test without thread comments
608
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
609
- issue_handler = ServiceContextIssue(
610
- GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
611
- )
612
- instruction, conversation_instructions, images_urls = issue_handler.get_instruction(
613
- issue, prompt, conversation_instructions_template, None
614
- )
615
- expected_instruction = """Please fix the following issue for the repository in /workspace.
616
- An environment has been set up for you to start working. You may assume all necessary tools are installed.
617
-
618
- # Problem Statement
619
- Test Issue
620
-
621
- This is a test issue ![image](https://sampleimage.com/sample.png)"""
622
-
623
-
624
- expected_conversation_instructions = """IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.
625
- You SHOULD INCLUDE PROPER INDENTATION in your edit commands.
626
-
627
- When you think you have fixed the issue through code changes, please finish the interaction."""
628
-
629
- assert instruction == expected_instruction
630
- assert conversation_instructions == expected_conversation_instructions
631
-
632
- assert images_urls == ['https://sampleimage.com/sample.png']
633
-
634
-
635
- def test_file_instruction_with_repo_instruction():
636
- issue = Issue(
637
- owner='test_owner',
638
- repo='test_repo',
639
- number=123,
640
- title='Test Issue',
641
- body='This is a test issue',
642
- )
643
- # load prompt from openhands/resolver/prompts/resolve/basic.jinja
644
- with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
645
- prompt = f.read()
646
-
647
- with open('openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r') as f:
648
- conversation_instructions_prompt = f.read()
649
-
650
- # load repo instruction from openhands/resolver/prompts/repo_instructions/all-hands-ai___openhands-resolver.txt
651
- with open(
652
- 'openhands/resolver/prompts/repo_instructions/all-hands-ai___openhands-resolver.txt',
653
- 'r',
654
- ) as f:
655
- repo_instruction = f.read()
656
-
657
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
658
- issue_handler = ServiceContextIssue(
659
- GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
660
- )
661
- instruction, conversation_instructions, image_urls = issue_handler.get_instruction(
662
- issue, prompt, conversation_instructions_prompt, repo_instruction
663
- )
664
-
665
-
666
- expected_instruction = """Please fix the following issue for the repository in /workspace.
667
- An environment has been set up for you to start working. You may assume all necessary tools are installed.
668
-
669
- # Problem Statement
670
- Test Issue
671
-
672
- This is a test issue"""
673
-
674
- expected_conversation_instructions = """IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.
675
- You SHOULD INCLUDE PROPER INDENTATION in your edit commands.
676
-
677
- Some basic information about this repository:
678
- This is a Python repo for openhands-resolver, a library that attempts to resolve github issues with the AI agent OpenHands.
679
-
680
- - Setup: `poetry install --with test --with dev`
681
- - Testing: `poetry run pytest tests/test_*.py`
682
-
683
-
684
- When you think you have fixed the issue through code changes, please finish the interaction."""
685
-
686
-
687
- assert instruction == expected_instruction
688
- assert conversation_instructions == expected_conversation_instructions
689
- assert conversation_instructions is not None
690
- assert issue_handler.issue_type == 'issue'
691
- assert image_urls == []
692
-
693
-
694
- def test_guess_success():
695
- mock_issue = Issue(
696
- owner='test_owner',
697
- repo='test_repo',
698
- number=1,
699
- title='Test Issue',
700
- body='This is a test issue',
701
- )
702
- mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
703
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
704
-
705
- mock_completion_response = MagicMock()
706
- mock_completion_response.choices = [
707
- MagicMock(
708
- message=MagicMock(
709
- content='--- success\ntrue\n--- explanation\nIssue resolved successfully'
710
- )
711
- )
712
- ]
713
- issue_handler = ServiceContextIssue(
714
- GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
715
- )
716
-
717
- with patch.object(
718
- LLM, 'completion', MagicMock(return_value=mock_completion_response)
719
- ):
720
- success, comment_success, explanation = issue_handler.guess_success(
721
- mock_issue, mock_history
722
- )
723
- assert issue_handler.issue_type == 'issue'
724
- assert comment_success is None
725
- assert success
726
- assert explanation == 'Issue resolved successfully'
727
-
728
-
729
- def test_guess_success_with_thread_comments():
730
- mock_issue = Issue(
731
- owner='test_owner',
732
- repo='test_repo',
733
- number=1,
734
- title='Test Issue',
735
- body='This is a test issue',
736
- thread_comments=[
737
- 'First comment',
738
- 'Second comment',
739
- 'latest feedback:\nPlease add tests',
740
- ],
741
- )
742
- mock_history = [MagicMock(message='I have added tests for this case')]
743
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
744
-
745
- mock_completion_response = MagicMock()
746
- mock_completion_response.choices = [
747
- MagicMock(
748
- message=MagicMock(
749
- content='--- success\ntrue\n--- explanation\nTests have been added to verify thread comments handling'
750
- )
751
- )
752
- ]
753
- issue_handler = ServiceContextIssue(
754
- GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
755
- )
756
-
757
- with patch.object(
758
- LLM, 'completion', MagicMock(return_value=mock_completion_response)
759
- ):
760
- success, comment_success, explanation = issue_handler.guess_success(
761
- mock_issue, mock_history
762
- )
763
- assert issue_handler.issue_type == 'issue'
764
- assert comment_success is None
765
- assert success
766
- assert 'Tests have been added' in explanation
767
-
768
-
769
- def test_instruction_with_thread_comments():
770
- # Create an issue with thread comments
771
- issue = Issue(
772
- owner='test_owner',
773
- repo='test_repo',
774
- number=123,
775
- title='Test Issue',
776
- body='This is a test issue',
777
- thread_comments=[
778
- 'First comment',
779
- 'Second comment',
780
- 'latest feedback:\nPlease add tests',
781
- ],
782
- )
783
-
784
- # Load the basic prompt template
785
- with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
786
- prompt = f.read()
787
-
788
- with open('openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r') as f:
789
- conversation_instructions_template = f.read()
790
-
791
- llm_config = LLMConfig(model='test', api_key='test')
792
- issue_handler = ServiceContextIssue(
793
- GithubIssueHandler('owner', 'repo', 'token'), llm_config
794
- )
795
- instruction, _, images_urls = issue_handler.get_instruction(
796
- issue, prompt, conversation_instructions_template, None
797
- )
798
-
799
- # Verify that thread comments are included in the instruction
800
- assert 'First comment' in instruction
801
- assert 'Second comment' in instruction
802
- assert 'Please add tests' in instruction
803
- assert 'Issue Thread Comments:' in instruction
804
- assert images_urls == []
805
-
806
-
807
- def test_guess_success_failure():
808
- mock_issue = Issue(
809
- owner='test_owner',
810
- repo='test_repo',
811
- number=1,
812
- title='Test Issue',
813
- body='This is a test issue',
814
- thread_comments=[
815
- 'First comment',
816
- 'Second comment',
817
- 'latest feedback:\nPlease add tests',
818
- ],
819
- )
820
- mock_history = [MagicMock(message='I have added tests for this case')]
821
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
822
-
823
- mock_completion_response = MagicMock()
824
- mock_completion_response.choices = [
825
- MagicMock(
826
- message=MagicMock(
827
- content='--- success\ntrue\n--- explanation\nTests have been added to verify thread comments handling'
828
- )
829
- )
830
- ]
831
- issue_handler = ServiceContextIssue(
832
- GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
833
- )
834
-
835
- with patch.object(
836
- LLM, 'completion', MagicMock(return_value=mock_completion_response)
837
- ):
838
- success, comment_success, explanation = issue_handler.guess_success(
839
- mock_issue, mock_history
840
- )
841
- assert issue_handler.issue_type == 'issue'
842
- assert comment_success is None
843
- assert success
844
- assert 'Tests have been added' in explanation
845
-
846
-
847
- def test_guess_success_negative_case():
848
- mock_issue = Issue(
849
- owner='test_owner',
850
- repo='test_repo',
851
- number=1,
852
- title='Test Issue',
853
- body='This is a test issue',
854
- )
855
- mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
856
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
857
-
858
- mock_completion_response = MagicMock()
859
- mock_completion_response.choices = [
860
- MagicMock(
861
- message=MagicMock(
862
- content='--- success\nfalse\n--- explanation\nIssue not resolved'
863
- )
864
- )
865
- ]
866
- issue_handler = ServiceContextIssue(
867
- GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
868
- )
869
-
870
- with patch.object(
871
- LLM, 'completion', MagicMock(return_value=mock_completion_response)
872
- ):
873
- success, comment_success, explanation = issue_handler.guess_success(
874
- mock_issue, mock_history
875
- )
876
- assert issue_handler.issue_type == 'issue'
877
- assert comment_success is None
878
- assert not success
879
- assert explanation == 'Issue not resolved'
880
-
881
-
882
- def test_guess_success_invalid_output():
883
- mock_issue = Issue(
884
- owner='test_owner',
885
- repo='test_repo',
886
- number=1,
887
- title='Test Issue',
888
- body='This is a test issue',
889
- )
890
- mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
891
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
892
-
893
- mock_completion_response = MagicMock()
894
- mock_completion_response.choices = [
895
- MagicMock(message=MagicMock(content='This is not a valid output'))
896
- ]
897
- issue_handler = ServiceContextIssue(
898
- GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
899
- )
900
-
901
- with patch.object(
902
- LLM, 'completion', MagicMock(return_value=mock_completion_response)
903
- ):
904
- success, comment_success, explanation = issue_handler.guess_success(
905
- mock_issue, mock_history
906
- )
907
- assert issue_handler.issue_type == 'issue'
908
- assert comment_success is None
909
- assert not success
910
- assert (
911
- explanation
912
- == 'Failed to decode answer from LLM response: This is not a valid output'
913
- )
914
-
915
-
916
- def test_download_pr_with_review_comments():
917
- llm_config = LLMConfig(model='test', api_key='test')
918
- handler = ServiceContextPR(GithubPRHandler('owner', 'repo', 'token'), llm_config)
919
- mock_pr_response = MagicMock()
920
- mock_pr_response.json.side_effect = [
921
- [
922
- {
923
- 'number': 1,
924
- 'title': 'PR 1',
925
- 'body': 'This is a pull request',
926
- 'head': {'ref': 'b1'},
927
- },
928
- ],
929
- None,
930
- ]
931
- mock_pr_response.raise_for_status = MagicMock()
932
-
933
- # Mock for PR comments response
934
- mock_comments_response = MagicMock()
935
- mock_comments_response.json.return_value = [] # No PR comments
936
- mock_comments_response.raise_for_status = MagicMock()
937
-
938
- # Mock for GraphQL request with review comments but no threads
939
- mock_graphql_response = MagicMock()
940
- mock_graphql_response.json.side_effect = lambda: {
941
- 'data': {
942
- 'repository': {
943
- 'pullRequest': {
944
- 'closingIssuesReferences': {'edges': []},
945
- 'reviews': {
946
- 'nodes': [
947
- {'body': 'Please fix this typo'},
948
- {'body': 'Add more tests'},
949
- ]
950
- },
951
- }
952
- }
953
- }
954
- }
955
-
956
- mock_graphql_response.raise_for_status = MagicMock()
957
-
958
- def get_mock_response(url, *args, **kwargs):
959
- if '/comments' in url:
960
- return mock_comments_response
961
- return mock_pr_response
962
-
963
- with patch('httpx.get', side_effect=get_mock_response):
964
- with patch('httpx.post', return_value=mock_graphql_response):
965
- issues = handler.get_converted_issues(issue_numbers=[1])
966
-
967
- assert len(issues) == 1
968
- assert handler.issue_type == 'pr'
969
- assert isinstance(issues[0], Issue)
970
- assert issues[0].number == 1
971
- assert issues[0].title == 'PR 1'
972
- assert issues[0].head_branch == 'b1'
973
-
974
- # Verify review comments are set but threads are empty
975
- assert len(issues[0].review_comments) == 2
976
- assert issues[0].review_comments[0] == 'Please fix this typo'
977
- assert issues[0].review_comments[1] == 'Add more tests'
978
- assert not issues[0].review_threads
979
- assert not issues[0].closing_issues
980
- assert not issues[0].thread_ids
981
-
982
-
983
- def test_download_issue_with_specific_comment():
984
- llm_config = LLMConfig(model='test', api_key='test')
985
- handler = ServiceContextIssue(
986
- GithubIssueHandler('owner', 'repo', 'token'), llm_config
987
- )
988
-
989
- # Define the specific comment_id to filter
990
- specific_comment_id = 101
991
-
992
- # Mock issue and comment responses
993
- mock_issue_response = MagicMock()
994
- mock_issue_response.json.side_effect = [
995
- [
996
- {'number': 1, 'title': 'Issue 1', 'body': 'This is an issue'},
997
- ],
998
- None,
999
- ]
1000
- mock_issue_response.raise_for_status = MagicMock()
1001
-
1002
- mock_comments_response = MagicMock()
1003
- mock_comments_response.json.return_value = [
1004
- {
1005
- 'id': specific_comment_id,
1006
- 'body': 'Specific comment body',
1007
- 'issue_url': 'https://api.github.com/repos/owner/repo/issues/1',
1008
- },
1009
- {
1010
- 'id': 102,
1011
- 'body': 'Another comment body',
1012
- 'issue_url': 'https://api.github.com/repos/owner/repo/issues/2',
1013
- },
1014
- ]
1015
- mock_comments_response.raise_for_status = MagicMock()
1016
-
1017
- def get_mock_response(url, *args, **kwargs):
1018
- if '/comments' in url:
1019
- return mock_comments_response
1020
-
1021
- return mock_issue_response
1022
-
1023
- with patch('httpx.get', side_effect=get_mock_response):
1024
- issues = handler.get_converted_issues(
1025
- issue_numbers=[1], comment_id=specific_comment_id
1026
- )
1027
-
1028
- assert len(issues) == 1
1029
- assert issues[0].number == 1
1030
- assert issues[0].title == 'Issue 1'
1031
- assert issues[0].thread_comments == ['Specific comment body']
1032
-
1033
-
1034
- if __name__ == '__main__':
1035
- pytest.main()

tests/unit/resolver/github/test_send_pull_request.py DELETED
@@ -1,1304 +0,0 @@
1
- import os
2
- import tempfile
3
- from unittest.mock import ANY, MagicMock, call, patch
4
-
5
- import pytest
6
-
7
- from openhands.core.config import LLMConfig
8
- from openhands.integrations.service_types import ProviderType
9
- from openhands.resolver.interfaces.github import GithubIssueHandler
10
- from openhands.resolver.interfaces.issue import ReviewThread
11
- from openhands.resolver.resolver_output import Issue, ResolverOutput
12
- from openhands.resolver.send_pull_request import (
13
- apply_patch,
14
- initialize_repo,
15
- load_single_resolver_output,
16
- main,
17
- make_commit,
18
- process_single_issue,
19
- send_pull_request,
20
- update_existing_pull_request,
21
- )
22
-
23
-
24
- @pytest.fixture
25
- def mock_output_dir():
26
- with tempfile.TemporaryDirectory() as temp_dir:
27
- repo_path = os.path.join(temp_dir, 'repo')
28
- # Initialize a GitHub repo in "repo" and add a commit with "README.md"
29
- os.makedirs(repo_path)
30
- os.system(f'git init {repo_path}')
31
- readme_path = os.path.join(repo_path, 'README.md')
32
- with open(readme_path, 'w') as f:
33
- f.write('hello world')
34
- os.system(f'git -C {repo_path} add README.md')
35
- os.system(f"git -C {repo_path} commit -m 'Initial commit'")
36
- yield temp_dir
37
-
38
-
39
- @pytest.fixture
40
- def mock_issue():
41
- return Issue(
42
- number=42,
43
- title='Test Issue',
44
- owner='test-owner',
45
- repo='test-repo',
46
- body='Test body',
47
- )
48
-
49
-
50
- @pytest.fixture
51
- def mock_llm_config():
52
- return LLMConfig()
53
-
54
-
55
- def test_load_single_resolver_output():
56
- mock_output_jsonl = 'tests/unit/resolver/mock_output/output.jsonl'
57
-
58
- # Test loading an existing issue
59
- resolver_output = load_single_resolver_output(mock_output_jsonl, 5)
60
- assert isinstance(resolver_output, ResolverOutput)
61
- assert resolver_output.issue.number == 5
62
- assert resolver_output.issue.title == 'Add MIT license'
63
- assert resolver_output.issue.owner == 'neubig'
64
- assert resolver_output.issue.repo == 'pr-viewer'
65
-
66
- # Test loading a non-existent issue
67
- with pytest.raises(ValueError):
68
- load_single_resolver_output(mock_output_jsonl, 999)
69
-
70
-
71
- def test_apply_patch(mock_output_dir):
72
- # Create a sample file in the mock repo
73
- sample_file = os.path.join(mock_output_dir, 'sample.txt')
74
- with open(sample_file, 'w') as f:
75
- f.write('Original content')
76
-
77
- # Create a sample patch
78
- patch_content = """
79
- diff --git a/sample.txt b/sample.txt
80
- index 9daeafb..b02def2 100644
81
- --- a/sample.txt
82
- +++ b/sample.txt
83
- @@ -1 +1,2 @@
84
- -Original content
85
- +Updated content
86
- +New line
87
- """
88
-
89
- # Apply the patch
90
- apply_patch(mock_output_dir, patch_content)
91
-
92
- # Check if the file was updated correctly
93
- with open(sample_file, 'r') as f:
94
- updated_content = f.read()
95
-
96
- assert updated_content.strip() == 'Updated content\nNew line'.strip()
97
-
98
-
99
- def test_apply_patch_preserves_line_endings(mock_output_dir):
100
- # Create sample files with different line endings
101
- unix_file = os.path.join(mock_output_dir, 'unix_style.txt')
102
- dos_file = os.path.join(mock_output_dir, 'dos_style.txt')
103
-
104
- with open(unix_file, 'w', newline='\n') as f:
105
- f.write('Line 1\nLine 2\nLine 3')
106
-
107
- with open(dos_file, 'w', newline='\r\n') as f:
108
- f.write('Line 1\r\nLine 2\r\nLine 3')
109
-
110
- # Create patches for both files
111
- unix_patch = """
112
- diff --git a/unix_style.txt b/unix_style.txt
113
- index 9daeafb..b02def2 100644
114
- --- a/unix_style.txt
115
- +++ b/unix_style.txt
116
- @@ -1,3 +1,3 @@
117
- Line 1
118
- -Line 2
119
- +Updated Line 2
120
- Line 3
121
- """
122
-
123
- dos_patch = """
124
- diff --git a/dos_style.txt b/dos_style.txt
125
- index 9daeafb..b02def2 100644
126
- --- a/dos_style.txt
127
- +++ b/dos_style.txt
128
- @@ -1,3 +1,3 @@
129
- Line 1
130
- -Line 2
131
- +Updated Line 2
132
- Line 3
133
- """
134
-
135
- # Apply patches
136
- apply_patch(mock_output_dir, unix_patch)
137
- apply_patch(mock_output_dir, dos_patch)
138
-
139
- # Check if line endings are preserved
140
- with open(unix_file, 'rb') as f:
141
- unix_content = f.read()
142
- with open(dos_file, 'rb') as f:
143
- dos_content = f.read()
144
-
145
- assert b'\r\n' not in unix_content, (
146
- 'Unix-style line endings were changed to DOS-style'
147
- )
148
- assert b'\r\n' in dos_content, 'DOS-style line endings were changed to Unix-style'
149
-
150
- # Check if content was updated correctly
151
- assert unix_content.decode('utf-8').split('\n')[1] == 'Updated Line 2'
152
- assert dos_content.decode('utf-8').split('\r\n')[1] == 'Updated Line 2'
153
-
154
-
155
- def test_apply_patch_create_new_file(mock_output_dir):
156
- # Create a patch that adds a new file
157
- patch_content = """
158
- diff --git a/new_file.txt b/new_file.txt
159
- new file mode 100644
160
- index 0000000..3b18e51
161
- --- /dev/null
162
- +++ b/new_file.txt
163
- @@ -0,0 +1 @@
164
- +hello world
165
- """
166
-
167
- # Apply the patch
168
- apply_patch(mock_output_dir, patch_content)
169
-
170
- # Check if the new file was created
171
- new_file_path = os.path.join(mock_output_dir, 'new_file.txt')
172
- assert os.path.exists(new_file_path), 'New file was not created'
173
-
174
- # Check if the file content is correct
175
- with open(new_file_path, 'r') as f:
176
- content = f.read().strip()
177
- assert content == 'hello world', 'File content is incorrect'
178
-
179
-
180
- def test_apply_patch_rename_file(mock_output_dir):
181
- # Create a sample file in the mock repo
182
- old_file = os.path.join(mock_output_dir, 'old_name.txt')
183
- with open(old_file, 'w') as f:
184
- f.write('This file will be renamed')
185
-
186
- # Create a patch that renames the file
187
- patch_content = """diff --git a/old_name.txt b/new_name.txt
188
- similarity index 100%
189
- rename from old_name.txt
190
- rename to new_name.txt"""
191
-
192
- # Apply the patch
193
- apply_patch(mock_output_dir, patch_content)
194
-
195
- # Check if the file was renamed
196
- new_file = os.path.join(mock_output_dir, 'new_name.txt')
197
- assert not os.path.exists(old_file), 'Old file still exists'
198
- assert os.path.exists(new_file), 'New file was not created'
199
-
200
- # Check if the content is preserved
201
- with open(new_file, 'r') as f:
202
- content = f.read()
203
- assert content == 'This file will be renamed'
204
-
205
-
206
- def test_apply_patch_delete_file(mock_output_dir):
207
- # Create a sample file in the mock repo
208
- sample_file = os.path.join(mock_output_dir, 'to_be_deleted.txt')
209
- with open(sample_file, 'w') as f:
210
- f.write('This file will be deleted')
211
-
212
- # Create a patch that deletes the file
213
- patch_content = """
214
- diff --git a/to_be_deleted.txt b/to_be_deleted.txt
215
- deleted file mode 100644
216
- index 9daeafb..0000000
217
- --- a/to_be_deleted.txt
218
- +++ /dev/null
219
- @@ -1 +0,0 @@
220
- -This file will be deleted
221
- """
222
-
223
- # Apply the patch
224
- apply_patch(mock_output_dir, patch_content)
225
-
226
- # Check if the file was deleted
227
- assert not os.path.exists(sample_file), 'File was not deleted'
228
-
229
-
230
- def test_initialize_repo(mock_output_dir):
231
- issue_type = 'issue'
232
- # Copy the repo to patches
233
- ISSUE_NUMBER = 3
234
- initialize_repo(mock_output_dir, ISSUE_NUMBER, issue_type)
235
- patches_dir = os.path.join(mock_output_dir, 'patches', f'issue_{ISSUE_NUMBER}')
236
-
237
- # Check if files were copied correctly
238
- assert os.path.exists(os.path.join(patches_dir, 'README.md'))
239
-
240
- # Check file contents
241
- with open(os.path.join(patches_dir, 'README.md'), 'r') as f:
242
- assert f.read() == 'hello world'
243
-
244
-
245
- @patch('openhands.resolver.interfaces.github.GithubIssueHandler.reply_to_comment')
246
- @patch('httpx.post')
247
- @patch('subprocess.run')
248
- @patch('openhands.resolver.send_pull_request.LLM')
249
- def test_update_existing_pull_request(
250
- mock_llm_class,
251
- mock_subprocess_run,
252
- mock_requests_post,
253
- mock_reply_to_comment,
254
- ):
255
- # Arrange: Set up test data
256
- issue = Issue(
257
- owner='test-owner',
258
- repo='test-repo',
259
- number=1,
260
- title='Test PR',
261
- body='This is a test PR',
262
- thread_ids=['comment1', 'comment2'],
263
- head_branch='test-branch',
264
- )
265
- token = 'test-token'
266
- username = 'test-user'
267
- patch_dir = '/path/to/patch'
268
- additional_message = '["Fixed bug in function A", "Updated documentation for B"]'
269
-
270
- # Mock the subprocess.run call for git push
271
- mock_subprocess_run.return_value = MagicMock(returncode=0)
272
-
273
- # Mock the requests.post call for adding a PR comment
274
- mock_requests_post.return_value.status_code = 201
275
-
276
- # Mock LLM instance and completion call
277
- mock_llm_instance = MagicMock()
278
- mock_completion_response = MagicMock()
279
- mock_completion_response.choices = [
280
- MagicMock(message=MagicMock(content='This is an issue resolution.'))
281
- ]
282
- mock_llm_instance.completion.return_value = mock_completion_response
283
- mock_llm_class.return_value = mock_llm_instance
284
-
285
- llm_config = LLMConfig()
286
-
287
- # Act: Call the function without comment_message to test auto-generation
288
- result = update_existing_pull_request(
289
- issue,
290
- token,
291
- username,
292
- ProviderType.GITHUB,
293
- patch_dir,
294
- llm_config,
295
- comment_message=None,
296
- additional_message=additional_message,
297
- )
298
-
299
- # Assert: Check if the git push command was executed
300
- push_command = (
301
- f'git -C {patch_dir} push '
302
- f'https://{username}:{token}@github.com/'
303
- f'{issue.owner}/{issue.repo}.git {issue.head_branch}'
304
- )
305
- mock_subprocess_run.assert_called_once_with(
306
- push_command, shell=True, capture_output=True, text=True
307
- )
308
-
309
- # Assert: Check if the auto-generated comment was posted to the PR
310
- comment_url = f'https://api.github.com/repos/{issue.owner}/{issue.repo}/issues/{issue.number}/comments'
311
- expected_comment = 'This is an issue resolution.'
312
- mock_requests_post.assert_called_once_with(
313
- comment_url,
314
- headers={
315
- 'Authorization': f'token {token}',
316
- 'Accept': 'application/vnd.github.v3+json',
317
- },
318
- json={'body': expected_comment},
319
- )
320
-
321
- # Assert: Check if the reply_to_comment function was called for each thread ID
322
- mock_reply_to_comment.assert_has_calls(
323
- [
324
- call(issue.number, 'comment1', 'Fixed bug in function A'),
325
- call(issue.number, 'comment2', 'Updated documentation for B'),
326
- ]
327
- )
328
-
329
- # Assert: Check the returned PR URL
330
- assert (
331
- result == f'https://github.com/{issue.owner}/{issue.repo}/pull/{issue.number}'
332
- )
333
-
334
-
335
- @pytest.mark.parametrize(
336
- 'pr_type,target_branch,pr_title',
337
- [
338
- ('branch', None, None),
339
- ('draft', None, None),
340
- ('ready', None, None),
341
- ('branch', 'feature', None),
342
- ('draft', 'develop', None),
343
- ('ready', 'staging', None),
344
- ('ready', None, 'Custom PR Title'),
345
- ('draft', 'develop', 'Another Custom Title'),
346
- ],
347
- )
348
- @patch('subprocess.run')
349
- @patch('httpx.post')
350
- @patch('httpx.get')
351
- def test_send_pull_request(
352
- mock_get,
353
- mock_post,
354
- mock_run,
355
- mock_issue,
356
- mock_llm_config,
357
- mock_output_dir,
358
- pr_type,
359
- target_branch,
360
- pr_title,
361
- ):
362
- repo_path = os.path.join(mock_output_dir, 'repo')
363
-
364
- # Mock API responses based on whether target_branch is specified
365
- if target_branch:
366
- mock_get.side_effect = [
367
- MagicMock(status_code=404), # Branch doesn't exist
368
- MagicMock(status_code=200), # Target branch exists
369
- ]
370
- else:
371
- mock_get.side_effect = [
372
- MagicMock(status_code=404), # Branch doesn't exist
373
- MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
374
- ]
375
-
376
- mock_post.return_value.json.return_value = {
377
- 'html_url': 'https://github.com/test-owner/test-repo/pull/1'
378
- }
379
-
380
- # Mock subprocess.run calls
381
- mock_run.side_effect = [
382
- MagicMock(returncode=0), # git checkout -b
383
- MagicMock(returncode=0), # git push
384
- ]
385
-
386
- # Call the function
387
- result = send_pull_request(
388
- issue=mock_issue,
389
- token='test-token',
390
- username='test-user',
391
- platform=ProviderType.GITHUB,
392
- patch_dir=repo_path,
393
- pr_type=pr_type,
394
- target_branch=target_branch,
395
- pr_title=pr_title,
396
- )
397
-
398
- # Assert API calls
399
- expected_get_calls = 2
400
- assert mock_get.call_count == expected_get_calls
401
-
402
- # Check branch creation and push
403
- assert mock_run.call_count == 2
404
- checkout_call, push_call = mock_run.call_args_list
405
-
406
- assert checkout_call == call(
407
- ['git', '-C', repo_path, 'checkout', '-b', 'openhands-fix-issue-42'],
408
- capture_output=True,
409
- text=True,
410
- )
411
- assert push_call == call(
412
- [
413
- 'git',
414
- '-C',
415
- repo_path,
416
- 'push',
417
- 'https://test-user:test-token@github.com/test-owner/test-repo.git',
418
- 'openhands-fix-issue-42',
419
- ],
420
- capture_output=True,
421
- text=True,
422
- )
423
-
424
- # Check PR creation based on pr_type
425
- if pr_type == 'branch':
426
- assert (
427
- result
428
- == 'https://github.com/test-owner/test-repo/compare/openhands-fix-issue-42?expand=1'
429
- )
430
- mock_post.assert_not_called()
431
- else:
432
- assert result == 'https://github.com/test-owner/test-repo/pull/1'
433
- mock_post.assert_called_once()
434
- post_data = mock_post.call_args[1]['json']
435
- expected_title = pr_title if pr_title else 'Fix issue #42: Test Issue'
436
- assert post_data['title'] == expected_title
437
- assert post_data['body'].startswith('This pull request fixes #42.')
438
- assert post_data['head'] == 'openhands-fix-issue-42'
439
- assert post_data['base'] == (target_branch if target_branch else 'main')
440
- assert post_data['draft'] == (pr_type == 'draft')
441
-
442
-
443
- @patch('subprocess.run')
444
- @patch('httpx.post')
445
- @patch('httpx.get')
446
- def test_send_pull_request_with_reviewer(
447
- mock_get, mock_post, mock_run, mock_issue, mock_output_dir, mock_llm_config
448
- ):
449
- repo_path = os.path.join(mock_output_dir, 'repo')
450
- reviewer = 'test-reviewer'
451
-
452
- # Mock API responses
453
- mock_get.side_effect = [
454
- MagicMock(status_code=404), # Branch doesn't exist
455
- MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
456
- ]
457
-
458
- # Mock PR creation response
459
- mock_post.side_effect = [
460
- MagicMock(
461
- status_code=201,
462
- json=lambda: {
463
- 'html_url': 'https://github.com/test-owner/test-repo/pull/1',
464
- 'number': 1,
465
- },
466
- ), # PR creation
467
- MagicMock(status_code=201), # Reviewer request
468
- ]
469
-
470
- # Mock subprocess.run calls
471
- mock_run.side_effect = [
472
- MagicMock(returncode=0), # git checkout -b
473
- MagicMock(returncode=0), # git push
474
- ]
475
-
476
- # Call the function with reviewer
477
- result = send_pull_request(
478
- issue=mock_issue,
479
- token='test-token',
480
- username='test-user',
481
- platform=ProviderType.GITHUB,
482
- patch_dir=repo_path,
483
- pr_type='ready',
484
- reviewer=reviewer,
485
- )
486
-
487
- # Assert API calls
488
- assert mock_get.call_count == 2
489
- assert mock_post.call_count == 2
490
-
491
- # Check PR creation
492
- pr_create_call = mock_post.call_args_list[0]
493
- assert pr_create_call[1]['json']['title'] == 'Fix issue #42: Test Issue'
494
-
495
- # Check reviewer request
496
- reviewer_request_call = mock_post.call_args_list[1]
497
- assert (
498
- reviewer_request_call[0][0]
499
- == 'https://api.github.com/repos/test-owner/test-repo/pulls/1/requested_reviewers'
500
- )
501
- assert reviewer_request_call[1]['json'] == {'reviewers': ['test-reviewer']}
502
-
503
- # Check the result URL
504
- assert result == 'https://github.com/test-owner/test-repo/pull/1'
505
-
506
-
507
- @patch('subprocess.run')
508
- @patch('httpx.post')
509
- @patch('httpx.get')
510
- def test_send_pull_request_target_branch_with_fork(
511
- mock_get, mock_post, mock_run, mock_issue, mock_output_dir
512
- ):
513
- """Test that target_branch works correctly when using a fork."""
514
- repo_path = os.path.join(mock_output_dir, 'repo')
515
- fork_owner = 'fork-owner'
516
- target_branch = 'custom-target'
517
-
518
- # Mock API responses
519
- mock_get.side_effect = [
520
- MagicMock(status_code=404), # Branch doesn't exist
521
- MagicMock(status_code=200), # Target branch exists
522
- ]
523
-
524
- mock_post.return_value.json.return_value = {
525
- 'html_url': 'https://github.com/test-owner/test-repo/pull/1'
526
- }
527
-
528
- # Mock subprocess.run calls
529
- mock_run.side_effect = [
530
- MagicMock(returncode=0), # git checkout -b
531
- MagicMock(returncode=0), # git push
532
- ]
533
-
534
- # Call the function with fork_owner and target_branch
535
- send_pull_request(
536
- issue=mock_issue,
537
- token='test-token',
538
- username='test-user',
539
- platform=ProviderType.GITHUB,
540
- patch_dir=repo_path,
541
- pr_type='ready',
542
- fork_owner=fork_owner,
543
- target_branch=target_branch,
544
- )
545
-
546
- # Assert API calls
547
- assert mock_get.call_count == 2
548
-
549
- # Verify target branch was checked in original repo, not fork
550
- target_branch_check = mock_get.call_args_list[1]
551
- assert (
552
- target_branch_check[0][0]
553
- == f'https://api.github.com/repos/test-owner/test-repo/branches/{target_branch}'
554
- )
555
-
556
- # Check PR creation
557
- mock_post.assert_called_once()
558
- post_data = mock_post.call_args[1]['json']
559
- assert post_data['base'] == target_branch # PR should target the specified branch
560
- assert (
561
- post_data['head'] == 'fork-owner:openhands-fix-issue-42'
562
- ) # Branch name should be standard
563
-
564
- # Check that push was to fork
565
- push_call = mock_run.call_args_list[1]
566
- assert f'https://test-user:test-token@github.com/{fork_owner}/test-repo.git' in str(
567
- push_call
568
- )
569
-
570
-
571
- @patch('subprocess.run')
572
- @patch('httpx.post')
573
- @patch('httpx.get')
574
- def test_send_pull_request_target_branch_with_additional_message(
575
- mock_get, mock_post, mock_run, mock_issue, mock_output_dir
576
- ):
577
- """Test that target_branch works correctly with additional PR message."""
578
- repo_path = os.path.join(mock_output_dir, 'repo')
579
- target_branch = 'feature-branch'
580
- additional_message = 'Additional PR context'
581
-
582
- # Mock API responses
583
- mock_get.side_effect = [
584
- MagicMock(status_code=404), # Branch doesn't exist
585
- MagicMock(status_code=200), # Target branch exists
586
- ]
587
-
588
- mock_post.return_value.json.return_value = {
589
- 'html_url': 'https://github.com/test-owner/test-repo/pull/1'
590
- }
591
-
592
- # Mock subprocess.run calls
593
- mock_run.side_effect = [
594
- MagicMock(returncode=0), # git checkout -b
595
- MagicMock(returncode=0), # git push
596
- ]
597
-
598
- # Call the function with target_branch and additional_message
599
- send_pull_request(
600
- issue=mock_issue,
601
- token='test-token',
602
- username='test-user',
603
- platform=ProviderType.GITHUB,
604
- patch_dir=repo_path,
605
- pr_type='ready',
606
- target_branch=target_branch,
607
- additional_message=additional_message,
608
- )
609
-
610
- # Assert API calls
611
- assert mock_get.call_count == 2
612
-
613
- # Check PR creation
614
- mock_post.assert_called_once()
615
- post_data = mock_post.call_args[1]['json']
616
- assert post_data['base'] == target_branch
617
- assert additional_message in post_data['body']
618
- assert 'This pull request fixes #42' in post_data['body']
619
-
620
-
621
- @patch('httpx.get')
622
- def test_send_pull_request_invalid_target_branch(
623
- mock_get, mock_issue, mock_output_dir, mock_llm_config
624
- ):
625
- """Test that an error is raised when specifying a non-existent target branch"""
626
- repo_path = os.path.join(mock_output_dir, 'repo')
627
-
628
- # Mock API response for non-existent branch
629
- mock_get.side_effect = [
630
- MagicMock(status_code=404), # Branch doesn't exist
631
- MagicMock(status_code=404), # Target branch doesn't exist
632
- ]
633
-
634
- # Test that ValueError is raised when target branch doesn't exist
635
- with pytest.raises(
636
- ValueError, match='Target branch nonexistent-branch does not exist'
637
- ):
638
- send_pull_request(
639
- issue=mock_issue,
640
- token='test-token',
641
- username='test-user',
642
- platform=ProviderType.GITHUB,
643
- patch_dir=repo_path,
644
- pr_type='ready',
645
- target_branch='nonexistent-branch',
646
- )
647
-
648
- # Verify API calls
649
- assert mock_get.call_count == 2
650
-
651
-
652
- @patch('subprocess.run')
653
- @patch('httpx.post')
654
- @patch('httpx.get')
655
- def test_send_pull_request_git_push_failure(
656
- mock_get, mock_post, mock_run, mock_issue, mock_output_dir, mock_llm_config
657
- ):
658
- repo_path = os.path.join(mock_output_dir, 'repo')
659
-
660
- # Mock API responses
661
- mock_get.return_value = MagicMock(json=lambda: {'default_branch': 'main'})
662
-
663
- # Mock the subprocess.run calls
664
- mock_run.side_effect = [
665
- MagicMock(returncode=0), # git checkout -b
666
- MagicMock(returncode=1, stderr='Error: failed to push some refs'), # git push
667
- ]
668
-
669
- # Test that RuntimeError is raised when git push fails
670
- with pytest.raises(
671
- RuntimeError, match='Failed to push changes to the remote repository'
672
- ):
673
- send_pull_request(
674
- issue=mock_issue,
675
- token='test-token',
676
- username='test-user',
677
- platform=ProviderType.GITHUB,
678
- patch_dir=repo_path,
679
- pr_type='ready',
680
- )
681
-
682
- # Assert that subprocess.run was called twice
683
- assert mock_run.call_count == 2
684
-
685
- # Check the git checkout -b command
686
- checkout_call = mock_run.call_args_list[0]
687
- assert checkout_call[0][0] == [
688
- 'git',
689
- '-C',
690
- repo_path,
691
- 'checkout',
692
- '-b',
693
- 'openhands-fix-issue-42',
694
- ]
695
-
696
- # Check the git push command
697
- push_call = mock_run.call_args_list[1]
698
- assert push_call[0][0] == [
699
- 'git',
700
- '-C',
701
- repo_path,
702
- 'push',
703
- 'https://test-user:test-token@github.com/test-owner/test-repo.git',
704
- 'openhands-fix-issue-42',
705
- ]
706
-
707
- # Assert that no pull request was created
708
- mock_post.assert_not_called()
709
-
710
-
711
- @patch('subprocess.run')
712
- @patch('httpx.post')
713
- @patch('httpx.get')
714
- def test_send_pull_request_permission_error(
715
- mock_get, mock_post, mock_run, mock_issue, mock_output_dir, mock_llm_config
716
- ):
717
- repo_path = os.path.join(mock_output_dir, 'repo')
718
-
719
- # Mock API responses
720
- mock_get.return_value = MagicMock(json=lambda: {'default_branch': 'main'})
721
- mock_post.return_value.status_code = 403
722
-
723
- # Mock subprocess.run calls
724
- mock_run.side_effect = [
725
- MagicMock(returncode=0), # git checkout -b
726
- MagicMock(returncode=0), # git push
727
- ]
728
-
729
- # Test that RuntimeError is raised when PR creation fails due to permissions
730
- with pytest.raises(
731
- RuntimeError, match='Failed to create pull request due to missing permissions.'
732
- ):
733
- send_pull_request(
734
- issue=mock_issue,
735
- token='test-token',
736
- username='test-user',
737
- platform=ProviderType.GITHUB,
738
- patch_dir=repo_path,
739
- pr_type='ready',
740
- )
741
-
742
- # Assert that the branch was created and pushed
743
- assert mock_run.call_count == 2
744
- mock_post.assert_called_once()
745
-
746
-
747
- @patch('httpx.post')
748
- def test_reply_to_comment(mock_post, mock_issue):
749
- # Arrange: set up the test data
750
- token = 'test_token'
751
- comment_id = 'test_comment_id'
752
- reply = 'This is a test reply.'
753
-
754
- # Create an instance of GithubIssueHandler
755
- handler = GithubIssueHandler(
756
- owner='test-owner', repo='test-repo', token=token, username='test-user'
757
- )
758
-
759
- # Mock the response from the GraphQL API
760
- mock_response = MagicMock()
761
- mock_response.status_code = 200
762
- mock_response.json.return_value = {
763
- 'data': {
764
- 'addPullRequestReviewThreadReply': {
765
- 'comment': {
766
- 'id': 'test_reply_id',
767
- 'body': 'Openhands fix success summary\n\n\nThis is a test reply.',
768
- 'createdAt': '2024-10-01T12:34:56Z',
769
- }
770
- }
771
- }
772
- }
773
-
774
- mock_post.return_value = mock_response
775
-
776
- # Act: call the function
777
- handler.reply_to_comment(mock_issue.number, comment_id, reply)
778
-
779
- # Assert: check that the POST request was made with the correct parameters
780
- query = """
781
- mutation($body: String!, $pullRequestReviewThreadId: ID!) {
782
- addPullRequestReviewThreadReply(input: { body: $body, pullRequestReviewThreadId: $pullRequestReviewThreadId }) {
783
- comment {
784
- id
785
- body
786
- createdAt
787
- }
788
- }
789
- }
790
- """
791
-
792
- expected_variables = {
793
- 'body': 'Openhands fix success summary\n\n\nThis is a test reply.',
794
- 'pullRequestReviewThreadId': comment_id,
795
- }
796
-
797
- # Check that the correct request was made to the API
798
- mock_post.assert_called_once_with(
799
- 'https://api.github.com/graphql',
800
- json={'query': query, 'variables': expected_variables},
801
- headers={
802
- 'Authorization': f'Bearer {token}',
803
- 'Content-Type': 'application/json',
804
- },
805
- )
806
-
807
- # Check that the response status was checked (via response.raise_for_status)
808
- mock_response.raise_for_status.assert_called_once()
809
-
810
-
811
- @patch('openhands.resolver.send_pull_request.initialize_repo')
812
- @patch('openhands.resolver.send_pull_request.apply_patch')
813
- @patch('openhands.resolver.send_pull_request.update_existing_pull_request')
814
- @patch('openhands.resolver.send_pull_request.make_commit')
815
- def test_process_single_pr_update(
816
- mock_make_commit,
817
- mock_update_existing_pull_request,
818
- mock_apply_patch,
819
- mock_initialize_repo,
820
- mock_output_dir,
821
- mock_llm_config,
822
- ):
823
- # Initialize test data
824
- token = 'test_token'
825
- username = 'test_user'
826
- pr_type = 'draft'
827
-
828
- resolver_output = ResolverOutput(
829
- issue=Issue(
830
- owner='test-owner',
831
- repo='test-repo',
832
- number=1,
833
- title='Issue 1',
834
- body='Body 1',
835
- closing_issues=[],
836
- review_threads=[
837
- ReviewThread(comment='review comment for feedback', files=[])
838
- ],
839
- thread_ids=['1'],
840
- head_branch='branch 1',
841
- ),
842
- issue_type='pr',
843
- instruction='Test instruction 1',
844
- base_commit='def456',
845
- git_patch='Test patch 1',
846
- history=[],
847
- metrics={},
848
- success=True,
849
- comment_success=None,
850
- result_explanation='[Test success 1]',
851
- error=None,
852
- )
853
-
854
- mock_update_existing_pull_request.return_value = (
855
- 'https://github.com/test-owner/test-repo/pull/1'
856
- )
857
- mock_initialize_repo.return_value = f'{mock_output_dir}/patches/pr_1'
858
-
859
- process_single_issue(
860
- mock_output_dir,
861
- resolver_output,
862
- token,
863
- username,
864
- ProviderType.GITHUB,
865
- pr_type,
866
- mock_llm_config,
867
- None,
868
- False,
869
- None,
870
- )
871
-
872
- mock_initialize_repo.assert_called_once_with(mock_output_dir, 1, 'pr', 'branch 1')
873
- mock_apply_patch.assert_called_once_with(
874
- f'{mock_output_dir}/patches/pr_1', resolver_output.git_patch
875
- )
876
- mock_make_commit.assert_called_once_with(
877
- f'{mock_output_dir}/patches/pr_1', resolver_output.issue, 'pr'
878
- )
879
- mock_update_existing_pull_request.assert_called_once_with(
880
- issue=resolver_output.issue,
881
- token=token,
882
- username=username,
883
- platform=ProviderType.GITHUB,
884
- patch_dir=f'{mock_output_dir}/patches/pr_1',
885
- additional_message='[Test success 1]',
886
- llm_config=mock_llm_config,
887
- base_domain='github.com',
888
- )
889
-
890
-
891
- @patch('openhands.resolver.send_pull_request.initialize_repo')
892
- @patch('openhands.resolver.send_pull_request.apply_patch')
893
- @patch('openhands.resolver.send_pull_request.send_pull_request')
894
- @patch('openhands.resolver.send_pull_request.make_commit')
895
- def test_process_single_issue(
896
- mock_make_commit,
897
- mock_send_pull_request,
898
- mock_apply_patch,
899
- mock_initialize_repo,
900
- mock_output_dir,
901
- mock_llm_config,
902
- ):
903
- # Initialize test data
904
- token = 'test_token'
905
- username = 'test_user'
906
- pr_type = 'draft'
907
- platform = ProviderType.GITHUB
908
-
909
- resolver_output = ResolverOutput(
910
- issue=Issue(
911
- owner='test-owner',
912
- repo='test-repo',
913
- number=1,
914
- title='Issue 1',
915
- body='Body 1',
916
- ),
917
- issue_type='issue',
918
- instruction='Test instruction 1',
919
- base_commit='def456',
920
- git_patch='Test patch 1',
921
- history=[],
922
- metrics={},
923
- success=True,
924
- comment_success=None,
925
- result_explanation='Test success 1',
926
- error=None,
927
- )
928
-
929
- # Mock return value
930
- mock_send_pull_request.return_value = (
931
- 'https://github.com/test-owner/test-repo/pull/1'
932
- )
933
- mock_initialize_repo.return_value = f'{mock_output_dir}/patches/issue_1'
934
-
935
- # Call the function
936
- process_single_issue(
937
- mock_output_dir,
938
- resolver_output,
939
- token,
940
- username,
941
- platform,
942
- pr_type,
943
- mock_llm_config,
944
- None,
945
- False,
946
- None,
947
- )
948
-
949
- # Assert that the mocked functions were called with correct arguments
950
- mock_initialize_repo.assert_called_once_with(mock_output_dir, 1, 'issue', 'def456')
951
- mock_apply_patch.assert_called_once_with(
952
- f'{mock_output_dir}/patches/issue_1', resolver_output.git_patch
953
- )
954
- mock_make_commit.assert_called_once_with(
955
- f'{mock_output_dir}/patches/issue_1', resolver_output.issue, 'issue'
956
- )
957
- mock_send_pull_request.assert_called_once_with(
958
- issue=resolver_output.issue,
959
- token=token,
960
- username=username,
961
- platform=platform,
962
- patch_dir=f'{mock_output_dir}/patches/issue_1',
963
- pr_type=pr_type,
964
- fork_owner=None,
965
- additional_message=resolver_output.result_explanation,
966
- target_branch=None,
967
- reviewer=None,
968
- pr_title=None,
969
- base_domain='github.com',
970
- )
971
-
972
-
973
- @patch('openhands.resolver.send_pull_request.initialize_repo')
974
- @patch('openhands.resolver.send_pull_request.apply_patch')
975
- @patch('openhands.resolver.send_pull_request.send_pull_request')
976
- @patch('openhands.resolver.send_pull_request.make_commit')
977
- def test_process_single_issue_unsuccessful(
978
- mock_make_commit,
979
- mock_send_pull_request,
980
- mock_apply_patch,
981
- mock_initialize_repo,
982
- mock_output_dir,
983
- mock_llm_config,
984
- ):
985
- # Initialize test data
986
- token = 'test_token'
987
- username = 'test_user'
988
- pr_type = 'draft'
989
-
990
- resolver_output = ResolverOutput(
991
- issue=Issue(
992
- owner='test-owner',
993
- repo='test-repo',
994
- number=1,
995
- title='Issue 1',
996
- body='Body 1',
997
- ),
998
- issue_type='issue',
999
- instruction='Test instruction 1',
1000
- base_commit='def456',
1001
- git_patch='Test patch 1',
1002
- history=[],
1003
- metrics={},
1004
- success=False,
1005
- comment_success=None,
1006
- result_explanation='',
1007
- error='Test error',
1008
- )
1009
-
1010
- # Call the function
1011
- process_single_issue(
1012
- mock_output_dir,
1013
- resolver_output,
1014
- token,
1015
- username,
1016
- ProviderType.GITHUB,
1017
- pr_type,
1018
- mock_llm_config,
1019
- None,
1020
- False,
1021
- None,
1022
- )
1023
-
1024
- # Assert that none of the mocked functions were called
1025
- mock_initialize_repo.assert_not_called()
1026
- mock_apply_patch.assert_not_called()
1027
- mock_make_commit.assert_not_called()
1028
- mock_send_pull_request.assert_not_called()
1029
-
1030
-
1031
- @patch('httpx.get')
1032
- @patch('subprocess.run')
1033
- def test_send_pull_request_branch_naming(
1034
- mock_run, mock_get, mock_issue, mock_output_dir, mock_llm_config
1035
- ):
1036
- repo_path = os.path.join(mock_output_dir, 'repo')
1037
-
1038
- # Mock API responses
1039
- mock_get.side_effect = [
1040
- MagicMock(status_code=200), # First branch exists
1041
- MagicMock(status_code=200), # Second branch exists
1042
- MagicMock(status_code=404), # Third branch doesn't exist
1043
- MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
1044
- ]
1045
-
1046
- # Mock subprocess.run calls
1047
- mock_run.side_effect = [
1048
- MagicMock(returncode=0), # git checkout -b
1049
- MagicMock(returncode=0), # git push
1050
- ]
1051
-
1052
- # Call the function
1053
- result = send_pull_request(
1054
- issue=mock_issue,
1055
- token='test-token',
1056
- username='test-user',
1057
- platform=ProviderType.GITHUB,
1058
- patch_dir=repo_path,
1059
- pr_type='branch',
1060
- )
1061
-
1062
- # Assert API calls
1063
- assert mock_get.call_count == 4
1064
-
1065
- # Check branch creation and push
1066
- assert mock_run.call_count == 2
1067
- checkout_call, push_call = mock_run.call_args_list
1068
-
1069
- assert checkout_call == call(
1070
- ['git', '-C', repo_path, 'checkout', '-b', 'openhands-fix-issue-42-try3'],
1071
- capture_output=True,
1072
- text=True,
1073
- )
1074
- assert push_call == call(
1075
- [
1076
- 'git',
1077
- '-C',
1078
- repo_path,
1079
- 'push',
1080
- 'https://test-user:[email protected]/test-owner/test-repo.git',
1081
- 'openhands-fix-issue-42-try3',
1082
- ],
1083
- capture_output=True,
1084
- text=True,
1085
- )
1086
-
1087
- # Check the result
1088
- assert (
1089
- result
1090
- == 'https://github.com/test-owner/test-repo/compare/openhands-fix-issue-42-try3?expand=1'
1091
- )
1092
-
1093
-
1094
- @patch('openhands.resolver.send_pull_request.argparse.ArgumentParser')
1095
- @patch('openhands.resolver.send_pull_request.process_single_issue')
1096
- @patch('openhands.resolver.send_pull_request.load_single_resolver_output')
1097
- @patch('openhands.resolver.send_pull_request.identify_token')
1098
- @patch('os.path.exists')
1099
- @patch('os.getenv')
1100
- def test_main(
1101
- mock_getenv,
1102
- mock_path_exists,
1103
- mock_identify_token,
1104
- mock_load_single_resolver_output,
1105
- mock_process_single_issue,
1106
- mock_parser,
1107
- ):
1108
- # Setup mock parser
1109
- mock_args = MagicMock()
1110
- mock_args.token = None
1111
- mock_args.username = 'mock_username'
1112
- mock_args.output_dir = '/mock/output'
1113
- mock_args.pr_type = 'draft'
1114
- mock_args.issue_number = '42'
1115
- mock_args.fork_owner = None
1116
- mock_args.send_on_failure = False
1117
- mock_args.llm_model = 'mock_model'
1118
- mock_args.llm_base_url = 'mock_url'
1119
- mock_args.llm_api_key = 'mock_key'
1120
- mock_args.target_branch = None
1121
- mock_args.reviewer = None
1122
- mock_args.pr_title = None
1123
- mock_args.selected_repo = None
1124
- mock_parser.return_value.parse_args.return_value = mock_args
1125
-
1126
- # Setup environment variables
1127
- mock_getenv.side_effect = (
1128
- lambda key, default=None: 'mock_token' if key == 'GITHUB_TOKEN' else default
1129
- )
1130
-
1131
- # Setup path exists
1132
- mock_path_exists.return_value = True
1133
-
1134
- # Setup mock resolver output
1135
- mock_resolver_output = MagicMock()
1136
- mock_load_single_resolver_output.return_value = mock_resolver_output
1137
-
1138
- mock_identify_token.return_value = ProviderType.GITHUB
1139
-
1140
- # Run main function
1141
- main()
1142
-
1143
- mock_identify_token.assert_called_with('mock_token', mock_args.base_domain)
1144
-
1145
- llm_config = LLMConfig(
1146
- model=mock_args.llm_model,
1147
- base_url=mock_args.llm_base_url,
1148
- api_key=mock_args.llm_api_key,
1149
- )
1150
-
1151
- # Compare call_args directly (instead of assert_called_with) for more flexible matching
1152
- assert mock_process_single_issue.call_args == call(
1153
- '/mock/output',
1154
- mock_resolver_output,
1155
- 'mock_token',
1156
- 'mock_username',
1157
- ProviderType.GITHUB,
1158
- 'draft',
1159
- llm_config,
1160
- None,
1161
- False,
1162
- mock_args.target_branch,
1163
- mock_args.reviewer,
1164
- mock_args.pr_title,
1165
- ANY,
1166
- )
1167
-
1168
- # Other assertions
1169
- mock_parser.assert_called_once()
1170
- mock_getenv.assert_any_call('GITHUB_TOKEN')
1171
- mock_path_exists.assert_called_with('/mock/output')
1172
- mock_load_single_resolver_output.assert_called_with('/mock/output/output.jsonl', 42)
1173
-
1174
- # Test for invalid issue number
1175
- mock_args.issue_number = 'invalid'
1176
- with pytest.raises(ValueError):
1177
- main()
1178
-
1179
- # Test for invalid token
1180
- mock_args.issue_number = '42' # Reset to valid issue number
1181
- mock_getenv.side_effect = (
1182
- lambda key, default=None: None
1183
- ) # Return None for all env vars
1184
- with pytest.raises(ValueError, match='token is not set'):
1185
- main()
1186
-
1187
-
1188
- @patch('subprocess.run')
1189
- def test_make_commit_escapes_issue_title(mock_subprocess_run):
1190
- # Setup
1191
- repo_dir = '/path/to/repo'
1192
- issue = Issue(
1193
- owner='test-owner',
1194
- repo='test-repo',
1195
- number=42,
1196
- title='Issue with "quotes" and $pecial characters',
1197
- body='Test body',
1198
- )
1199
-
1200
- # Mock subprocess.run to return success for all calls
1201
- mock_subprocess_run.return_value = MagicMock(
1202
- returncode=0, stdout='sample output', stderr=''
1203
- )
1204
-
1205
- # Call the function
1206
- issue_type = 'issue'
1207
- make_commit(repo_dir, issue, issue_type)
1208
-
1209
- # Assert that subprocess.run was called with the correct arguments
1210
- calls = mock_subprocess_run.call_args_list
1211
- assert len(calls) == 4  # git config check, git add, git status, git commit
1212
-
1213
- # Check the git commit call
1214
- git_commit_call = calls[3][0][0]
1215
- expected_commit_message = (
1216
- 'Fix issue #42: Issue with "quotes" and $pecial characters'
1217
- )
1218
- assert [
1219
- 'git',
1220
- '-C',
1221
- '/path/to/repo',
1222
- 'commit',
1223
- '-m',
1224
- expected_commit_message,
1225
- ] == git_commit_call
1226
-
1227
-
1228
- @patch('subprocess.run')
1229
- def test_make_commit_no_changes(mock_subprocess_run):
1230
- # Setup
1231
- repo_dir = '/path/to/repo'
1232
- issue = Issue(
1233
- owner='test-owner',
1234
- repo='test-repo',
1235
- number=42,
1236
- title='Issue with no changes',
1237
- body='Test body',
1238
- )
1239
-
1240
- # Mock subprocess.run to simulate no changes in the repo
1241
- mock_subprocess_run.side_effect = [
1242
- MagicMock(returncode=0),
1243
- MagicMock(returncode=0),
1244
- MagicMock(returncode=1, stdout=''), # git status --porcelain (no changes)
1245
- ]
1246
-
1247
- with pytest.raises(
1248
- RuntimeError, match='ERROR: Openhands failed to make code changes.'
1249
- ):
1250
- make_commit(repo_dir, issue, 'issue')
1251
-
1252
- # Check that subprocess.run was called for checking git status and add, but not commit
1253
- assert mock_subprocess_run.call_count == 3
1254
- git_status_call = mock_subprocess_run.call_args_list[2][0][0]
1255
- assert f'git -C {repo_dir} status --porcelain' in git_status_call
1256
-
1257
-
1258
- def test_apply_patch_rename_directory(mock_output_dir):
1259
- # Create a sample directory structure
1260
- old_dir = os.path.join(mock_output_dir, 'prompts', 'resolve')
1261
- os.makedirs(old_dir)
1262
-
1263
- # Create test files
1264
- test_files = [
1265
- 'issue-success-check.jinja',
1266
- 'pr-feedback-check.jinja',
1267
- 'pr-thread-check.jinja',
1268
- ]
1269
- for filename in test_files:
1270
- file_path = os.path.join(old_dir, filename)
1271
- with open(file_path, 'w') as f:
1272
- f.write(f'Content of {filename}')
1273
-
1274
- # Create a patch that renames the directory
1275
- patch_content = """diff --git a/prompts/resolve/issue-success-check.jinja b/prompts/guess_success/issue-success-check.jinja
1276
- similarity index 100%
1277
- rename from prompts/resolve/issue-success-check.jinja
1278
- rename to prompts/guess_success/issue-success-check.jinja
1279
- diff --git a/prompts/resolve/pr-feedback-check.jinja b/prompts/guess_success/pr-feedback-check.jinja
1280
- similarity index 100%
1281
- rename from prompts/resolve/pr-feedback-check.jinja
1282
- rename to prompts/guess_success/pr-feedback-check.jinja
1283
- diff --git a/prompts/resolve/pr-thread-check.jinja b/prompts/guess_success/pr-thread-check.jinja
1284
- similarity index 100%
1285
- rename from prompts/resolve/pr-thread-check.jinja
1286
- rename to prompts/guess_success/pr-thread-check.jinja"""
1287
-
1288
- # Apply the patch
1289
- apply_patch(mock_output_dir, patch_content)
1290
-
1291
- # Check if files were moved correctly
1292
- new_dir = os.path.join(mock_output_dir, 'prompts', 'guess_success')
1293
- assert not os.path.exists(old_dir), 'Old directory still exists'
1294
- assert os.path.exists(new_dir), 'New directory was not created'
1295
-
1296
- # Check if all files were moved and content preserved
1297
- for filename in test_files:
1298
- old_path = os.path.join(old_dir, filename)
1299
- new_path = os.path.join(new_dir, filename)
1300
- assert not os.path.exists(old_path), f'Old file {filename} still exists'
1301
- assert os.path.exists(new_path), f'New file {filename} was not created'
1302
- with open(new_path, 'r') as f:
1303
- content = f.read()
1304
- assert content == f'Content of {filename}', f'Content mismatch for {filename}'
tests/unit/resolver/gitlab/test_gitlab_guess_success.py DELETED
@@ -1,202 +0,0 @@
1
- import json
2
- from unittest.mock import MagicMock, patch
3
-
4
- from openhands.core.config import LLMConfig
5
- from openhands.events.action.message import MessageAction
6
- from openhands.llm import LLM
7
- from openhands.resolver.interfaces.gitlab import GitlabIssueHandler, GitlabPRHandler
8
- from openhands.resolver.interfaces.issue import Issue
9
- from openhands.resolver.interfaces.issue_definitions import (
10
- ServiceContextIssue,
11
- ServiceContextPR,
12
- )
13
-
14
-
15
- def test_guess_success_multiline_explanation():
16
- # Mock data
17
- issue = Issue(
18
- owner='test',
19
- repo='test',
20
- number=1,
21
- title='Test Issue',
22
- body='Test body',
23
- thread_comments=None,
24
- review_comments=None,
25
- )
26
- history = [MessageAction(content='Test message')]
27
- llm_config = LLMConfig(model='test', api_key='test')
28
-
29
- # Create a mock response with multi-line explanation
30
- mock_response = MagicMock()
31
- mock_response.choices = [
32
- MagicMock(
33
- message=MagicMock(
34
- content="""--- success
35
- true
36
-
37
- --- explanation
38
- The PR successfully addressed the issue by:
39
- - Fixed bug A
40
- - Added test B
41
- - Updated documentation C
42
-
43
- Automatic fix generated by OpenHands 🙌"""
44
- )
45
- )
46
- ]
47
-
48
- # Use patch to mock the LLM completion call
49
- with patch.object(LLM, 'completion', return_value=mock_response) as mock_completion:
50
- # Create a handler instance
51
- handler = ServiceContextIssue(
52
- GitlabIssueHandler('test', 'test', 'test'), llm_config
53
- )
54
-
55
- # Call guess_success
56
- success, _, explanation = handler.guess_success(issue, history)
57
-
58
- # Verify the results
59
- assert success is True
60
- assert 'The PR successfully addressed the issue by:' in explanation
61
- assert 'Fixed bug A' in explanation
62
- assert 'Added test B' in explanation
63
- assert 'Updated documentation C' in explanation
64
- assert 'Automatic fix generated by OpenHands' in explanation
65
-
66
- # Verify that LLM completion was called exactly once
67
- mock_completion.assert_called_once()
68
-
69
-
70
- def test_pr_handler_guess_success_with_thread_comments():
71
- # Create a PR handler instance
72
- llm_config = LLMConfig(model='test', api_key='test')
73
- handler = ServiceContextPR(GitlabPRHandler('test', 'test', 'test'), llm_config)
74
-
75
- # Create a mock issue with thread comments but no review comments
76
- issue = Issue(
77
- owner='test-owner',
78
- repo='test-repo',
79
- number=1,
80
- title='Test PR',
81
- body='Test Body',
82
- thread_comments=['First comment', 'Second comment'],
83
- closing_issues=['Issue description'],
84
- review_comments=None,
85
- thread_ids=None,
86
- head_branch='test-branch',
87
- )
88
-
89
- # Create mock history
90
- history = [MessageAction(content='Fixed the issue by implementing X and Y')]
91
-
92
- # Create mock LLM config
93
- llm_config = LLMConfig(model='test-model', api_key='test-key')
94
-
95
- # Mock the LLM response
96
- mock_response = MagicMock()
97
- mock_response.choices = [
98
- MagicMock(
99
- message=MagicMock(
100
- content="""--- success
101
- true
102
-
103
- --- explanation
104
- The changes successfully address the feedback."""
105
- )
106
- )
107
- ]
108
-
109
- # Test the guess_success method
110
- with patch.object(LLM, 'completion', return_value=mock_response):
111
- success, success_list, explanation = handler.guess_success(issue, history)
112
-
113
- # Verify the results
114
- assert success is True
115
- assert success_list == [True]
116
- assert 'successfully address' in explanation
117
- assert len(json.loads(explanation)) == 1
118
-
119
-
120
- def test_pr_handler_guess_success_only_review_comments():
121
- # Create a PR handler instance
122
- llm_config = LLMConfig(model='test', api_key='test')
123
- handler = ServiceContextPR(
124
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
125
- )
126
-
127
- # Create a mock issue with only review comments
128
- issue = Issue(
129
- owner='test-owner',
130
- repo='test-repo',
131
- number=1,
132
- title='Test PR',
133
- body='Test Body',
134
- thread_comments=None,
135
- closing_issues=['Issue description'],
136
- review_comments=['Please fix the formatting', 'Add more tests'],
137
- thread_ids=None,
138
- head_branch='test-branch',
139
- )
140
-
141
- # Create mock history
142
- history = [MessageAction(content='Fixed the formatting and added more tests')]
143
-
144
- # Create mock LLM config
145
- llm_config = LLMConfig(model='test-model', api_key='test-key')
146
-
147
- # Mock the LLM response
148
- mock_response = MagicMock()
149
- mock_response.choices = [
150
- MagicMock(
151
- message=MagicMock(
152
- content="""--- success
153
- true
154
-
155
- --- explanation
156
- The changes successfully address the review comments."""
157
- )
158
- )
159
- ]
160
-
161
- # Test the guess_success method
162
- with patch.object(LLM, 'completion', return_value=mock_response):
163
- success, success_list, explanation = handler.guess_success(issue, history)
164
-
165
- # Verify the results
166
- assert success is True
167
- assert success_list == [True]
168
- assert (
169
- '["The changes successfully address the review comments."]' in explanation
170
- )
171
-
172
-
173
- def test_pr_handler_guess_success_no_comments():
174
- # Create a PR handler instance
175
- llm_config = LLMConfig(model='test', api_key='test')
176
- handler = ServiceContextPR(GitlabPRHandler('test', 'test', 'test'), llm_config)
177
-
178
- # Create a mock issue with no comments
179
- issue = Issue(
180
- owner='test-owner',
181
- repo='test-repo',
182
- number=1,
183
- title='Test PR',
184
- body='Test Body',
185
- thread_comments=None,
186
- closing_issues=['Issue description'],
187
- review_comments=None,
188
- thread_ids=None,
189
- head_branch='test-branch',
190
- )
191
-
192
- # Create mock history
193
- history = [MessageAction(content='Fixed the issue')]
194
-
195
- # Create mock LLM config
196
- llm_config = LLMConfig(model='test-model', api_key='test-key')
197
-
198
- # Test that it returns appropriate message when no comments are present
199
- success, success_list, explanation = handler.guess_success(issue, history)
200
- assert success is False
201
- assert success_list is None
202
- assert explanation == 'No feedback was found to process'
tests/unit/resolver/gitlab/test_gitlab_issue_handler.py DELETED
@@ -1,683 +0,0 @@
1
- from unittest.mock import MagicMock, patch
2
-
3
- from openhands.core.config import LLMConfig
4
- from openhands.resolver.interfaces.gitlab import GitlabIssueHandler, GitlabPRHandler
5
- from openhands.resolver.interfaces.issue import ReviewThread
6
- from openhands.resolver.interfaces.issue_definitions import (
7
- ServiceContextIssue,
8
- ServiceContextPR,
9
- )
10
-
11
-
12
- def test_get_converted_issues_initializes_review_comments():
13
- # Mock the necessary dependencies
14
- with patch('httpx.get') as mock_get:
15
- # Mock the response for issues
16
- mock_issues_response = MagicMock()
17
- mock_issues_response.json.return_value = [
18
- {'iid': 1, 'title': 'Test Issue', 'description': 'Test Body'}
19
- ]
20
- # Mock the response for comments
21
- mock_comments_response = MagicMock()
22
- mock_comments_response.json.return_value = []
23
-
24
- # Set up the mock to return different responses for different calls
25
- # First call is for issues, second call is for comments
26
- mock_get.side_effect = [
27
- mock_issues_response,
28
- mock_comments_response,
29
- mock_comments_response,
30
- ] # Need two comment responses because we make two API calls
31
-
32
- # Create an instance of IssueHandler
33
- llm_config = LLMConfig(model='test', api_key='test')
34
- handler = ServiceContextIssue(
35
- GitlabIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
36
- )
37
-
38
- # Get converted issues
39
- issues = handler.get_converted_issues(issue_numbers=[1])
40
-
41
- # Verify that we got exactly one issue
42
- assert len(issues) == 1
43
-
44
- # Verify that review_comments is initialized as None
45
- assert issues[0].review_comments is None
46
-
47
- # Verify other fields are set correctly
48
- assert issues[0].number == 1
49
- assert issues[0].title == 'Test Issue'
50
- assert issues[0].body == 'Test Body'
51
- assert issues[0].owner == 'test-owner'
52
- assert issues[0].repo == 'test-repo'
53
-
54
-
55
- def test_get_converted_issues_handles_empty_body():
56
- # Mock the necessary dependencies
57
- with patch('httpx.get') as mock_get:
58
- # Mock the response for issues
59
- mock_issues_response = MagicMock()
60
- mock_issues_response.json.return_value = [
61
- {'iid': 1, 'title': 'Test Issue', 'description': None}
62
- ]
63
- # Mock the response for comments
64
- mock_comments_response = MagicMock()
65
- mock_comments_response.json.return_value = []
66
- # Set up the mock to return different responses
67
- mock_get.side_effect = [
68
- mock_issues_response,
69
- mock_comments_response,
70
- mock_comments_response,
71
- ]
72
-
73
- # Create an instance of IssueHandler
74
- llm_config = LLMConfig(model='test', api_key='test')
75
- handler = ServiceContextIssue(
76
- GitlabIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
77
- )
78
-
79
- # Get converted issues
80
- issues = handler.get_converted_issues(issue_numbers=[1])
81
-
82
- # Verify that we got exactly one issue
83
- assert len(issues) == 1
84
-
85
- # Verify that body is empty string when None
86
- assert issues[0].body == ''
87
-
88
- # Verify other fields are set correctly
89
- assert issues[0].number == 1
90
- assert issues[0].title == 'Test Issue'
91
- assert issues[0].owner == 'test-owner'
92
- assert issues[0].repo == 'test-repo'
93
-
94
- # Verify that review_comments is initialized as None
95
- assert issues[0].review_comments is None
96
-
97
-
98
- def test_pr_handler_get_converted_issues_with_comments():
99
- # Mock the necessary dependencies
100
- with patch('httpx.get') as mock_get:
101
- # Mock the response for PRs
102
- mock_prs_response = MagicMock()
103
- mock_prs_response.json.return_value = [
104
- {
105
- 'iid': 1,
106
- 'title': 'Test PR',
107
- 'description': 'Test Body fixes #1',
108
- 'source_branch': 'test-branch',
109
- }
110
- ]
111
-
112
- # Mock the response for PR comments
113
- mock_comments_response = MagicMock()
114
- mock_comments_response.json.return_value = [
115
- {'body': 'First comment', 'resolvable': True, 'system': False},
116
- {'body': 'Second comment', 'resolvable': True, 'system': False},
117
- ]
118
-
119
- # Mock the response for PR metadata (GraphQL)
120
- mock_graphql_response = MagicMock()
121
- mock_graphql_response.json.return_value = {
122
- 'data': {
123
- 'project': {
124
- 'mergeRequest': {
125
- 'discussions': {'edges': []},
126
- }
127
- }
128
- }
129
- }
130
-
131
- # Set up the mock to return different responses
132
- # We need to return empty responses for subsequent pages
133
- mock_empty_response = MagicMock()
134
- mock_empty_response.json.return_value = []
135
-
136
- # Mock the response for fetching the external issue referenced in PR body
137
- mock_external_issue_response = MagicMock()
138
- mock_external_issue_response.json.return_value = {
139
- 'description': 'This is additional context from an externally referenced issue.'
140
- }
141
-
142
- mock_get.side_effect = [
143
- mock_prs_response, # First call for PRs
144
- mock_empty_response, # Second call for PRs (empty page)
145
- mock_empty_response, # Third call for related issues
146
- mock_comments_response, # Fourth call for PR comments
147
- mock_empty_response, # Fifth call for PR comments (empty page)
148
- mock_external_issue_response, # Mock response for the external issue reference #1
149
- ]
150
-
151
- # Mock the post request for GraphQL
152
- with patch('httpx.post') as mock_post:
153
- mock_post.return_value = mock_graphql_response
154
-
155
- # Create an instance of PRHandler
156
- llm_config = LLMConfig(model='test', api_key='test')
157
- handler = ServiceContextPR(
158
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
159
- )
160
-
161
- # Get converted issues
162
- prs = handler.get_converted_issues(issue_numbers=[1])
163
-
164
- # Verify that we got exactly one PR
165
- assert len(prs) == 1
166
-
167
- # Verify that thread_comments are set correctly
168
- assert prs[0].thread_comments == ['First comment', 'Second comment']
169
-
170
- # Verify other fields are set correctly
171
- assert prs[0].number == 1
172
- assert prs[0].title == 'Test PR'
173
- assert prs[0].body == 'Test Body fixes #1'
174
- assert prs[0].owner == 'test-owner'
175
- assert prs[0].repo == 'test-repo'
176
- assert prs[0].head_branch == 'test-branch'
177
- assert prs[0].closing_issues == [
178
- 'This is additional context from an externally referenced issue.'
179
- ]
180
-
181
-
182
- def test_get_issue_comments_with_specific_comment_id():
183
- # Mock the necessary dependencies
184
- with patch('httpx.get') as mock_get:
185
- # Mock the response for comments
186
- mock_comments_response = MagicMock()
187
- mock_comments_response.json.return_value = [
188
- {'id': 123, 'body': 'First comment', 'resolvable': True, 'system': False},
189
- {'id': 456, 'body': 'Second comment', 'resolvable': True, 'system': False},
190
- ]
191
-
192
- mock_get.return_value = mock_comments_response
193
-
194
- # Create an instance of IssueHandler
195
- llm_config = LLMConfig(model='test', api_key='test')
196
- handler = ServiceContextIssue(
197
- GitlabIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
198
- )
199
-
200
- # Get comments with a specific comment_id
201
- specific_comment = handler.get_issue_comments(issue_number=1, comment_id=123)
202
-
203
- # Verify only the specific comment is returned
204
- assert specific_comment == ['First comment']
205
-
206
-
207
- def test_pr_handler_get_converted_issues_with_specific_thread_comment():
208
- # Define the specific comment_id to filter
209
- specific_comment_id = 123
210
-
211
- # Mock GraphQL response for review threads
212
- with patch('httpx.get') as mock_get:
213
- # Mock the response for PRs
214
- mock_prs_response = MagicMock()
215
- mock_prs_response.json.return_value = [
216
- {
217
- 'iid': 1,
218
- 'title': 'Test PR',
219
- 'description': 'Test Body',
220
- 'source_branch': 'test-branch',
221
- }
222
- ]
223
-
224
- # Mock the response for PR comments
225
- mock_comments_response = MagicMock()
226
- mock_comments_response.json.return_value = [
227
- {'body': 'First comment', 'id': 123, 'resolvable': True, 'system': False},
228
- {'body': 'Second comment', 'id': 124, 'resolvable': True, 'system': False},
229
- ]
230
-
231
- # Mock the response for PR metadata (GraphQL)
232
- mock_graphql_response = MagicMock()
233
- mock_graphql_response.json.return_value = {
234
- 'data': {
235
- 'project': {
236
- 'mergeRequest': {
237
- 'discussions': {
238
- 'edges': [
239
- {
240
- 'node': {
241
- 'id': 'review-thread-1',
242
- 'resolved': False,
243
- 'resolvable': True,
244
- 'notes': {
245
- 'nodes': [
246
- {
247
- 'id': 'GID/121',
248
- 'body': 'Specific review comment',
249
- 'position': {
250
- 'filePath': 'file1.txt',
251
- },
252
- },
253
- {
254
- 'id': 'GID/456',
255
- 'body': 'Another review comment',
256
- 'position': {
257
- 'filePath': 'file2.txt',
258
- },
259
- },
260
- ]
261
- },
262
- }
263
- }
264
- ]
265
- },
266
- }
267
- }
268
- }
269
- }
270
-
271
- # Set up the mock to return different responses
272
- # We need to return empty responses for subsequent pages
273
- mock_empty_response = MagicMock()
274
- mock_empty_response.json.return_value = []
275
-
276
- mock_get.side_effect = [
277
- mock_prs_response, # First call for PRs
278
- mock_empty_response, # Second call for PRs (empty page)
279
- mock_empty_response, # Third call for related issues
280
- mock_comments_response, # Fourth call for PR comments
281
- mock_empty_response, # Fifth call for PR comments (empty page)
282
- ]
283
-
284
- # Mock the post request for GraphQL
285
- with patch('httpx.post') as mock_post:
286
- mock_post.return_value = mock_graphql_response
287
-
288
- # Create an instance of PRHandler
289
- llm_config = LLMConfig(model='test', api_key='test')
290
- handler = ServiceContextPR(
291
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
292
- )
293
-
294
- # Get converted issues
295
- prs = handler.get_converted_issues(
296
- issue_numbers=[1], comment_id=specific_comment_id
297
- )
298
-
299
- # Verify that we got exactly one PR
300
- assert len(prs) == 1
301
-
302
- # Verify that thread_comments are set correctly
303
- assert prs[0].thread_comments == ['First comment']
304
- assert prs[0].review_comments is None
305
- assert prs[0].review_threads == []
306
-
307
- # Verify other fields are set correctly
308
- assert prs[0].number == 1
309
- assert prs[0].title == 'Test PR'
310
- assert prs[0].body == 'Test Body'
311
- assert prs[0].owner == 'test-owner'
312
- assert prs[0].repo == 'test-repo'
313
- assert prs[0].head_branch == 'test-branch'
314
-
315
-
316
- def test_pr_handler_get_converted_issues_with_specific_review_thread_comment():
317
- # Define the specific comment_id to filter
318
- specific_comment_id = 123
319
-
320
- # Mock GraphQL response for review threads
321
- with patch('httpx.get') as mock_get:
322
- # Mock the response for PRs
323
- mock_prs_response = MagicMock()
324
- mock_prs_response.json.return_value = [
325
- {
326
- 'iid': 1,
327
- 'title': 'Test PR',
328
- 'description': 'Test Body',
329
- 'source_branch': 'test-branch',
330
- }
331
- ]
332
-
333
- # Mock the response for PR comments
334
- mock_comments_response = MagicMock()
335
- mock_comments_response.json.return_value = [
336
- {
337
- 'description': 'First comment',
338
- 'id': 120,
339
- 'resolvable': True,
340
- 'system': False,
341
- },
342
- {
343
- 'description': 'Second comment',
344
- 'id': 124,
345
- 'resolvable': True,
346
- 'system': False,
347
- },
348
- ]
349
-
350
- # Mock the response for PR metadata (GraphQL)
351
- mock_graphql_response = MagicMock()
352
- mock_graphql_response.json.return_value = {
353
- 'data': {
354
- 'project': {
355
- 'mergeRequest': {
356
- 'discussions': {
357
- 'edges': [
358
- {
359
- 'node': {
360
- 'id': 'review-thread-1',
361
- 'resolved': False,
362
- 'resolvable': True,
363
- 'notes': {
364
- 'nodes': [
365
- {
366
- 'id': f'GID/{specific_comment_id}',
367
- 'body': 'Specific review comment',
368
- 'position': {
369
- 'filePath': 'file1.txt',
370
- },
371
- },
372
- {
373
- 'id': 'GID/456',
374
- 'body': 'Another review comment',
375
- 'position': {
376
- 'filePath': 'file1.txt',
377
- },
378
- },
379
- ]
380
- },
381
- }
382
- }
383
- ]
384
- },
385
- }
386
- }
387
- }
388
- }
389
-
390
- # Set up the mock to return different responses
391
- # We need to return empty responses for subsequent pages
392
- mock_empty_response = MagicMock()
393
- mock_empty_response.json.return_value = []
394
-
395
- mock_get.side_effect = [
396
- mock_prs_response, # First call for PRs
397
- mock_empty_response, # Second call for PRs (empty page)
398
- mock_empty_response, # Third call for related issues
399
- mock_comments_response, # Fourth call for PR comments
400
- mock_empty_response, # Fifth call for PR comments (empty page)
401
- ]
402
-
403
- # Mock the post request for GraphQL
404
- with patch('httpx.post') as mock_post:
405
- mock_post.return_value = mock_graphql_response
406
-
407
- # Create an instance of PRHandler
408
- llm_config = LLMConfig(model='test', api_key='test')
409
- handler = ServiceContextPR(
410
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
411
- )
412
-
413
- # Get converted issues
414
- prs = handler.get_converted_issues(
415
- issue_numbers=[1], comment_id=specific_comment_id
416
- )
417
-
418
- # Verify that we got exactly one PR
419
- assert len(prs) == 1
420
-
421
- # Verify that thread_comments are set correctly
422
- assert prs[0].thread_comments is None
423
- assert prs[0].review_comments is None
424
- assert len(prs[0].review_threads) == 1
425
- assert isinstance(prs[0].review_threads[0], ReviewThread)
426
- assert (
427
- prs[0].review_threads[0].comment
428
- == 'Specific review comment\n---\nlatest feedback:\nAnother review comment\n'
429
- )
430
- assert prs[0].review_threads[0].files == ['file1.txt']
431
-
432
- # Verify other fields are set correctly
433
- assert prs[0].number == 1
434
- assert prs[0].title == 'Test PR'
435
- assert prs[0].body == 'Test Body'
436
- assert prs[0].owner == 'test-owner'
437
- assert prs[0].repo == 'test-repo'
438
- assert prs[0].head_branch == 'test-branch'
439
-
440
-
441
- def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
442
- # Define the specific comment_id to filter
443
- specific_comment_id = 123
444
-
445
- # Mock GraphQL response for review threads
446
- with patch('httpx.get') as mock_get:
447
- # Mock the response for PRs
448
- mock_prs_response = MagicMock()
449
- mock_prs_response.json.return_value = [
450
- {
451
- 'iid': 1,
452
- 'title': 'Test PR fixes #3',
453
- 'description': 'Test Body',
454
- 'source_branch': 'test-branch',
455
- }
456
- ]
457
-
458
- # Mock the response for PR comments
459
- mock_comments_response = MagicMock()
460
- mock_comments_response.json.return_value = [
461
- {
462
- 'description': 'First comment',
463
- 'id': 120,
464
- 'resolvable': True,
465
- 'system': False,
466
- },
467
- {
468
- 'description': 'Second comment',
469
- 'id': 124,
470
- 'resolvable': True,
471
- 'system': False,
472
- },
473
- ]
474
-
475
- # Mock the response for PR metadata (GraphQL)
476
- mock_graphql_response = MagicMock()
477
- mock_graphql_response.json.return_value = {
478
- 'data': {
479
- 'project': {
480
- 'mergeRequest': {
481
- 'discussions': {
482
- 'edges': [
483
- {
484
- 'node': {
485
- 'id': 'review-thread-1',
486
- 'resolved': False,
487
- 'resolvable': True,
488
- 'notes': {
489
- 'nodes': [
490
- {
491
- 'id': f'GID/{specific_comment_id}',
492
- 'body': 'Specific review comment that references #6',
493
- 'position': {
494
- 'filePath': 'file1.txt',
495
- },
496
- },
497
- {
498
- 'id': 'GID/456',
499
- 'body': 'Another review comment referencing #7',
500
- 'position': {
501
- 'filePath': 'file2.txt',
502
- },
503
- },
504
- ]
505
- },
506
- }
507
- }
508
- ]
509
- },
510
- }
511
- }
512
- }
513
- }
514
-
515
- # Set up the mock to return different responses
516
- # We need to return empty responses for subsequent pages
517
- mock_empty_response = MagicMock()
518
- mock_empty_response.json.return_value = []
519
-
520
- # Mock the response for fetching the external issue referenced in PR body
521
- mock_external_issue_response_in_body = MagicMock()
522
- mock_external_issue_response_in_body.json.return_value = {
523
- 'description': 'External context #1.'
524
- }
525
-
526
- # Mock the response for fetching the external issue referenced in review thread
527
- mock_external_issue_response_review_thread = MagicMock()
528
- mock_external_issue_response_review_thread.json.return_value = {
529
- 'description': 'External context #2.'
530
- }
531
-
532
- mock_get.side_effect = [
533
- mock_prs_response, # First call for PRs
534
- mock_empty_response, # Second call for PRs (empty page)
535
- mock_empty_response, # Third call for related issues
536
- mock_comments_response, # Fourth call for PR comments
537
- mock_empty_response, # Fifth call for PR comments (empty page)
538
- mock_external_issue_response_in_body,
539
- mock_external_issue_response_review_thread,
540
- ]
541
-
542
- # Mock the post request for GraphQL
543
- with patch('httpx.post') as mock_post:
544
- mock_post.return_value = mock_graphql_response
545
-
546
- # Create an instance of PRHandler
547
- llm_config = LLMConfig(model='test', api_key='test')
548
- handler = ServiceContextPR(
549
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
550
- )
551
-
552
- # Get converted issues
553
- prs = handler.get_converted_issues(
554
- issue_numbers=[1], comment_id=specific_comment_id
555
- )
556
-
557
- # Verify that we got exactly one PR
558
- assert len(prs) == 1
559
-
560
- # Verify that thread_comments are set correctly
561
- assert prs[0].thread_comments is None
562
- assert prs[0].review_comments is None
563
- assert len(prs[0].review_threads) == 1
564
- assert isinstance(prs[0].review_threads[0], ReviewThread)
565
- assert (
566
- prs[0].review_threads[0].comment
567
- == 'Specific review comment that references #6\n---\nlatest feedback:\nAnother review comment referencing #7\n'
568
- )
569
- assert prs[0].closing_issues == [
570
- 'External context #1.',
571
- 'External context #2.',
572
- ] # Only includes references inside comment ID and body PR
573
-
574
- # Verify other fields are set correctly
575
- assert prs[0].number == 1
576
- assert prs[0].title == 'Test PR fixes #3'
577
- assert prs[0].body == 'Test Body'
578
- assert prs[0].owner == 'test-owner'
579
- assert prs[0].repo == 'test-repo'
580
- assert prs[0].head_branch == 'test-branch'
581
-
582
-
583
- def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
584
- # Mock the necessary dependencies
585
- with patch('httpx.get') as mock_get:
586
- # Mock the response for PRs
587
- mock_prs_response = MagicMock()
588
- mock_prs_response.json.return_value = [
589
- {
590
- 'iid': 1,
591
- 'title': 'Test PR',
592
- 'description': 'Test Body fixes #1',
593
- 'source_branch': 'test-branch',
594
- }
595
- ]
596
-
597
- # Mock the response for PR comments
598
- mock_comments_response = MagicMock()
599
- mock_comments_response.json.return_value = [
600
- {
601
- 'body': 'First comment addressing #1',
602
- 'resolvable': True,
603
- 'system': False,
604
- },
605
- {
606
- 'body': 'Second comment addressing #2',
607
- 'resolvable': True,
608
- 'system': False,
609
- },
610
- ]
611
-
612
- # Mock the response for PR metadata (GraphQL)
613
- mock_graphql_response = MagicMock()
614
- mock_graphql_response.json.return_value = {
615
- 'data': {
616
- 'project': {
617
- 'mergeRequest': {
618
- 'discussions': {'edges': []},
619
- }
620
- }
621
- }
622
- }
623
-
624
- # Set up the mock to return different responses
625
- # We need to return empty responses for subsequent pages
626
- mock_empty_response = MagicMock()
627
- mock_empty_response.json.return_value = []
628
-
629
- # Mock the response for fetching the external issue referenced in PR body
630
- mock_external_issue_response_in_body = MagicMock()
631
- mock_external_issue_response_in_body.json.return_value = {
632
- 'description': 'External context #1.'
633
- }
634
-
635
- # Mock the response for fetching the external issue referenced in review thread
636
- mock_external_issue_response_in_comment = MagicMock()
637
- mock_external_issue_response_in_comment.json.return_value = {
638
- 'description': 'External context #2.'
639
- }
640
-
641
- mock_get.side_effect = [
642
- mock_prs_response, # First call for PRs
643
- mock_empty_response, # Second call for PRs (empty page)
644
- mock_empty_response, # Third call for related issues
645
- mock_comments_response, # Fourth call for PR comments
646
- mock_empty_response, # Fifth call for PR comments (empty page)
647
- mock_external_issue_response_in_body, # Mock response for the external issue reference #1
648
- mock_external_issue_response_in_comment,
649
- ]
650
-
651
- # Mock the post request for GraphQL
652
- with patch('httpx.post') as mock_post:
653
- mock_post.return_value = mock_graphql_response
654
-
655
- # Create an instance of PRHandler
656
- llm_config = LLMConfig(model='test', api_key='test')
657
- handler = ServiceContextPR(
658
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
659
- )
660
-
661
- # Get converted issues
662
- prs = handler.get_converted_issues(issue_numbers=[1])
663
-
664
- # Verify that we got exactly one PR
665
- assert len(prs) == 1
666
-
667
- # Verify that thread_comments are set correctly
668
- assert prs[0].thread_comments == [
669
- 'First comment addressing #1',
670
- 'Second comment addressing #2',
671
- ]
672
-
673
- # Verify other fields are set correctly
674
- assert prs[0].number == 1
675
- assert prs[0].title == 'Test PR'
676
- assert prs[0].body == 'Test Body fixes #1'
677
- assert prs[0].owner == 'test-owner'
678
- assert prs[0].repo == 'test-repo'
679
- assert prs[0].head_branch == 'test-branch'
680
- assert prs[0].closing_issues == [
681
- 'External context #1.',
682
- 'External context #2.',
683
- ]
tests/unit/resolver/gitlab/test_gitlab_issue_handler_error_handling.py DELETED
@@ -1,283 +0,0 @@
1
- from unittest.mock import MagicMock, patch
2
-
3
- import httpx
4
- import pytest
5
- from litellm.exceptions import RateLimitError
6
-
7
- from openhands.core.config import LLMConfig
8
- from openhands.events.action.message import MessageAction
9
- from openhands.llm.llm import LLM
10
- from openhands.resolver.interfaces.gitlab import GitlabIssueHandler, GitlabPRHandler
11
- from openhands.resolver.interfaces.issue import Issue
12
- from openhands.resolver.interfaces.issue_definitions import (
13
- ServiceContextIssue,
14
- ServiceContextPR,
15
- )
16
-
17
-
18
- @pytest.fixture(autouse=True)
19
- def mock_logger(monkeypatch):
20
- # suppress logging of completion data to file
21
- mock_logger = MagicMock()
22
- monkeypatch.setattr('openhands.llm.debug_mixin.llm_prompt_logger', mock_logger)
23
- monkeypatch.setattr('openhands.llm.debug_mixin.llm_response_logger', mock_logger)
24
- return mock_logger
25
-
26
-
27
- @pytest.fixture
28
- def default_config():
29
- return LLMConfig(
30
- model='gpt-4o',
31
- api_key='test_key',
32
- num_retries=2,
33
- retry_min_wait=1,
34
- retry_max_wait=2,
35
- )
36
-
37
-
38
- def test_handle_nonexistent_issue_reference():
39
- llm_config = LLMConfig(model='test', api_key='test')
40
- handler = ServiceContextPR(
41
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
42
- )
43
-
44
- # Mock httpx.get to simulate a 404 error
45
- mock_response = MagicMock()
46
- mock_response.raise_for_status.side_effect = httpx.HTTPError(
47
- '404 Client Error: Not Found'
48
- )
49
-
50
- with patch('httpx.get', return_value=mock_response):
51
- # Call the method with a non-existent issue reference
52
- result = handler._strategy.get_context_from_external_issues_references(
53
- closing_issues=[],
54
- closing_issue_numbers=[],
55
- issue_body='This references #999999', # Non-existent issue
56
- review_comments=[],
57
- review_threads=[],
58
- thread_comments=None,
59
- )
60
-
61
- # The method should return an empty list since the referenced issue couldn't be fetched
62
- assert result == []
63
-
64
-
65
- def test_handle_rate_limit_error():
66
- llm_config = LLMConfig(model='test', api_key='test')
67
- handler = ServiceContextPR(
68
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
69
- )
70
-
71
- # Mock httpx.get to simulate a rate limit error
72
- mock_response = MagicMock()
73
- mock_response.raise_for_status.side_effect = httpx.HTTPError(
74
- '403 Client Error: Rate Limit Exceeded'
75
- )
76
-
77
- with patch('httpx.get', return_value=mock_response):
78
- # Call the method with an issue reference
79
- result = handler._strategy.get_context_from_external_issues_references(
80
- closing_issues=[],
81
- closing_issue_numbers=[],
82
- issue_body='This references #123',
83
- review_comments=[],
84
- review_threads=[],
85
- thread_comments=None,
86
- )
87
-
88
- # The method should return an empty list since the request was rate limited
89
- assert result == []
90
-
91
-
92
- def test_handle_network_error():
93
- llm_config = LLMConfig(model='test', api_key='test')
94
- handler = ServiceContextPR(
95
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
96
- )
97
-
98
- # Mock httpx.get to simulate a network error
99
- with patch('httpx.get', side_effect=httpx.NetworkError('Network Error')):
100
- # Call the method with an issue reference
101
- result = handler._strategy.get_context_from_external_issues_references(
102
- closing_issues=[],
103
- closing_issue_numbers=[],
104
- issue_body='This references #123',
105
- review_comments=[],
106
- review_threads=[],
107
- thread_comments=None,
108
- )
109
-
110
- # The method should return an empty list since the network request failed
111
- assert result == []
112
-
113
-
114
- def test_successful_issue_reference():
115
- llm_config = LLMConfig(model='test', api_key='test')
116
- handler = ServiceContextPR(
117
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
118
- )
119
-
120
- # Mock a successful response
121
- mock_response = MagicMock()
122
- mock_response.raise_for_status.return_value = None
123
- mock_response.json.return_value = {
124
- 'description': 'This is the referenced issue body'
125
- }
126
-
127
- with patch('httpx.get', return_value=mock_response):
128
- # Call the method with an issue reference
129
- result = handler._strategy.get_context_from_external_issues_references(
130
- closing_issues=[],
131
- closing_issue_numbers=[],
132
- issue_body='This references #123',
133
- review_comments=[],
134
- review_threads=[],
135
- thread_comments=None,
136
- )
137
-
138
- # The method should return a list with the referenced issue body
139
- assert result == ['This is the referenced issue body']
140
-
141
-
142
- class MockLLMResponse:
-     """Mock LLM Response class to mimic the actual LLM response structure."""
-
-     class Choice:
-         class Message:
-             def __init__(self, content):
-                 self.content = content
-
-         def __init__(self, content):
-             self.message = self.Message(content)
-
-     def __init__(self, content):
-         self.choices = [self.Choice(content)]
-
-
- class DotDict(dict):
-     """
-     A dictionary that supports dot notation access.
-     """
-
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         for key, value in self.items():
-             if isinstance(value, dict):
-                 self[key] = DotDict(value)
-             elif isinstance(value, list):
-                 self[key] = [
-                     DotDict(item) if isinstance(item, dict) else item for item in value
-                 ]
-
-     def __getattr__(self, key):
-         if key in self:
-             return self[key]
-         else:
-             raise AttributeError(
-                 f"'{self.__class__.__name__}' object has no attribute '{key}'"
-             )
-
-     def __setattr__(self, key, value):
-         self[key] = value
-
-     def __delattr__(self, key):
-         if key in self:
-             del self[key]
-         else:
-             raise AttributeError(
-                 f"'{self.__class__.__name__}' object has no attribute '{key}'"
-             )
-
-
192
- @patch('openhands.llm.llm.litellm_completion')
193
- def test_guess_success_rate_limit_wait_time(mock_litellm_completion, default_config):
194
- """Test that the retry mechanism in guess_success respects wait time between retries."""
195
-
196
- with patch('time.sleep') as mock_sleep:
197
- # Simulate a rate limit error followed by a successful response
198
- mock_litellm_completion.side_effect = [
199
- RateLimitError(
200
- 'Rate limit exceeded', llm_provider='test_provider', model='test_model'
201
- ),
202
- DotDict(
203
- {
204
- 'choices': [
205
- {
206
- 'message': {
207
- 'content': '--- success\ntrue\n--- explanation\nRetry successful'
208
- }
209
- }
210
- ]
211
- }
212
- ),
213
- ]
214
-
215
- llm = LLM(config=default_config)
216
- handler = ServiceContextIssue(
217
- GitlabIssueHandler('test-owner', 'test-repo', 'test-token'), default_config
218
- )
219
- handler.llm = llm
220
-
221
- # Mock issue and history
222
- issue = Issue(
223
- owner='test-owner',
224
- repo='test-repo',
225
- number=1,
226
- title='Test Issue',
227
- body='This is a test issue.',
228
- thread_comments=['Please improve error handling'],
229
- )
230
- history = [MessageAction(content='Fixed error handling.')]
231
-
232
- # Call guess_success
233
- success, _, explanation = handler.guess_success(issue, history)
234
-
235
- # Assertions
236
- assert success is True
237
- assert explanation == 'Retry successful'
238
- assert mock_litellm_completion.call_count == 2 # Two attempts made
239
- mock_sleep.assert_called_once() # Sleep called once between retries
240
-
241
- # Validate wait time
242
- wait_time = mock_sleep.call_args[0][0]
243
- assert (
244
- default_config.retry_min_wait <= wait_time <= default_config.retry_max_wait
245
- ), (
246
- f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'
247
- )
248
-
249
-
250
- @patch('openhands.llm.llm.litellm_completion')
251
- def test_guess_success_exhausts_retries(mock_completion, default_config):
252
- """Test the retry mechanism in guess_success exhausts retries and raises an error."""
253
- # Simulate persistent rate limit errors by always raising RateLimitError
254
- mock_completion.side_effect = RateLimitError(
255
- 'Rate limit exceeded', llm_provider='test_provider', model='test_model'
256
- )
257
-
258
- # Initialize LLM and handler
259
- llm = LLM(config=default_config)
260
- handler = ServiceContextPR(
261
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), default_config
262
- )
263
- handler.llm = llm
264
-
265
- # Mock issue and history
266
- issue = Issue(
267
- owner='test-owner',
268
- repo='test-repo',
269
- number=1,
270
- title='Test Issue',
271
- body='This is a test issue.',
272
- thread_comments=['Please improve error handling'],
273
- )
274
- history = [MessageAction(content='Fixed error handling.')]
275
-
276
- # Call guess_success and expect it to raise an error after retries
277
- with pytest.raises(RateLimitError):
278
- handler.guess_success(issue, history)
279
-
280
- # Assertions
281
- assert (
282
- mock_completion.call_count == default_config.num_retries
283
- ) # Initial call + retries
 
tests/unit/resolver/gitlab/test_gitlab_pr_handler_guess_success.py DELETED
@@ -1,672 +0,0 @@
1
- import json
2
- from unittest.mock import MagicMock, patch
3
-
4
- import pytest
5
-
6
- from openhands.core.config import LLMConfig
7
- from openhands.events.action.message import MessageAction
8
- from openhands.llm.llm import LLM
9
- from openhands.resolver.interfaces.gitlab import GitlabPRHandler
10
- from openhands.resolver.interfaces.issue import Issue, ReviewThread
11
- from openhands.resolver.interfaces.issue_definitions import ServiceContextPR
12
-
13
-
14
- @pytest.fixture
15
- def pr_handler():
16
- llm_config = LLMConfig(model='test-model')
17
- handler = ServiceContextPR(
18
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
19
- )
20
- return handler
21
-
22
-
23
- @pytest.fixture
24
- def mock_llm_success_response():
25
- return MagicMock(
26
- choices=[
27
- MagicMock(
28
- message=MagicMock(
29
- content="""--- success
30
- true
31
-
32
- --- explanation
33
- The changes look good"""
34
- )
35
- )
36
- ]
37
- )
38
-
39
-
40
- def test_guess_success_review_threads_litellm_call():
41
- """Test that the completion() call for review threads contains the expected content."""
42
- # Create a PR handler instance
43
- llm_config = LLMConfig(model='test', api_key='test')
44
- handler = ServiceContextPR(
45
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
46
- )
47
-
48
- # Create a mock issue with review threads
49
- issue = Issue(
50
- owner='test-owner',
51
- repo='test-repo',
52
- number=1,
53
- title='Test PR',
54
- body='Test Body',
55
- thread_comments=None,
56
- closing_issues=['Issue 1 description', 'Issue 2 description'],
57
- review_comments=None,
58
- review_threads=[
59
- ReviewThread(
60
- comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
61
- files=['/src/file1.py', '/src/file2.py'],
62
- ),
63
- ReviewThread(
64
- comment='Add more tests\n---\nlatest feedback:\nAdd test cases',
65
- files=['/tests/test_file.py'],
66
- ),
67
- ],
68
- thread_ids=['1', '2'],
69
- head_branch='test-branch',
70
- )
71
-
72
- # Create mock history with a detailed response
73
- history = [
74
- MessageAction(
75
- content="""I have made the following changes:
76
- 1. Fixed formatting in file1.py and file2.py
77
- 2. Added docstrings to all functions
78
- 3. Added test cases in test_file.py"""
79
- )
80
- ]
81
-
82
- # Create mock LLM config
83
- llm_config = LLMConfig(model='test-model', api_key='test-key')
84
-
85
- # Mock the LLM response
86
- mock_response = MagicMock()
87
- mock_response.choices = [
88
- MagicMock(
89
- message=MagicMock(
90
- content="""--- success
91
- true
92
-
93
- --- explanation
94
- The changes successfully address the feedback."""
95
- )
96
- )
97
- ]
98
-
99
- # Test the guess_success method
100
- with patch.object(LLM, 'completion') as mock_completion:
101
- mock_completion.return_value = mock_response
102
- success, success_list, explanation = handler.guess_success(issue, history)
103
-
104
- # Verify the completion() calls
105
- assert mock_completion.call_count == 2 # One call per review thread
106
-
107
- # Check first call
108
- first_call = mock_completion.call_args_list[0]
109
- first_prompt = first_call[1]['messages'][0]['content']
110
- assert (
111
- 'Issue descriptions:\n'
112
- + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
113
- in first_prompt
114
- )
115
- assert (
116
- 'Feedback:\nPlease fix the formatting\n---\nlatest feedback:\nAdd docstrings'
117
- in first_prompt
118
- )
119
- assert (
120
- 'Files locations:\n'
121
- + json.dumps(['/src/file1.py', '/src/file2.py'], indent=4)
122
- in first_prompt
123
- )
124
- assert 'Last message from AI agent:\n' + history[0].content in first_prompt
125
-
126
- # Check second call
127
- second_call = mock_completion.call_args_list[1]
128
- second_prompt = second_call[1]['messages'][0]['content']
129
- assert (
130
- 'Issue descriptions:\n'
131
- + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
132
- in second_prompt
133
- )
134
- assert (
135
- 'Feedback:\nAdd more tests\n---\nlatest feedback:\nAdd test cases'
136
- in second_prompt
137
- )
138
- assert (
139
- 'Files locations:\n' + json.dumps(['/tests/test_file.py'], indent=4)
140
- in second_prompt
141
- )
142
- assert 'Last message from AI agent:\n' + history[0].content in second_prompt
143
-
144
- assert len(json.loads(explanation)) == 2
145
-
146
-
147
- def test_guess_success_thread_comments_litellm_call():
148
- """Test that the completion() call for thread comments contains the expected content."""
149
- # Create a PR handler instance
150
- llm_config = LLMConfig(model='test', api_key='test')
151
- handler = ServiceContextPR(
152
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
153
- )
154
-
155
- # Create a mock issue with thread comments
156
- issue = Issue(
157
- owner='test-owner',
158
- repo='test-repo',
159
- number=1,
160
- title='Test PR',
161
- body='Test Body',
162
- thread_comments=[
163
- 'Please improve error handling',
164
- 'Add input validation',
165
- 'latest feedback:\nHandle edge cases',
166
- ],
167
- closing_issues=['Issue 1 description', 'Issue 2 description'],
168
- review_comments=None,
169
- thread_ids=None,
170
- head_branch='test-branch',
171
- )
172
-
173
- # Create mock history with a detailed response
174
- history = [
175
- MessageAction(
176
- content="""I have made the following changes:
177
- 1. Added try/catch blocks for error handling
178
- 2. Added input validation checks
179
- 3. Added handling for edge cases"""
180
- )
181
- ]
182
-
183
- # Create mock LLM config
184
- llm_config = LLMConfig(model='test-model', api_key='test-key')
185
-
186
- # Mock the LLM response
187
- mock_response = MagicMock()
188
- mock_response.choices = [
189
- MagicMock(
190
- message=MagicMock(
191
- content="""--- success
192
- true
193
-
194
- --- explanation
195
- The changes successfully address the feedback."""
196
- )
197
- )
198
- ]
199
-
200
- # Test the guess_success method
201
- with patch.object(LLM, 'completion') as mock_completion:
202
- mock_completion.return_value = mock_response
203
- success, success_list, explanation = handler.guess_success(issue, history)
204
-
205
- # Verify the completion() call
206
- mock_completion.assert_called_once()
207
- call_args = mock_completion.call_args
208
- prompt = call_args[1]['messages'][0]['content']
209
-
210
- # Check prompt content
211
- assert (
212
- 'Issue descriptions:\n'
213
- + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
214
- in prompt
215
- )
216
- assert 'PR Thread Comments:\n' + '\n---\n'.join(issue.thread_comments) in prompt
217
- assert 'Last message from AI agent:\n' + history[0].content in prompt
218
-
219
- assert len(json.loads(explanation)) == 1
220
-
221
-
222
- def test_check_feedback_with_llm():
223
- """Test the _check_feedback_with_llm helper function."""
224
- # Create a PR handler instance
225
- llm_config = LLMConfig(model='test', api_key='test')
226
- handler = ServiceContextPR(
227
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
228
- )
229
-
230
- # Test cases for different LLM responses
231
- test_cases = [
232
- {
233
- 'response': '--- success\ntrue\n--- explanation\nChanges look good',
234
- 'expected': (True, 'Changes look good'),
235
- },
236
- {
237
- 'response': '--- success\nfalse\n--- explanation\nNot all issues fixed',
238
- 'expected': (False, 'Not all issues fixed'),
239
- },
240
- {
241
- 'response': 'Invalid response format',
242
- 'expected': (
243
- False,
244
- 'Failed to decode answer from LLM response: Invalid response format',
245
- ),
246
- },
247
- {
248
- 'response': '--- success\ntrue\n--- explanation\nMultiline\nexplanation\nhere',
249
- 'expected': (True, 'Multiline\nexplanation\nhere'),
250
- },
251
- ]
252
-
253
- for case in test_cases:
254
- # Mock the LLM response
255
- mock_response = MagicMock()
256
- mock_response.choices = [MagicMock(message=MagicMock(content=case['response']))]
257
-
258
- # Test the function
259
- with patch.object(LLM, 'completion', return_value=mock_response):
260
- success, explanation = handler._check_feedback_with_llm('test prompt')
261
- assert (success, explanation) == case['expected']
262
-
263
-
264
- def test_check_review_thread_with_git_patch():
265
- """Test that git patch from complete_runtime is included in the prompt."""
266
- # Create a PR handler instance
267
- llm_config = LLMConfig(model='test', api_key='test')
268
- handler = ServiceContextPR(
269
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
270
- )
271
-
272
- # Create test data
273
- review_thread = ReviewThread(
274
- comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
275
- files=['/src/file1.py', '/src/file2.py'],
276
- )
277
- issues_context = json.dumps(
278
- ['Issue 1 description', 'Issue 2 description'], indent=4
279
- )
280
- last_message = 'I have fixed the formatting and added docstrings'
281
- git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""Added docstring."""\n'
282
-
283
- # Mock the LLM response
284
- mock_response = MagicMock()
285
- mock_response.choices = [
286
- MagicMock(
287
- message=MagicMock(
288
- content="""--- success
289
- true
290
-
291
- --- explanation
292
- Changes look good"""
293
- )
294
- )
295
- ]
296
-
297
- # Test the function
298
- with patch.object(LLM, 'completion') as mock_completion:
299
- mock_completion.return_value = mock_response
300
- success, explanation = handler._check_review_thread(
301
- review_thread, issues_context, last_message, git_patch
302
- )
303
-
304
- # Verify the completion() call
305
- mock_completion.assert_called_once()
306
- call_args = mock_completion.call_args
307
- prompt = call_args[1]['messages'][0]['content']
308
-
309
- # Check prompt content
310
- assert 'Issue descriptions:\n' + issues_context in prompt
311
- assert 'Feedback:\n' + review_thread.comment in prompt
312
- assert (
313
- 'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
314
- )
315
- assert 'Last message from AI agent:\n' + last_message in prompt
316
- assert 'Changes made (git patch):\n' + git_patch in prompt
317
-
318
- # Check result
319
- assert success is True
320
- assert explanation == 'Changes look good'
321
-
322
-
323
- def test_check_review_thread():
324
- """Test the _check_review_thread helper function."""
325
- # Create a PR handler instance
326
- llm_config = LLMConfig(model='test', api_key='test')
327
- handler = ServiceContextPR(
328
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
329
- )
330
-
331
- # Create test data
332
- review_thread = ReviewThread(
333
- comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
334
- files=['/src/file1.py', '/src/file2.py'],
335
- )
336
- issues_context = json.dumps(
337
- ['Issue 1 description', 'Issue 2 description'], indent=4
338
- )
339
- last_message = 'I have fixed the formatting and added docstrings'
340
-
341
- # Mock the LLM response
342
- mock_response = MagicMock()
343
- mock_response.choices = [
344
- MagicMock(
345
- message=MagicMock(
346
- content="""--- success
347
- true
348
-
349
- --- explanation
350
- Changes look good"""
351
- )
352
- )
353
- ]
354
-
355
- # Test the function
356
- with patch.object(LLM, 'completion') as mock_completion:
357
- mock_completion.return_value = mock_response
358
- success, explanation = handler._check_review_thread(
359
- review_thread, issues_context, last_message
360
- )
361
-
362
- # Verify the completion() call
363
- mock_completion.assert_called_once()
364
- call_args = mock_completion.call_args
365
- prompt = call_args[1]['messages'][0]['content']
366
-
367
- # Check prompt content
368
- assert 'Issue descriptions:\n' + issues_context in prompt
369
- assert 'Feedback:\n' + review_thread.comment in prompt
370
- assert (
371
- 'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
372
- )
373
- assert 'Last message from AI agent:\n' + last_message in prompt
374
-
375
- # Check result
376
- assert success is True
377
- assert explanation == 'Changes look good'
378
-
379
-
380
- def test_check_thread_comments_with_git_patch():
381
- """Test that git patch from complete_runtime is included in the prompt."""
382
- # Create a PR handler instance
383
- llm_config = LLMConfig(model='test', api_key='test')
384
- handler = ServiceContextPR(
385
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
386
- )
387
-
388
- # Create test data
389
- thread_comments = [
390
- 'Please improve error handling',
391
- 'Add input validation',
392
- 'latest feedback:\nHandle edge cases',
393
- ]
394
- issues_context = json.dumps(
395
- ['Issue 1 description', 'Issue 2 description'], indent=4
396
- )
397
- last_message = 'I have added error handling and input validation'
398
- git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+try:\n+ validate_input()\n+except ValueError:\n+ handle_error()\n'
399
-
400
- # Mock the LLM response
401
- mock_response = MagicMock()
402
- mock_response.choices = [
403
- MagicMock(
404
- message=MagicMock(
405
- content="""--- success
406
- true
407
-
408
- --- explanation
409
- Changes look good"""
410
- )
411
- )
412
- ]
413
-
414
- # Test the function
415
- with patch.object(LLM, 'completion') as mock_completion:
416
- mock_completion.return_value = mock_response
417
- success, explanation = handler._check_thread_comments(
418
- thread_comments, issues_context, last_message, git_patch
419
- )
420
-
421
- # Verify the completion() call
422
- mock_completion.assert_called_once()
423
- call_args = mock_completion.call_args
424
- prompt = call_args[1]['messages'][0]['content']
425
-
426
- # Check prompt content
427
- assert 'Issue descriptions:\n' + issues_context in prompt
428
- assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
429
- assert 'Last message from AI agent:\n' + last_message in prompt
430
- assert 'Changes made (git patch):\n' + git_patch in prompt
431
-
432
- # Check result
433
- assert success is True
434
- assert explanation == 'Changes look good'
435
-
436
-
437
- def test_check_thread_comments():
438
- """Test the _check_thread_comments helper function."""
439
- # Create a PR handler instance
440
- llm_config = LLMConfig(model='test', api_key='test')
441
- handler = ServiceContextPR(
442
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
443
- )
444
-
445
- # Create test data
446
- thread_comments = [
447
- 'Please improve error handling',
448
- 'Add input validation',
449
- 'latest feedback:\nHandle edge cases',
450
- ]
451
- issues_context = json.dumps(
452
- ['Issue 1 description', 'Issue 2 description'], indent=4
453
- )
454
- last_message = 'I have added error handling and input validation'
455
-
456
- # Mock the LLM response
457
- mock_response = MagicMock()
458
- mock_response.choices = [
459
- MagicMock(
460
- message=MagicMock(
461
- content="""--- success
462
- true
463
-
464
- --- explanation
465
- Changes look good"""
466
- )
467
- )
468
- ]
469
-
470
- # Test the function
471
- with patch.object(LLM, 'completion') as mock_completion:
472
- mock_completion.return_value = mock_response
473
- success, explanation = handler._check_thread_comments(
474
- thread_comments, issues_context, last_message
475
- )
476
-
477
- # Verify the completion() call
478
- mock_completion.assert_called_once()
479
- call_args = mock_completion.call_args
480
- prompt = call_args[1]['messages'][0]['content']
481
-
482
- # Check prompt content
483
- assert 'Issue descriptions:\n' + issues_context in prompt
484
- assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
485
- assert 'Last message from AI agent:\n' + last_message in prompt
486
-
487
- # Check result
488
- assert success is True
489
- assert explanation == 'Changes look good'
490
-
491
-
492
- def test_check_review_comments_with_git_patch():
493
- """Test that git patch from complete_runtime is included in the prompt."""
494
- # Create a PR handler instance
495
- llm_config = LLMConfig(model='test', api_key='test')
496
- handler = ServiceContextPR(
497
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
498
- )
499
-
500
- # Create test data
501
- review_comments = [
502
- 'Please fix the code style',
503
- 'Add more test cases',
504
- 'latest feedback:\nImprove documentation',
505
- ]
506
- issues_context = json.dumps(
507
- ['Issue 1 description', 'Issue 2 description'], indent=4
508
- )
509
- last_message = 'I have fixed the code style and added tests'
510
- git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""This module does X."""\n+def func():\n+ """Do Y."""\n'
511
-
512
- # Mock the LLM response
513
- mock_response = MagicMock()
514
- mock_response.choices = [
515
- MagicMock(
516
- message=MagicMock(
517
- content="""--- success
518
- true
519
-
520
- --- explanation
521
- Changes look good"""
522
- )
523
- )
524
- ]
525
-
526
- # Test the function
527
- with patch.object(LLM, 'completion') as mock_completion:
528
- mock_completion.return_value = mock_response
529
- success, explanation = handler._check_review_comments(
530
- review_comments, issues_context, last_message, git_patch
531
- )
532
-
533
- # Verify the completion() call
534
- mock_completion.assert_called_once()
535
- call_args = mock_completion.call_args
536
- prompt = call_args[1]['messages'][0]['content']
537
-
538
- # Check prompt content
539
- assert 'Issue descriptions:\n' + issues_context in prompt
540
- assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
541
- assert 'Last message from AI agent:\n' + last_message in prompt
542
- assert 'Changes made (git patch):\n' + git_patch in prompt
543
-
544
- # Check result
545
- assert success is True
546
- assert explanation == 'Changes look good'
547
-
548
-
549
- def test_check_review_comments():
550
- """Test the _check_review_comments helper function."""
551
- # Create a PR handler instance
552
- llm_config = LLMConfig(model='test', api_key='test')
553
- handler = ServiceContextPR(
554
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
555
- )
556
-
557
- # Create test data
558
- review_comments = [
559
- 'Please improve code readability',
560
- 'Add comments to complex functions',
561
- 'Follow PEP 8 style guide',
562
- ]
563
- issues_context = json.dumps(
564
- ['Issue 1 description', 'Issue 2 description'], indent=4
565
- )
566
- last_message = 'I have improved code readability and added comments'
567
-
568
- # Mock the LLM response
569
- mock_response = MagicMock()
570
- mock_response.choices = [
571
- MagicMock(
572
- message=MagicMock(
573
- content="""--- success
574
- true
575
-
576
- --- explanation
577
- Changes look good"""
578
- )
579
- )
580
- ]
581
-
582
- # Test the function
583
- with patch.object(LLM, 'completion') as mock_completion:
584
- mock_completion.return_value = mock_response
585
- success, explanation = handler._check_review_comments(
586
- review_comments, issues_context, last_message
587
- )
588
-
589
- # Verify the completion() call
590
- mock_completion.assert_called_once()
591
- call_args = mock_completion.call_args
592
- prompt = call_args[1]['messages'][0]['content']
593
-
594
- # Check prompt content
595
- assert 'Issue descriptions:\n' + issues_context in prompt
596
- assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
597
- assert 'Last message from AI agent:\n' + last_message in prompt
598
-
599
- # Check result
600
- assert success is True
601
- assert explanation == 'Changes look good'
602
-
603
-
604
- def test_guess_success_review_comments_litellm_call():
605
- """Test that the completion() call for review comments contains the expected content."""
606
- # Create a PR handler instance
607
- llm_config = LLMConfig(model='test', api_key='test')
608
- handler = ServiceContextPR(
609
- GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
610
- )
611
-
612
- # Create a mock issue with review comments
613
- issue = Issue(
614
- owner='test-owner',
615
- repo='test-repo',
616
- number=1,
617
- title='Test PR',
618
- body='Test Body',
619
- thread_comments=None,
620
- closing_issues=['Issue 1 description', 'Issue 2 description'],
621
- review_comments=[
622
- 'Please improve code readability',
623
- 'Add comments to complex functions',
624
- 'Follow PEP 8 style guide',
625
- ],
626
- thread_ids=None,
627
- head_branch='test-branch',
628
- )
629
-
630
- # Create mock history with a detailed response
631
- history = [
632
- MessageAction(
633
- content="""I have made the following changes:
634
- 1. Improved code readability by breaking down complex functions
635
- 2. Added detailed comments to all complex functions
636
- 3. Fixed code style to follow PEP 8"""
637
- )
638
- ]
639
-
640
- # Mock the LLM response
641
- mock_response = MagicMock()
642
- mock_response.choices = [
643
- MagicMock(
644
- message=MagicMock(
645
- content="""--- success
646
- true
647
-
648
- --- explanation
649
- The changes successfully address the feedback."""
650
- )
651
- )
652
- ]
653
-
654
- with patch.object(LLM, 'completion') as mock_completion:
655
- mock_completion.return_value = mock_response
656
- success, success_list, explanation = handler.guess_success(issue, history)
657
-
658
- # Verify the completion() call
659
- mock_completion.assert_called_once()
660
- call_args = mock_completion.call_args
661
- prompt = call_args[1]['messages'][0]['content']
662
-
663
- # Check prompt content
664
- assert (
665
- 'Issue descriptions:\n'
666
- + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
667
- in prompt
668
- )
669
- assert 'PR Review Comments:\n' + '\n---\n'.join(issue.review_comments) in prompt
670
- assert 'Last message from AI agent:\n' + history[0].content in prompt
671
-
672
- assert len(json.loads(explanation)) == 1
 
tests/unit/resolver/gitlab/test_gitlab_pr_title_escaping.py DELETED
@@ -1,166 +0,0 @@
1
- import os
2
- import subprocess
3
- import tempfile
4
-
5
- from openhands.core.logger import openhands_logger as logger
6
- from openhands.integrations.service_types import ProviderType
7
- from openhands.resolver.interfaces.issue import Issue
8
- from openhands.resolver.send_pull_request import make_commit, send_pull_request
9
-
10
-
11
- def test_commit_message_with_quotes():
12
- # Create a temporary directory and initialize git repo
13
- with tempfile.TemporaryDirectory() as temp_dir:
14
- subprocess.run(['git', 'init', temp_dir], check=True)
15
-
16
- # Create a test file and add it to git
17
- test_file = os.path.join(temp_dir, 'test.txt')
18
- with open(test_file, 'w') as f:
19
- f.write('test content')
20
-
21
- subprocess.run(['git', '-C', temp_dir, 'add', 'test.txt'], check=True)
22
-
23
- # Create a test issue with problematic title
24
- issue = Issue(
25
- owner='test-owner',
26
- repo='test-repo',
27
- number=123,
28
- title="Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>",
29
- body='Test body',
30
- labels=[],
31
- assignees=[],
32
- state='open',
33
- created_at='2024-01-01T00:00:00Z',
34
- updated_at='2024-01-01T00:00:00Z',
35
- closed_at=None,
36
- head_branch=None,
37
- thread_ids=None,
38
- )
39
-
40
- # Make the commit
41
- make_commit(temp_dir, issue, 'issue')
42
-
43
- # Get the commit message
44
- result = subprocess.run(
45
- ['git', '-C', temp_dir, 'log', '-1', '--pretty=%B'],
46
- capture_output=True,
47
- text=True,
48
- check=True,
49
- )
50
- commit_msg = result.stdout.strip()
51
-
52
- # The commit message should contain the quotes without excessive escaping
53
- expected = "Fix issue #123: Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>"
54
- assert commit_msg == expected, f'Expected: {expected}\nGot: {commit_msg}'
55
-
56
-
57
- def test_pr_title_with_quotes(monkeypatch):
58
- # Mock httpx.post to avoid actual API calls
59
- class MockResponse:
60
- def __init__(self, status_code=201):
61
- self.status_code = status_code
62
- self.text = ''
63
-
64
- def json(self):
65
- return {'html_url': 'https://github.com/test/test/pull/1'}
66
-
67
- def raise_for_status(self):
68
- pass
69
-
70
- def mock_post(*args, **kwargs):
71
- # Verify that the PR title is not over-escaped
72
- data = kwargs.get('json', {})
73
- title = data.get('title', '')
74
- expected = "Fix issue #123: Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>"
75
- assert title == expected, (
76
- f'PR title was incorrectly escaped.\nExpected: {expected}\nGot: {title}'
77
- )
78
- return MockResponse()
79
-
80
- class MockGetResponse:
81
- def __init__(self, status_code=200):
82
- self.status_code = status_code
83
- self.text = ''
84
-
85
- def json(self):
86
- return {'default_branch': 'main'}
87
-
88
- def raise_for_status(self):
89
- pass
90
-
91
- monkeypatch.setattr('httpx.post', mock_post)
92
- monkeypatch.setattr('httpx.get', lambda *args, **kwargs: MockGetResponse())
93
- monkeypatch.setattr(
94
- 'openhands.resolver.interfaces.github.GithubIssueHandler.branch_exists',
95
- lambda *args, **kwargs: False,
96
- )
97
-
98
- # Mock subprocess.run to avoid actual git commands
99
- original_run = subprocess.run
100
-
101
- def mock_run(*args, **kwargs):
102
- logger.info(f'Running command: {args[0] if args else kwargs.get("args", [])}')
103
- if isinstance(args[0], list) and args[0][0] == 'git':
104
- if 'push' in args[0]:
105
- return subprocess.CompletedProcess(
106
- args[0], returncode=0, stdout='', stderr=''
107
- )
108
- return original_run(*args, **kwargs)
109
- return original_run(*args, **kwargs)
110
-
111
- monkeypatch.setattr('subprocess.run', mock_run)
112
-
113
- # Create a temporary directory and initialize git repo
114
- with tempfile.TemporaryDirectory() as temp_dir:
115
- logger.info('Initializing git repo...')
116
- subprocess.run(['git', 'init', temp_dir], check=True)
117
-
118
- # Add these lines to configure git
119
- subprocess.run(
120
- ['git', '-C', temp_dir, 'config', 'user.name', 'Test User'], check=True
121
- )
122
- subprocess.run(
123
- ['git', '-C', temp_dir, 'config', 'user.email', '[email protected]'],
124
- check=True,
125
- )
126
-
127
- # Create a test file and add it to git
128
- test_file = os.path.join(temp_dir, 'test.txt')
129
- with open(test_file, 'w') as f:
130
- f.write('test content')
131
-
132
- logger.info('Adding and committing test file...')
133
- subprocess.run(['git', '-C', temp_dir, 'add', 'test.txt'], check=True)
134
- subprocess.run(
135
- ['git', '-C', temp_dir, 'commit', '-m', 'Initial commit'], check=True
136
- )
137
-
138
- # Create a test issue with problematic title
139
- logger.info('Creating test issue...')
140
- issue = Issue(
141
- owner='test-owner',
142
- repo='test-repo',
143
- number=123,
144
- title="Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>",
145
- body='Test body',
146
- labels=[],
147
- assignees=[],
148
- state='open',
149
- created_at='2024-01-01T00:00:00Z',
150
- updated_at='2024-01-01T00:00:00Z',
151
- closed_at=None,
152
- head_branch=None,
153
- thread_ids=None,
154
- )
155
-
156
- # Try to send a PR - this will fail if the title is incorrectly escaped
157
- logger.info('Sending PR...')
158
-
159
- send_pull_request(
160
- issue=issue,
161
- token='dummy-token',
162
- username='test-user',
163
- platform=ProviderType.GITHUB,
164
- patch_dir=temp_dir,
165
- pr_type='ready',
166
- )
 
tests/unit/resolver/gitlab/test_gitlab_resolve_issues.py DELETED
@@ -1,1000 +0,0 @@
1
- import os
2
- import tempfile
3
- from unittest.mock import AsyncMock, MagicMock, patch
4
-
5
- import pytest
6
-
7
- from openhands.core.config import LLMConfig
8
- from openhands.events.action import CmdRunAction
9
- from openhands.events.observation import (
10
- CmdOutputMetadata,
11
- CmdOutputObservation,
12
- NullObservation,
13
- )
14
- from openhands.integrations.service_types import ProviderType
15
- from openhands.llm.llm import LLM
16
- from openhands.resolver.interfaces.gitlab import GitlabIssueHandler, GitlabPRHandler
17
- from openhands.resolver.interfaces.issue import Issue, ReviewThread
18
- from openhands.resolver.interfaces.issue_definitions import (
19
- ServiceContextIssue,
20
- ServiceContextPR,
21
- )
22
- from openhands.resolver.issue_resolver import (
23
- IssueResolver,
24
- )
25
- from openhands.resolver.resolver_output import ResolverOutput
26
-
27
-
28
- @pytest.fixture
29
- def default_mock_args():
30
- """Fixture that provides a default mock args object with common values.
31
-
32
- Tests can override specific attributes as needed.
33
- """
34
- mock_args = MagicMock()
35
- mock_args.selected_repo = 'test-owner/test-repo'
36
- mock_args.token = 'test-token'
37
- mock_args.username = 'test-user'
38
- mock_args.max_iterations = 5
39
- mock_args.output_dir = '/tmp'
40
- mock_args.llm_model = 'test'
41
- mock_args.llm_api_key = 'test'
42
- mock_args.llm_base_url = None
43
- mock_args.base_domain = None
44
- mock_args.runtime_container_image = None
45
- mock_args.is_experimental = False
46
- mock_args.issue_number = None
47
- mock_args.comment_id = None
48
- mock_args.repo_instruction_file = None
49
- mock_args.issue_type = 'issue'
50
- mock_args.prompt_file = None
51
- return mock_args
52
-
53
-
54
- @pytest.fixture
55
- def mock_gitlab_token():
56
- """Fixture that patches the identify_token function to return GitLab provider type.
57
-
58
- This eliminates the need for repeated patching in each test function.
59
- """
60
- with patch(
61
- 'openhands.resolver.issue_resolver.identify_token',
62
- return_value=ProviderType.GITLAB,
63
- ) as patched:
64
- yield patched
65
-
66
-
67
- @pytest.fixture
68
- def mock_output_dir():
69
- with tempfile.TemporaryDirectory() as temp_dir:
70
- repo_path = os.path.join(temp_dir, 'repo')
71
- # Initialize a git repo in "repo" and add a commit with "README.md"
72
- os.makedirs(repo_path)
73
- os.system(f'git init {repo_path}')
74
- readme_path = os.path.join(repo_path, 'README.md')
75
- with open(readme_path, 'w') as f:
76
- f.write('hello world')
77
- os.system(f'git -C {repo_path} add README.md')
78
- os.system(f"git -C {repo_path} commit -m 'Initial commit'")
79
- yield temp_dir
80
-
81
-
82
- @pytest.fixture
83
- def mock_subprocess():
84
- with patch('subprocess.check_output') as mock_check_output:
85
- yield mock_check_output
86
-
87
-
88
- @pytest.fixture
89
- def mock_os():
90
- with patch('os.system') as mock_system, patch('os.path.join') as mock_join:
91
- yield mock_system, mock_join
92
-
93
-
94
- @pytest.fixture
95
- def mock_user_instructions_template():
96
- return 'Issue: {{ body }}\n\nPlease fix this issue.'
97
-
98
-
99
- @pytest.fixture
100
- def mock_conversation_instructions_template():
101
- return 'Instructions: {{ repo_instruction }}'
102
-
103
-
104
- @pytest.fixture
105
- def mock_followup_prompt_template():
106
- return 'Issue context: {{ issues }}\n\nReview comments: {{ review_comments }}\n\nReview threads: {{ review_threads }}\n\nFiles: {{ files }}\n\nThread comments: {{ thread_context }}\n\nPlease fix this issue.'
107
-
108
-
109
- def create_cmd_output(exit_code: int, content: str, command: str):
110
- return CmdOutputObservation(
111
- content=content,
112
- command=command,
113
- metadata=CmdOutputMetadata(exit_code=exit_code),
114
- )
115
-
116
-
117
- def test_initialize_runtime(default_mock_args, mock_gitlab_token):
118
- mock_runtime = MagicMock()
119
-
120
- if os.getenv('GITLAB_CI') == 'true':
121
- mock_runtime.run_action.side_effect = [
122
- create_cmd_output(exit_code=0, content='', command='cd /workspace'),
123
- create_cmd_output(
124
- exit_code=0, content='', command='sudo chown -R 1001:0 /workspace/*'
125
- ),
126
- create_cmd_output(
127
- exit_code=0, content='', command='git config --global core.pager ""'
128
- ),
129
- ]
130
- else:
131
- mock_runtime.run_action.side_effect = [
132
- create_cmd_output(exit_code=0, content='', command='cd /workspace'),
133
- create_cmd_output(
134
- exit_code=0, content='', command='git config --global core.pager ""'
135
- ),
136
- ]
137
-
138
- # Create resolver with mocked token identification
139
- resolver = IssueResolver(default_mock_args)
140
-
141
- resolver.initialize_runtime(mock_runtime)
142
-
143
- if os.getenv('GITLAB_CI') == 'true':
144
- assert mock_runtime.run_action.call_count == 3
145
- else:
146
- assert mock_runtime.run_action.call_count == 2
147
-
148
- mock_runtime.run_action.assert_any_call(CmdRunAction(command='cd /workspace'))
149
- if os.getenv('GITLAB_CI') == 'true':
150
- mock_runtime.run_action.assert_any_call(
151
- CmdRunAction(command='sudo chown -R 1001:0 /workspace/*')
152
- )
153
- mock_runtime.run_action.assert_any_call(
154
- CmdRunAction(command='git config --global core.pager ""')
155
- )
156
-
157
-
158
- @pytest.mark.asyncio
159
- async def test_resolve_issue_no_issues_found(default_mock_args, mock_gitlab_token):
160
- """Test the resolve_issue method when no issues are found."""
161
- # Mock dependencies
162
- mock_handler = MagicMock()
163
- mock_handler.get_converted_issues.return_value = [] # Return empty list
164
-
165
- # Customize the mock args for this test
166
- default_mock_args.issue_number = 5432
167
-
168
- # Create a resolver instance with mocked token identification
169
- resolver = IssueResolver(default_mock_args)
170
-
171
- # Mock the issue handler
172
- resolver.issue_handler = mock_handler
173
-
174
- # Test that the correct exception is raised
175
- with pytest.raises(ValueError) as exc_info:
176
- await resolver.resolve_issue()
177
-
178
- # Verify the error message
179
- assert 'No issues found for issue number 5432' in str(exc_info.value)
180
- assert 'test-owner/test-repo' in str(exc_info.value)
181
-
182
- mock_handler.get_converted_issues.assert_called_once_with(
183
- issue_numbers=[5432], comment_id=None
184
- )
185
-
186
-
187
- def test_download_issues_from_gitlab():
188
- llm_config = LLMConfig(model='test', api_key='test')
189
- handler = ServiceContextIssue(
190
- GitlabIssueHandler('owner', 'repo', 'token'), llm_config
191
- )
192
-
193
- mock_issues_response = MagicMock()
194
- mock_issues_response.json.side_effect = [
195
- [
196
- {'iid': 1, 'title': 'Issue 1', 'description': 'This is an issue'},
197
- {
198
- 'iid': 2,
199
- 'title': 'PR 1',
200
- 'description': 'This is a pull request',
201
- 'pull_request': {},
202
- },
203
- {'iid': 3, 'title': 'Issue 2', 'description': 'This is another issue'},
204
- ],
205
- None,
206
- ]
207
- mock_issues_response.raise_for_status = MagicMock()
208
-
209
- mock_comments_response = MagicMock()
210
- mock_comments_response.json.return_value = []
211
- mock_comments_response.raise_for_status = MagicMock()
212
-
213
- def get_mock_response(url, *args, **kwargs):
214
- if '/notes' in url:
215
- return mock_comments_response
216
- return mock_issues_response
217
-
218
- with patch('httpx.get', side_effect=get_mock_response):
219
- issues = handler.get_converted_issues(issue_numbers=[1, 3])
220
-
221
- assert len(issues) == 2
222
- assert handler.issue_type == 'issue'
223
- assert all(isinstance(issue, Issue) for issue in issues)
224
- assert [issue.number for issue in issues] == [1, 3]
225
- assert [issue.title for issue in issues] == ['Issue 1', 'Issue 2']
226
- assert [issue.review_comments for issue in issues] == [None, None]
227
- assert [issue.closing_issues for issue in issues] == [None, None]
228
- assert [issue.thread_ids for issue in issues] == [None, None]
229
-
230
-
231
- def test_download_pr_from_gitlab():
232
- llm_config = LLMConfig(model='test', api_key='test')
233
- handler = ServiceContextPR(GitlabPRHandler('owner', 'repo', 'token'), llm_config)
234
- mock_pr_response = MagicMock()
235
- mock_pr_response.json.side_effect = [
236
- [
237
- {
238
- 'iid': 1,
239
- 'title': 'PR 1',
240
- 'description': 'This is a pull request',
241
- 'source_branch': 'b1',
242
- },
243
- {
244
- 'iid': 2,
245
- 'title': 'My PR',
246
- 'description': 'This is another pull request',
247
- 'source_branch': 'b2',
248
- },
249
- {
250
- 'iid': 3,
251
- 'title': 'PR 3',
252
- 'description': 'Final PR',
253
- 'source_branch': 'b3',
254
- },
255
- ],
256
- None,
257
- ]
258
- mock_pr_response.raise_for_status = MagicMock()
259
-
260
- # Mock for related issues response
261
- mock_related_issuse_response = MagicMock()
262
- mock_related_issuse_response.json.return_value = [
263
- {'description': 'Issue 1 body', 'iid': 1},
264
- {'description': 'Issue 2 body', 'iid': 2},
265
- ]
266
- mock_related_issuse_response.raise_for_status = MagicMock()
267
-
268
- # Mock for PR comments response
269
- mock_comments_response = MagicMock()
270
- mock_comments_response.json.return_value = [] # No PR comments
271
- mock_comments_response.raise_for_status = MagicMock()
272
-
273
- # Mock for GraphQL request (for download_pr_metadata)
274
- mock_graphql_response = MagicMock()
275
- mock_graphql_response.json.side_effect = lambda: {
276
- 'data': {
277
- 'project': {
278
- 'mergeRequest': {
279
- 'discussions': {
280
- 'edges': [
281
- {
282
- 'node': {
283
- 'id': '1',
284
- 'resolved': False,
285
- 'resolvable': True,
286
- 'notes': {
287
- 'nodes': [
288
- {
289
- 'body': 'Unresolved comment 1',
290
- 'position': {
291
- 'filePath': '/frontend/header.tsx',
292
- },
293
- },
294
- {
295
- 'body': 'Follow up thread',
296
- },
297
- ]
298
- },
299
- }
300
- },
301
- {
302
- 'node': {
303
- 'id': '2',
304
- 'resolved': True,
305
- 'resolvable': True,
306
- 'notes': {
307
- 'nodes': [
308
- {
309
- 'body': 'Resolved comment 1',
310
- 'position': {
311
- 'filePath': '/some/file.py',
312
- },
313
- },
314
- ]
315
- },
316
- }
317
- },
318
- {
319
- 'node': {
320
- 'id': '3',
321
- 'resolved': False,
322
- 'resolvable': True,
323
- 'notes': {
324
- 'nodes': [
325
- {
326
- 'body': 'Unresolved comment 3',
327
- 'position': {
328
- 'filePath': '/another/file.py',
329
- },
330
- },
331
- ]
332
- },
333
- }
334
- },
335
- ]
336
- },
337
- }
338
- }
339
- }
340
- }
341
-
342
- mock_graphql_response.raise_for_status = MagicMock()
343
-
344
- def get_mock_response(url, *args, **kwargs):
345
- if '/notes' in url:
346
- return mock_comments_response
347
- if '/related_issues' in url:
348
- return mock_related_issues_response
349
- return mock_pr_response
350
-
351
- with patch('httpx.get', side_effect=get_mock_response):
352
- with patch('httpx.post', return_value=mock_graphql_response):
353
- issues = handler.get_converted_issues(issue_numbers=[1, 2, 3])
354
-
355
- assert len(issues) == 3
356
- assert handler.issue_type == 'pr'
357
- assert all(isinstance(issue, Issue) for issue in issues)
358
- assert [issue.number for issue in issues] == [1, 2, 3]
359
- assert [issue.title for issue in issues] == ['PR 1', 'My PR', 'PR 3']
360
- assert [issue.head_branch for issue in issues] == ['b1', 'b2', 'b3']
361
-
362
- assert len(issues[0].review_threads) == 2 # Only unresolved threads
363
- assert (
364
- issues[0].review_threads[0].comment
365
- == 'Unresolved comment 1\n---\nlatest feedback:\nFollow up thread\n'
366
- )
367
- assert issues[0].review_threads[0].files == ['/frontend/header.tsx']
368
- assert (
369
- issues[0].review_threads[1].comment
370
- == 'latest feedback:\nUnresolved comment 3\n'
371
- )
372
- assert issues[0].review_threads[1].files == ['/another/file.py']
373
- assert issues[0].closing_issues == ['Issue 1 body', 'Issue 2 body']
374
- assert issues[0].thread_ids == ['1', '3']
375
-
376
-
377
- @pytest.mark.asyncio
378
- async def test_complete_runtime(default_mock_args, mock_gitlab_token):
379
- mock_runtime = MagicMock()
380
- mock_runtime.run_action.side_effect = [
381
- create_cmd_output(exit_code=0, content='', command='cd /workspace'),
382
- create_cmd_output(
383
- exit_code=0, content='', command='git config --global core.pager ""'
384
- ),
385
- create_cmd_output(
386
- exit_code=0,
387
- content='',
388
- command='git config --global --add safe.directory /workspace',
389
- ),
390
- create_cmd_output(exit_code=0, content='', command='git add -A'),
391
- create_cmd_output(
392
- exit_code=0,
393
- content='git diff content',
394
- command='git diff --no-color --cached base_commit_hash',
395
- ),
396
- ]
397
-
398
- # Create a resolver instance with mocked token identification
399
- resolver = IssueResolver(default_mock_args)
400
-
401
- result = await resolver.complete_runtime(mock_runtime, 'base_commit_hash')
402
-
403
- assert result == {'git_patch': 'git diff content'}
404
- assert mock_runtime.run_action.call_count == 5
405
-
406
-
407
- @pytest.mark.asyncio
408
- @pytest.mark.parametrize(
409
- 'test_case',
410
- [
411
- {
412
- 'name': 'successful_run',
413
- 'run_controller_return': MagicMock(
414
- history=[NullObservation(content='')],
415
- metrics=MagicMock(
416
- get=MagicMock(return_value={'test_result': 'passed'})
417
- ),
418
- last_error=None,
419
- ),
420
- 'run_controller_raises': None,
421
- 'expected_success': True,
422
- 'expected_error': None,
423
- 'expected_explanation': 'Issue resolved successfully',
424
- 'is_pr': False,
425
- 'comment_success': None,
426
- },
427
- {
428
- 'name': 'value_error',
429
- 'run_controller_raises': ValueError('Test value error'),
430
- 'expected_success': False,
431
- 'expected_error': 'Agent failed to run or crashed',
432
- 'expected_explanation': 'Agent failed to run',
433
- 'is_pr': False,
434
- 'comment_success': None,
435
- },
436
- {
437
- 'name': 'runtime_error',
438
- 'run_controller_raises': RuntimeError('Test runtime error'),
439
- 'expected_success': False,
440
- 'expected_error': 'Agent failed to run or crashed',
441
- 'expected_explanation': 'Agent failed to run',
442
- 'is_pr': False,
443
- 'comment_success': None,
444
- },
445
- {
446
- 'name': 'json_decode_error',
447
- 'run_controller_return': MagicMock(
448
- history=[NullObservation(content='')],
449
- metrics=MagicMock(
450
- get=MagicMock(return_value={'test_result': 'passed'})
451
- ),
452
- last_error=None,
453
- ),
454
- 'run_controller_raises': None,
455
- 'expected_success': True,
456
- 'expected_error': None,
457
- 'expected_explanation': 'Non-JSON explanation',
458
- 'is_pr': True,
459
- 'comment_success': [True, False],
460
- },
461
- ],
462
- )
463
- async def test_process_issue(
464
- default_mock_args,
465
- mock_gitlab_token,
466
- mock_output_dir,
467
- mock_user_instructions_template,
468
- test_case,
469
- ):
470
- """Test the process_issue method with different scenarios."""
471
- # Set up test data
472
- issue = Issue(
473
- owner='test_owner',
474
- repo='test_repo',
475
- number=1,
476
- title='Test Issue',
477
- body='This is a test issue',
478
- )
479
- base_commit = 'abcdef1234567890'
480
-
481
- # Customize the mock args for this test
482
- default_mock_args.output_dir = mock_output_dir
483
- default_mock_args.issue_type = 'pr' if test_case.get('is_pr', False) else 'issue'
484
-
485
- # Create a resolver instance with mocked token identification
486
- resolver = IssueResolver(default_mock_args)
487
- resolver.user_instructions_prompt_template = mock_user_instructions_template
488
-
489
- # Mock the handler with LLM config
490
- llm_config = LLMConfig(model='test', api_key='test')
491
- handler_instance = MagicMock()
492
- handler_instance.guess_success.return_value = (
493
- test_case['expected_success'],
494
- test_case.get('comment_success', None),
495
- test_case['expected_explanation'],
496
- )
497
- handler_instance.get_instruction.return_value = (
498
- 'Test instruction',
499
- 'Test conversation instructions',
500
- [],
501
- )
502
- handler_instance.issue_type = 'pr' if test_case.get('is_pr', False) else 'issue'
503
- handler_instance.llm = LLM(llm_config)
504
-
505
- # Create mock runtime and mock run_controller
506
- mock_runtime = MagicMock()
507
- mock_runtime.connect = AsyncMock()
508
- mock_create_runtime = MagicMock(return_value=mock_runtime)
509
-
510
- # Configure run_controller mock based on test case
511
- mock_run_controller = AsyncMock()
512
- if test_case.get('run_controller_raises'):
513
- mock_run_controller.side_effect = test_case['run_controller_raises']
514
- else:
515
- mock_run_controller.return_value = test_case['run_controller_return']
516
-
517
- # Patch the necessary functions and methods
518
- with (
519
- patch('openhands.resolver.issue_resolver.create_runtime', mock_create_runtime),
520
- patch('openhands.resolver.issue_resolver.run_controller', mock_run_controller),
521
- patch.object(
522
- resolver, 'complete_runtime', return_value={'git_patch': 'test patch'}
523
- ),
524
- patch.object(resolver, 'initialize_runtime') as mock_initialize_runtime,
525
- patch(
526
- 'openhands.resolver.issue_resolver.SandboxConfig', return_value=MagicMock()
527
- ),
528
- patch(
529
- 'openhands.resolver.issue_resolver.OpenHandsConfig',
530
- return_value=MagicMock(),
531
- ),
532
- ):
533
- # Call the process_issue method
534
- result = await resolver.process_issue(issue, base_commit, handler_instance)
535
-
536
- mock_create_runtime.assert_called_once()
537
- mock_runtime.connect.assert_called_once()
538
- mock_initialize_runtime.assert_called_once()
539
- mock_run_controller.assert_called_once()
540
- resolver.complete_runtime.assert_awaited_once_with(mock_runtime, base_commit)
541
-
542
- # Assert the result matches our expectations
543
- assert isinstance(result, ResolverOutput)
544
- assert result.issue == issue
545
- assert result.base_commit == base_commit
546
- assert result.git_patch == 'test patch'
547
- assert result.success == test_case['expected_success']
548
- assert result.result_explanation == test_case['expected_explanation']
549
- assert result.error == test_case['expected_error']
550
-
551
- if test_case['expected_success']:
552
- handler_instance.guess_success.assert_called_once()
553
- else:
554
- handler_instance.guess_success.assert_not_called()
555
-
556
-
557
- def test_get_instruction(
558
- mock_user_instructions_template,
559
- mock_conversation_instructions_template,
560
- mock_followup_prompt_template,
561
- ):
562
- issue = Issue(
563
- owner='test_owner',
564
- repo='test_repo',
565
- number=123,
566
- title='Test Issue',
567
- body='This is a test issue refer to image ![First Image](https://sampleimage.com/image1.png)',
568
- )
569
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
570
- issue_handler = ServiceContextIssue(
571
- GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
572
- )
573
- instruction, conversation_instructions, images_urls = issue_handler.get_instruction(
574
- issue,
575
- mock_user_instructions_template,
576
- mock_conversation_instructions_template,
577
- None,
578
- )
579
- expected_instruction = 'Issue: Test Issue\n\nThis is a test issue refer to image ![First Image](https://sampleimage.com/image1.png)\n\nPlease fix this issue.'
580
-
581
- assert images_urls == ['https://sampleimage.com/image1.png']
582
- assert issue_handler.issue_type == 'issue'
583
- assert instruction == expected_instruction
584
- assert conversation_instructions is not None
585
-
586
- issue = Issue(
587
- owner='test_owner',
588
- repo='test_repo',
589
- number=123,
590
- title='Test Issue',
591
- body='This is a test issue',
592
- closing_issues=['Issue 1 fix the type'],
593
- review_threads=[
594
- ReviewThread(
595
- comment="There is still a typo 'pthon' instead of 'python'", files=[]
596
- )
597
- ],
598
- thread_comments=[
599
- "I've left review comments, please address them",
600
- 'This is a valid concern.',
601
- ],
602
- )
603
-
604
- pr_handler = ServiceContextPR(
605
- GitlabPRHandler('owner', 'repo', 'token'), mock_llm_config
606
- )
607
- instruction, conversation_instructions, images_urls = pr_handler.get_instruction(
608
- issue,
609
- mock_followup_prompt_template,
610
- mock_conversation_instructions_template,
611
- None,
612
- )
613
- expected_instruction = "Issue context: [\n \"Issue 1 fix the type\"\n]\n\nReview comments: None\n\nReview threads: [\n \"There is still a typo 'pthon' instead of 'python'\"\n]\n\nFiles: []\n\nThread comments: I've left review comments, please address them\n---\nThis is a valid concern.\n\nPlease fix this issue."
614
-
615
- assert images_urls == []
616
- assert pr_handler.issue_type == 'pr'
617
- # Compare content ignoring exact formatting
618
- assert "There is still a typo 'pthon' instead of 'python'" in instruction
619
- assert "I've left review comments, please address them" in instruction
620
- assert 'This is a valid concern' in instruction
621
- assert conversation_instructions is not None
622
-
623
-
624
- def test_file_instruction():
625
- issue = Issue(
626
- owner='test_owner',
627
- repo='test_repo',
628
- number=123,
629
- title='Test Issue',
630
- body='This is a test issue ![image](https://sampleimage.com/sample.png)',
631
- )
632
- # load prompt from openhands/resolver/prompts/resolve/basic.jinja
633
- with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
634
- prompt = f.read()
635
-
636
- with open(
637
- 'openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r'
638
- ) as f:
639
- conversation_instructions_template = f.read()
640
-
641
- # Test without thread comments
642
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
643
- issue_handler = ServiceContextIssue(
644
- GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
645
- )
646
- instruction, conversation_instructions, images_urls = issue_handler.get_instruction(
647
- issue, prompt, conversation_instructions_template, None
648
- )
649
- expected_instruction = """Please fix the following issue for the repository in /workspace.
650
- An environment has been set up for you to start working. You may assume all necessary tools are installed.
651
-
652
- # Problem Statement
653
- Test Issue
654
-
655
- This is a test issue ![image](https://sampleimage.com/sample.png)"""
656
-
657
- expected_conversation_instructions = """IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.
658
- You SHOULD INCLUDE PROPER INDENTATION in your edit commands.
659
-
660
- When you think you have fixed the issue through code changes, please finish the interaction."""
661
-
662
- assert instruction == expected_instruction
663
- assert conversation_instructions == expected_conversation_instructions
664
- assert images_urls == ['https://sampleimage.com/sample.png']
665
-
666
-
667
- def test_file_instruction_with_repo_instruction():
668
- issue = Issue(
669
- owner='test_owner',
670
- repo='test_repo',
671
- number=123,
672
- title='Test Issue',
673
- body='This is a test issue',
674
- )
675
- # load prompt from openhands/resolver/prompts/resolve/basic.jinja
676
- with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
677
- prompt = f.read()
678
-
679
- with open(
680
- 'openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r'
681
- ) as f:
682
- conversation_instructions_prompt = f.read()
683
-
684
- # load repo instruction from openhands/resolver/prompts/repo_instructions/all-hands-ai___openhands-resolver.txt
685
- with open(
686
- 'openhands/resolver/prompts/repo_instructions/all-hands-ai___openhands-resolver.txt',
687
- 'r',
688
- ) as f:
689
- repo_instruction = f.read()
690
-
691
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
692
- issue_handler = ServiceContextIssue(
693
- GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
694
- )
695
- instruction, conversation_instructions, image_urls = issue_handler.get_instruction(
696
- issue, prompt, conversation_instructions_prompt, repo_instruction
697
- )
698
-
699
- expected_instruction = """Please fix the following issue for the repository in /workspace.
700
- An environment has been set up for you to start working. You may assume all necessary tools are installed.
701
-
702
- # Problem Statement
703
- Test Issue
704
-
705
- This is a test issue"""
706
-
707
- expected_conversation_instructions = """IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.
708
- You SHOULD INCLUDE PROPER INDENTATION in your edit commands.
709
-
710
- Some basic information about this repository:
711
- This is a Python repo for openhands-resolver, a library that attempts to resolve github issues with the AI agent OpenHands.
712
-
713
- - Setup: `poetry install --with test --with dev`
714
- - Testing: `poetry run pytest tests/test_*.py`
715
-
716
-
717
- When you think you have fixed the issue through code changes, please finish the interaction."""
718
-
719
- assert instruction == expected_instruction
720
- assert conversation_instructions == expected_conversation_instructions
721
- assert conversation_instructions is not None
722
- assert issue_handler.issue_type == 'issue'
723
- assert image_urls == []
724
-
725
-
726
- def test_guess_success():
727
- mock_issue = Issue(
728
- owner='test_owner',
729
- repo='test_repo',
730
- number=1,
731
- title='Test Issue',
732
- body='This is a test issue',
733
- )
734
- mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
735
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
736
-
737
- mock_completion_response = MagicMock()
738
- mock_completion_response.choices = [
739
- MagicMock(
740
- message=MagicMock(
741
- content='--- success\ntrue\n--- explanation\nIssue resolved successfully'
742
- )
743
- )
744
- ]
745
- issue_handler = ServiceContextIssue(
746
- GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
747
- )
748
-
749
- with patch.object(
750
- LLM, 'completion', MagicMock(return_value=mock_completion_response)
751
- ):
752
- success, comment_success, explanation = issue_handler.guess_success(
753
- mock_issue, mock_history
754
- )
755
- assert issue_handler.issue_type == 'issue'
756
- assert comment_success is None
757
- assert success
758
- assert explanation == 'Issue resolved successfully'
759
-
760
-
761
- def test_guess_success_with_thread_comments():
762
- mock_issue = Issue(
763
- owner='test_owner',
764
- repo='test_repo',
765
- number=1,
766
- title='Test Issue',
767
- body='This is a test issue',
768
- thread_comments=[
769
- 'First comment',
770
- 'Second comment',
771
- 'latest feedback:\nPlease add tests',
772
- ],
773
- )
774
- mock_history = [MagicMock(message='I have added tests for this case')]
775
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
776
-
777
- mock_completion_response = MagicMock()
778
- mock_completion_response.choices = [
779
- MagicMock(
780
- message=MagicMock(
781
- content='--- success\ntrue\n--- explanation\nTests have been added to verify thread comments handling'
782
- )
783
- )
784
- ]
785
- issue_handler = ServiceContextIssue(
786
- GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
787
- )
788
-
789
- with patch.object(
790
- LLM, 'completion', MagicMock(return_value=mock_completion_response)
791
- ):
792
- success, comment_success, explanation = issue_handler.guess_success(
793
- mock_issue, mock_history
794
- )
795
- assert issue_handler.issue_type == 'issue'
796
- assert comment_success is None
797
- assert success
798
- assert 'Tests have been added' in explanation
799
-
800
-
801
- def test_instruction_with_thread_comments():
802
- # Create an issue with thread comments
803
- issue = Issue(
804
- owner='test_owner',
805
- repo='test_repo',
806
- number=123,
807
- title='Test Issue',
808
- body='This is a test issue',
809
- thread_comments=[
810
- 'First comment',
811
- 'Second comment',
812
- 'latest feedback:\nPlease add tests',
813
- ],
814
- )
815
-
816
- # Load the basic prompt template
817
- with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
818
- prompt = f.read()
819
-
820
- with open(
821
- 'openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r'
822
- ) as f:
823
- conversation_instructions_template = f.read()
824
-
825
- llm_config = LLMConfig(model='test', api_key='test')
826
- issue_handler = ServiceContextIssue(
827
- GitlabIssueHandler('owner', 'repo', 'token'), llm_config
828
- )
829
- instruction, conversation_instructions, images_urls = issue_handler.get_instruction(
830
- issue, prompt, conversation_instructions_template, None
831
- )
832
-
833
- # Verify that thread comments are included in the instruction
834
- assert 'First comment' in instruction
835
- assert 'Second comment' in instruction
836
- assert 'Please add tests' in instruction
837
- assert 'Issue Thread Comments:' in instruction
838
- assert images_urls == []
839
-
840
-
841
- def test_guess_success_failure():
842
- mock_issue = Issue(
843
- owner='test_owner',
844
- repo='test_repo',
845
- number=1,
846
- title='Test Issue',
847
- body='This is a test issue',
848
- thread_comments=[
849
- 'First comment',
850
- 'Second comment',
851
- 'latest feedback:\nPlease add tests',
852
- ],
853
- )
854
- mock_history = [MagicMock(message='I have added tests for this case')]
855
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
856
-
857
- mock_completion_response = MagicMock()
858
- mock_completion_response.choices = [
859
- MagicMock(
860
- message=MagicMock(
861
- content='--- success\ntrue\n--- explanation\nTests have been added to verify thread comments handling'
862
- )
863
- )
864
- ]
865
- issue_handler = ServiceContextIssue(
866
- GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
867
- )
868
-
869
- with patch.object(
870
- LLM, 'completion', MagicMock(return_value=mock_completion_response)
871
- ):
872
- success, comment_success, explanation = issue_handler.guess_success(
873
- mock_issue, mock_history
874
- )
875
- assert issue_handler.issue_type == 'issue'
876
- assert comment_success is None
877
- assert success
878
- assert 'Tests have been added' in explanation
879
-
880
-
881
- def test_guess_success_negative_case():
882
- mock_issue = Issue(
883
- owner='test_owner',
884
- repo='test_repo',
885
- number=1,
886
- title='Test Issue',
887
- body='This is a test issue',
888
- )
889
- mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
890
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
891
-
892
- mock_completion_response = MagicMock()
893
- mock_completion_response.choices = [
894
- MagicMock(
895
- message=MagicMock(
896
- content='--- success\nfalse\n--- explanation\nIssue not resolved'
897
- )
898
- )
899
- ]
900
- issue_handler = ServiceContextIssue(
901
- GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
902
- )
903
-
904
- with patch.object(
905
- LLM, 'completion', MagicMock(return_value=mock_completion_response)
906
- ):
907
- success, comment_success, explanation = issue_handler.guess_success(
908
- mock_issue, mock_history
909
- )
910
- assert issue_handler.issue_type == 'issue'
911
- assert comment_success is None
912
- assert not success
913
- assert explanation == 'Issue not resolved'
914
-
915
-
916
- def test_guess_success_invalid_output():
917
- mock_issue = Issue(
918
- owner='test_owner',
919
- repo='test_repo',
920
- number=1,
921
- title='Test Issue',
922
- body='This is a test issue',
923
- )
924
- mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
925
- mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
926
-
927
- mock_completion_response = MagicMock()
928
- mock_completion_response.choices = [
929
- MagicMock(message=MagicMock(content='This is not a valid output'))
930
- ]
931
- issue_handler = ServiceContextIssue(
932
- GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
933
- )
934
-
935
- with patch.object(
936
- LLM, 'completion', MagicMock(return_value=mock_completion_response)
937
- ):
938
- success, comment_success, explanation = issue_handler.guess_success(
939
- mock_issue, mock_history
940
- )
941
- assert issue_handler.issue_type == 'issue'
942
- assert comment_success is None
943
- assert not success
944
- assert (
945
- explanation
946
- == 'Failed to decode answer from LLM response: This is not a valid output'
947
- )
948
-
949
-
950
- def test_download_issue_with_specific_comment():
951
- llm_config = LLMConfig(model='test', api_key='test')
952
- handler = ServiceContextIssue(
953
- GitlabIssueHandler('owner', 'repo', 'token'), llm_config
954
- )
955
-
956
- # Define the specific comment_id to filter
957
- specific_comment_id = 101
958
-
959
- # Mock issue and comment responses
960
- mock_issue_response = MagicMock()
961
- mock_issue_response.json.side_effect = [
962
- [
963
- {'iid': 1, 'title': 'Issue 1', 'description': 'This is an issue'},
964
- ],
965
- None,
966
- ]
967
- mock_issue_response.raise_for_status = MagicMock()
968
-
969
- mock_comments_response = MagicMock()
970
- mock_comments_response.json.return_value = [
971
- {
972
- 'id': specific_comment_id,
973
- 'body': 'Specific comment body',
974
- },
975
- {
976
- 'id': 102,
977
- 'body': 'Another comment body',
978
- },
979
- ]
980
- mock_comments_response.raise_for_status = MagicMock()
981
-
982
- def get_mock_response(url, *args, **kwargs):
983
- if '/notes' in url:
984
- return mock_comments_response
985
-
986
- return mock_issue_response
987
-
988
- with patch('httpx.get', side_effect=get_mock_response):
989
- issues = handler.get_converted_issues(
990
- issue_numbers=[1], comment_id=specific_comment_id
991
- )
992
-
993
- assert len(issues) == 1
994
- assert issues[0].number == 1
995
- assert issues[0].title == 'Issue 1'
996
- assert issues[0].thread_comments == ['Specific comment body']
997
-
998
-
999
- if __name__ == '__main__':
1000
- pytest.main()
 
tests/unit/resolver/gitlab/test_gitlab_send_pull_request.py DELETED
@@ -1,1206 +0,0 @@
1
- import os
2
- import tempfile
3
- from unittest.mock import ANY, MagicMock, call, patch
4
- from urllib.parse import quote
5
-
6
- import pytest
7
-
8
- from openhands.core.config import LLMConfig
9
- from openhands.integrations.service_types import ProviderType
10
- from openhands.resolver.interfaces.gitlab import GitlabIssueHandler
11
- from openhands.resolver.interfaces.issue import ReviewThread
12
- from openhands.resolver.resolver_output import Issue, ResolverOutput
13
- from openhands.resolver.send_pull_request import (
14
- apply_patch,
15
- initialize_repo,
16
- load_single_resolver_output,
17
- main,
18
- make_commit,
19
- process_single_issue,
20
- send_pull_request,
21
- update_existing_pull_request,
22
- )
23
-
24
-
25
- @pytest.fixture
26
- def mock_output_dir():
27
- with tempfile.TemporaryDirectory() as temp_dir:
28
- repo_path = os.path.join(temp_dir, 'repo')
29
- # Initialize a Gitlab repo in "repo" and add a commit with "README.md"
30
- os.makedirs(repo_path)
31
- os.system(f'git init {repo_path}')
32
- readme_path = os.path.join(repo_path, 'README.md')
33
- with open(readme_path, 'w') as f:
34
- f.write('hello world')
35
- os.system(f'git -C {repo_path} add README.md')
36
- os.system(f"git -C {repo_path} commit -m 'Initial commit'")
37
- yield temp_dir
38
-
39
-
40
- @pytest.fixture
41
- def mock_issue():
42
- return Issue(
43
- number=42,
44
- title='Test Issue',
45
- owner='test-owner',
46
- repo='test-repo',
47
- body='Test body',
48
- )
49
-
50
-
51
- @pytest.fixture
52
- def mock_llm_config():
53
- return LLMConfig()
54
-
55
-
56
- def test_load_single_resolver_output():
57
- mock_output_jsonl = 'tests/unit/resolver/mock_output/output.jsonl'
58
-
59
- # Test loading an existing issue
60
- resolver_output = load_single_resolver_output(mock_output_jsonl, 5)
61
- assert isinstance(resolver_output, ResolverOutput)
62
- assert resolver_output.issue.number == 5
63
- assert resolver_output.issue.title == 'Add MIT license'
64
- assert resolver_output.issue.owner == 'neubig'
65
- assert resolver_output.issue.repo == 'pr-viewer'
66
-
67
- # Test loading a non-existent issue
68
- with pytest.raises(ValueError):
69
- load_single_resolver_output(mock_output_jsonl, 999)
70
-
71
-
72
- def test_apply_patch(mock_output_dir):
73
- # Create a sample file in the mock repo
74
- sample_file = os.path.join(mock_output_dir, 'sample.txt')
75
- with open(sample_file, 'w') as f:
76
- f.write('Original content')
77
-
78
- # Create a sample patch
79
- patch_content = """
80
- diff --git a/sample.txt b/sample.txt
81
- index 9daeafb..b02def2 100644
82
- --- a/sample.txt
83
- +++ b/sample.txt
84
- @@ -1 +1,2 @@
85
- -Original content
86
- +Updated content
87
- +New line
88
- """
89
-
90
- # Apply the patch
91
- apply_patch(mock_output_dir, patch_content)
92
-
93
- # Check if the file was updated correctly
94
- with open(sample_file, 'r') as f:
95
- updated_content = f.read()
96
-
97
- assert updated_content.strip() == 'Updated content\nNew line'.strip()
98
-
99
-
100
- def test_apply_patch_preserves_line_endings(mock_output_dir):
101
- # Create sample files with different line endings
102
- unix_file = os.path.join(mock_output_dir, 'unix_style.txt')
103
- dos_file = os.path.join(mock_output_dir, 'dos_style.txt')
104
-
105
- with open(unix_file, 'w', newline='\n') as f:
106
- f.write('Line 1\nLine 2\nLine 3')
107
-
108
- with open(dos_file, 'w', newline='\r\n') as f:
109
- f.write('Line 1\r\nLine 2\r\nLine 3')
110
-
111
- # Create patches for both files
112
- unix_patch = """
113
- diff --git a/unix_style.txt b/unix_style.txt
114
- index 9daeafb..b02def2 100644
115
- --- a/unix_style.txt
116
- +++ b/unix_style.txt
117
- @@ -1,3 +1,3 @@
118
- Line 1
119
- -Line 2
120
- +Updated Line 2
121
- Line 3
122
- """
123
-
124
- dos_patch = """
125
- diff --git a/dos_style.txt b/dos_style.txt
126
- index 9daeafb..b02def2 100644
127
- --- a/dos_style.txt
128
- +++ b/dos_style.txt
129
- @@ -1,3 +1,3 @@
130
- Line 1
131
- -Line 2
132
- +Updated Line 2
133
- Line 3
134
- """
135
-
136
- # Apply patches
137
- apply_patch(mock_output_dir, unix_patch)
138
- apply_patch(mock_output_dir, dos_patch)
139
-
140
- # Check if line endings are preserved
141
- with open(unix_file, 'rb') as f:
142
- unix_content = f.read()
143
- with open(dos_file, 'rb') as f:
144
- dos_content = f.read()
145
-
146
- assert b'\r\n' not in unix_content, (
147
- 'Unix-style line endings were changed to DOS-style'
148
- )
149
- assert b'\r\n' in dos_content, 'DOS-style line endings were changed to Unix-style'
150
-
151
- # Check if content was updated correctly
152
- assert unix_content.decode('utf-8').split('\n')[1] == 'Updated Line 2'
153
- assert dos_content.decode('utf-8').split('\r\n')[1] == 'Updated Line 2'
154
-
155
-
156
- def test_apply_patch_create_new_file(mock_output_dir):
157
- # Create a patch that adds a new file
158
- patch_content = """
159
- diff --git a/new_file.txt b/new_file.txt
160
- new file mode 100644
161
- index 0000000..3b18e51
162
- --- /dev/null
163
- +++ b/new_file.txt
164
- @@ -0,0 +1 @@
165
- +hello world
166
- """
167
-
168
- # Apply the patch
169
- apply_patch(mock_output_dir, patch_content)
170
-
171
- # Check if the new file was created
172
- new_file_path = os.path.join(mock_output_dir, 'new_file.txt')
173
- assert os.path.exists(new_file_path), 'New file was not created'
174
-
175
- # Check if the file content is correct
176
- with open(new_file_path, 'r') as f:
177
- content = f.read().strip()
178
- assert content == 'hello world', 'File content is incorrect'
179
-
180
-
181
- def test_apply_patch_rename_file(mock_output_dir):
182
- # Create a sample file in the mock repo
183
- old_file = os.path.join(mock_output_dir, 'old_name.txt')
184
- with open(old_file, 'w') as f:
185
- f.write('This file will be renamed')
186
-
187
- # Create a patch that renames the file
188
- patch_content = """diff --git a/old_name.txt b/new_name.txt
189
- similarity index 100%
190
- rename from old_name.txt
191
- rename to new_name.txt"""
192
-
193
- # Apply the patch
194
- apply_patch(mock_output_dir, patch_content)
195
-
196
- # Check if the file was renamed
197
- new_file = os.path.join(mock_output_dir, 'new_name.txt')
198
- assert not os.path.exists(old_file), 'Old file still exists'
199
- assert os.path.exists(new_file), 'New file was not created'
200
-
201
- # Check if the content is preserved
202
- with open(new_file, 'r') as f:
203
- content = f.read()
204
- assert content == 'This file will be renamed'
205
-
206
-
207
- def test_apply_patch_delete_file(mock_output_dir):
208
- # Create a sample file in the mock repo
209
- sample_file = os.path.join(mock_output_dir, 'to_be_deleted.txt')
210
- with open(sample_file, 'w') as f:
211
- f.write('This file will be deleted')
212
-
213
- # Create a patch that deletes the file
214
- patch_content = """
215
- diff --git a/to_be_deleted.txt b/to_be_deleted.txt
216
- deleted file mode 100644
217
- index 9daeafb..0000000
218
- --- a/to_be_deleted.txt
219
- +++ /dev/null
220
- @@ -1 +0,0 @@
221
- -This file will be deleted
222
- """
223
-
224
- # Apply the patch
225
- apply_patch(mock_output_dir, patch_content)
226
-
227
- # Check if the file was deleted
228
- assert not os.path.exists(sample_file), 'File was not deleted'
229
-
230
-
231
- def test_initialize_repo(mock_output_dir):
232
- issue_type = 'issue'
233
- # Copy the repo to patches
234
- ISSUE_NUMBER = 3
235
- initialize_repo(mock_output_dir, ISSUE_NUMBER, issue_type)
236
- patches_dir = os.path.join(mock_output_dir, 'patches', f'issue_{ISSUE_NUMBER}')
237
-
238
- # Check if files were copied correctly
239
- assert os.path.exists(os.path.join(patches_dir, 'README.md'))
240
-
241
- # Check file contents
242
- with open(os.path.join(patches_dir, 'README.md'), 'r') as f:
243
- assert f.read() == 'hello world'
244
-
245
-
246
- @patch('openhands.resolver.interfaces.gitlab.GitlabIssueHandler.reply_to_comment')
247
- @patch('httpx.post')
248
- @patch('subprocess.run')
249
- @patch('openhands.resolver.send_pull_request.LLM')
250
- def test_update_existing_pull_request(
251
- mock_llm_class,
252
- mock_subprocess_run,
253
- mock_requests_post,
254
- mock_reply_to_comment,
255
- ):
256
- # Arrange: Set up test data
257
- issue = Issue(
258
- owner='test-owner',
259
- repo='test-repo',
260
- number=1,
261
- title='Test PR',
262
- body='This is a test PR',
263
- thread_ids=['comment1', 'comment2'],
264
- head_branch='test-branch',
265
- )
266
- token = 'test-token'
267
- username = 'test-user'
268
- patch_dir = '/path/to/patch'
269
- additional_message = '["Fixed bug in function A", "Updated documentation for B"]'
270
-
271
- # Mock the subprocess.run call for git push
272
- mock_subprocess_run.return_value = MagicMock(returncode=0)
273
-
274
- # Mock the requests.post call for adding a PR comment
275
- mock_requests_post.return_value.status_code = 201
276
-
277
- # Mock LLM instance and completion call
278
- mock_llm_instance = MagicMock()
279
- mock_completion_response = MagicMock()
280
- mock_completion_response.choices = [
281
- MagicMock(message=MagicMock(content='This is an issue resolution.'))
282
- ]
283
- mock_llm_instance.completion.return_value = mock_completion_response
284
- mock_llm_class.return_value = mock_llm_instance
285
-
286
- llm_config = LLMConfig()
287
-
288
- # Act: Call the function without comment_message to test auto-generation
289
- result = update_existing_pull_request(
290
- issue,
291
- token,
292
- username,
293
- ProviderType.GITLAB,
294
- patch_dir,
295
- llm_config,
296
- comment_message=None,
297
- additional_message=additional_message,
298
- )
299
-
300
- # Assert: Check if the git push command was executed
301
- push_command = (
302
- f'git -C {patch_dir} push '
303
- f'https://{username}:{token}@gitlab.com/'
304
- f'{issue.owner}/{issue.repo}.git {issue.head_branch}'
305
- )
306
- mock_subprocess_run.assert_called_once_with(
307
- push_command, shell=True, capture_output=True, text=True
308
- )
309
-
310
- # Assert: Check if the auto-generated comment was posted to the PR
311
- comment_url = f'https://gitlab.com/api/v4/projects/{quote(f"{issue.owner}/{issue.repo}", safe="")}/issues/{issue.number}/notes'
312
- expected_comment = 'This is an issue resolution.'
313
- mock_requests_post.assert_called_once_with(
314
- comment_url,
315
- headers={
316
- 'Authorization': f'Bearer {token}',
317
- 'Accept': 'application/json',
318
- },
319
- json={'body': expected_comment},
320
- )
321
-
322
- # Assert: Check if the reply_to_comment function was called for each thread ID
323
- mock_reply_to_comment.assert_has_calls(
324
- [
325
- call(issue.number, 'comment1', 'Fixed bug in function A'),
326
- call(issue.number, 'comment2', 'Updated documentation for B'),
327
- ]
328
- )
329
-
330
- # Assert: Check the returned PR URL
331
- assert (
332
- result
333
- == f'https://gitlab.com/{issue.owner}/{issue.repo}/-/merge_requests/{issue.number}'
334
- )
335
-
336
-
337
- @pytest.mark.parametrize(
338
- 'pr_type,target_branch,pr_title',
339
- [
340
- ('branch', None, None),
341
- ('draft', None, None),
342
- ('ready', None, None),
343
- ('branch', 'feature', None),
344
- ('draft', 'develop', None),
345
- ('ready', 'staging', None),
346
- ('ready', None, 'Custom PR Title'),
347
- ('draft', 'develop', 'Another Custom Title'),
348
- ],
349
- )
350
- @patch('subprocess.run')
351
- @patch('httpx.post')
352
- @patch('httpx.get')
353
- def test_send_pull_request(
354
- mock_get,
355
- mock_post,
356
- mock_run,
357
- mock_issue,
358
- mock_llm_config,
359
- mock_output_dir,
360
- pr_type,
361
- target_branch,
362
- pr_title,
363
- ):
364
- repo_path = os.path.join(mock_output_dir, 'repo')
365
-
366
- # Mock API responses based on whether target_branch is specified
367
- if target_branch:
368
- mock_get.side_effect = [
369
- MagicMock(status_code=404), # Branch doesn't exist
370
- MagicMock(status_code=200), # Target branch exists
371
- MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
372
- ]
373
- else:
374
- mock_get.side_effect = [
375
- MagicMock(status_code=404), # Branch doesn't exist
376
- MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
377
- MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
378
- ]
379
-
380
- mock_post.return_value.json.return_value = {
381
- 'web_url': 'https://gitlab.com/test-owner/test-repo/-/merge_requests/1',
382
- }
383
-
384
- # Mock subprocess.run calls
385
- mock_run.side_effect = [
386
- MagicMock(returncode=0), # git checkout -b
387
- MagicMock(returncode=0), # git push
388
- ]
389
-
390
- # Call the function
391
- result = send_pull_request(
392
- issue=mock_issue,
393
- token='test-token',
394
- username='test-user',
395
- platform=ProviderType.GITLAB,
396
- patch_dir=repo_path,
397
- pr_type=pr_type,
398
- target_branch=target_branch,
399
- pr_title=pr_title,
400
- )
401
-
402
- # Assert API calls
403
- expected_get_calls = 2
404
- if pr_type == 'branch':
405
- expected_get_calls = 3
406
-
407
- assert mock_get.call_count == expected_get_calls
408
-
409
- # Check branch creation and push
410
- assert mock_run.call_count == 2
411
- checkout_call, push_call = mock_run.call_args_list
412
-
413
- assert checkout_call == call(
414
- ['git', '-C', repo_path, 'checkout', '-b', 'openhands-fix-issue-42'],
415
- capture_output=True,
416
- text=True,
417
- )
418
- assert push_call == call(
419
- [
420
- 'git',
421
- '-C',
422
- repo_path,
423
- 'push',
424
- 'https://test-user:[email protected]/test-owner/test-repo.git',
425
- 'openhands-fix-issue-42',
426
- ],
427
- capture_output=True,
428
- text=True,
429
- )
430
-
431
- # Check PR creation based on pr_type
432
- if pr_type == 'branch':
433
- assert (
434
- result
435
- == 'https://gitlab.com/test-owner/test-repo/-/compare/main...openhands-fix-issue-42'
436
- )
437
- mock_post.assert_not_called()
438
- else:
439
- assert result == 'https://gitlab.com/test-owner/test-repo/-/merge_requests/1'
440
- mock_post.assert_called_once()
441
- post_data = mock_post.call_args[1]['json']
442
- expected_title = pr_title if pr_title else 'Fix issue #42: Test Issue'
443
- assert post_data['title'] == expected_title
444
- assert post_data['description'].startswith('This pull request fixes #42.')
445
- assert post_data['source_branch'] == 'openhands-fix-issue-42'
446
- assert post_data['target_branch'] == (
447
- target_branch if target_branch else 'main'
448
- )
449
- assert post_data['draft'] == (pr_type == 'draft')
450
-
451
-
452
- @patch('subprocess.run')
453
- @patch('httpx.post')
454
- @patch('httpx.put')
455
- @patch('httpx.get')
456
- def test_send_pull_request_with_reviewer(
457
- mock_get,
458
- mock_put,
459
- mock_post,
460
- mock_run,
461
- mock_issue,
462
- mock_output_dir,
463
- mock_llm_config,
464
- ):
465
- repo_path = os.path.join(mock_output_dir, 'repo')
466
- reviewer = 'test-reviewer'
467
-
468
- # Mock API responses
469
- mock_get.side_effect = [
470
- MagicMock(status_code=404), # Branch doesn't exist
471
- MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
472
- MagicMock(json=lambda: [{'id': 123}]), # Get user data
473
- ]
474
-
475
- # Mock PR creation response
476
- mock_post.side_effect = [
477
- MagicMock(
478
- status_code=200,
479
- json=lambda: {
480
- 'web_url': 'https://gitlab.com/test-owner/test-repo/-/merge_requests/1',
481
- 'iid': 1,
482
- },
483
- ), # PR creation
484
- ]
485
-
486
- # Mock request reviewers response
487
- mock_put.side_effect = [
488
- MagicMock(status_code=200), # Reviewer request
489
- ]
490
-
491
- # Mock subprocess.run calls
492
- mock_run.side_effect = [
493
- MagicMock(returncode=0), # git checkout -b
494
- MagicMock(returncode=0), # git push
495
- ]
496
-
497
- # Call the function with reviewer
498
- result = send_pull_request(
499
- issue=mock_issue,
500
- token='test-token',
501
- username='test-user',
502
- platform=ProviderType.GITLAB,
503
- patch_dir=repo_path,
504
- pr_type='ready',
505
- reviewer=reviewer,
506
- )
507
-
508
- # Assert API calls
509
- assert mock_get.call_count == 3
510
- assert mock_post.call_count == 1
511
- assert mock_put.call_count == 1
512
-
513
- # Check PR creation
514
- pr_create_call = mock_post.call_args_list[0]
515
- assert pr_create_call[1]['json']['title'] == 'Fix issue #42: Test Issue'
516
-
517
- # Check reviewer request
518
- reviewer_request_call = mock_put.call_args_list[0]
519
- assert (
520
- reviewer_request_call[0][0]
521
- == 'https://gitlab.com/api/v4/projects/test-owner%2Ftest-repo/merge_requests/1'
522
- )
523
- assert reviewer_request_call[1]['json'] == {'reviewer_ids': [123]}
524
-
525
- # Check the result URL
526
- assert result == 'https://gitlab.com/test-owner/test-repo/-/merge_requests/1'
527
-
528
-
529
- @patch('httpx.get')
530
- def test_send_pull_request_invalid_target_branch(
531
- mock_get, mock_issue, mock_output_dir, mock_llm_config
532
- ):
533
- """Test that an error is raised when specifying a non-existent target branch"""
534
- repo_path = os.path.join(mock_output_dir, 'repo')
535
-
536
- # Mock API response for non-existent branch
537
- mock_get.side_effect = [
538
- MagicMock(status_code=404), # Branch doesn't exist
539
- MagicMock(status_code=404), # Target branch doesn't exist
540
- ]
541
-
542
- # Test that ValueError is raised when target branch doesn't exist
543
- with pytest.raises(
544
- ValueError, match='Target branch nonexistent-branch does not exist'
545
- ):
546
- send_pull_request(
547
- issue=mock_issue,
548
- token='test-token',
549
- username='test-user',
550
- platform=ProviderType.GITLAB,
551
- patch_dir=repo_path,
552
- pr_type='ready',
553
- target_branch='nonexistent-branch',
554
- )
555
-
556
- # Verify API calls
557
- assert mock_get.call_count == 2
558
-
559
-
560
- @patch('subprocess.run')
561
- @patch('httpx.post')
562
- @patch('httpx.get')
563
- def test_send_pull_request_git_push_failure(
564
- mock_get, mock_post, mock_run, mock_issue, mock_output_dir, mock_llm_config
565
- ):
566
- repo_path = os.path.join(mock_output_dir, 'repo')
567
-
568
- # Mock API responses
569
- mock_get.return_value = MagicMock(json=lambda: {'default_branch': 'main'})
570
-
571
- # Mock the subprocess.run calls
572
- mock_run.side_effect = [
573
- MagicMock(returncode=0), # git checkout -b
574
- MagicMock(returncode=1, stderr='Error: failed to push some refs'), # git push
575
- ]
576
-
577
- # Test that RuntimeError is raised when git push fails
578
- with pytest.raises(
579
- RuntimeError, match='Failed to push changes to the remote repository'
580
- ):
581
- send_pull_request(
582
- issue=mock_issue,
583
- token='test-token',
584
- username='test-user',
585
- platform=ProviderType.GITLAB,
586
- patch_dir=repo_path,
587
- pr_type='ready',
588
- )
589
-
590
- # Assert that subprocess.run was called twice
591
- assert mock_run.call_count == 2
592
-
593
- # Check the git checkout -b command
594
- checkout_call = mock_run.call_args_list[0]
595
- assert checkout_call[0][0] == [
596
- 'git',
597
- '-C',
598
- repo_path,
599
- 'checkout',
600
- '-b',
601
- 'openhands-fix-issue-42',
602
- ]
603
-
604
- # Check the git push command
605
- push_call = mock_run.call_args_list[1]
606
- assert push_call[0][0] == [
607
- 'git',
608
- '-C',
609
- repo_path,
610
- 'push',
611
- 'https://test-user:[email protected]/test-owner/test-repo.git',
612
- 'openhands-fix-issue-42',
613
- ]
614
-
615
- # Assert that no pull request was created
616
- mock_post.assert_not_called()
617
-
618
-
619
- @patch('subprocess.run')
620
- @patch('httpx.post')
621
- @patch('httpx.get')
622
- def test_send_pull_request_permission_error(
623
- mock_get, mock_post, mock_run, mock_issue, mock_output_dir, mock_llm_config
624
- ):
625
- repo_path = os.path.join(mock_output_dir, 'repo')
626
-
627
- # Mock API responses
628
- mock_get.return_value = MagicMock(json=lambda: {'default_branch': 'main'})
629
- mock_post.return_value.status_code = 403
630
-
631
- # Mock subprocess.run calls
632
- mock_run.side_effect = [
633
- MagicMock(returncode=0), # git checkout -b
634
- MagicMock(returncode=0), # git push
635
- ]
636
-
637
- # Test that RuntimeError is raised when PR creation fails due to permissions
638
- with pytest.raises(
639
- RuntimeError, match='Failed to create pull request due to missing permissions.'
640
- ):
641
- send_pull_request(
642
- issue=mock_issue,
643
- token='test-token',
644
- username='test-user',
645
- platform=ProviderType.GITLAB,
646
- patch_dir=repo_path,
647
- pr_type='ready',
648
- )
649
-
650
- # Assert that the branch was created and pushed
651
- assert mock_run.call_count == 2
652
- mock_post.assert_called_once()
653
-
654
-
655
- @patch('httpx.post')
656
- @patch('httpx.get')
657
- def test_reply_to_comment(mock_get, mock_post, mock_issue):
658
- # Arrange: set up the test data
659
- token = 'test_token'
660
- comment_id = 'GID/test_comment_id'
661
- reply = 'This is a test reply.'
662
-
663
- # Create an instance of GitlabIssueHandler
664
- handler = GitlabIssueHandler(
665
- owner='test-owner', repo='test-repo', token=token, username='test-user'
666
- )
667
-
668
- mock_get.return_value = MagicMock(
669
- json=lambda: {
670
- 'notes': [
671
- {
672
- 'id': 123,
673
- }
674
- ]
675
- }
676
- )
677
-
678
- # Mock the response from the GraphQL API
679
- mock_response = MagicMock()
680
- mock_response.status_code = 200
681
- mock_response.json.return_value = {
682
- 'id': 123,
683
- 'body': 'Openhands fix success summary\n\n\nThis is a test reply.',
684
- 'createdAt': '2024-10-01T12:34:56Z',
685
- }
686
-
687
- mock_post.return_value = mock_response
688
-
689
- # Act: call the function
690
- handler.reply_to_comment(mock_issue.number, comment_id, reply)
691
-
692
- # Assert: check that the POST request was made with the correct parameters
693
- data = {
694
- 'body': 'Openhands fix success summary\n\n\nThis is a test reply.',
695
- 'note_id': 123,
696
- }
697
-
698
- # Check that the correct request was made to the API
699
- mock_post.assert_called_once_with(
700
- f'https://gitlab.com/api/v4/projects/{quote(f"{mock_issue.owner}/{mock_issue.repo}", safe="")}/merge_requests/{mock_issue.number}/discussions/{comment_id.split("/")[-1]}/notes',
701
- headers={
702
- 'Authorization': f'Bearer {token}',
703
- 'Accept': 'application/json',
704
- },
705
- json=data,
706
- )
707
-
708
- # Check that the response status was checked (via response.raise_for_status)
709
- mock_response.raise_for_status.assert_called_once()
710
-
711
-
712
- @patch('openhands.resolver.send_pull_request.initialize_repo')
713
- @patch('openhands.resolver.send_pull_request.apply_patch')
714
- @patch('openhands.resolver.send_pull_request.update_existing_pull_request')
715
- @patch('openhands.resolver.send_pull_request.make_commit')
716
- def test_process_single_pr_update(
717
- mock_make_commit,
718
- mock_update_existing_pull_request,
719
- mock_apply_patch,
720
- mock_initialize_repo,
721
- mock_output_dir,
722
- mock_llm_config,
723
- ):
724
- # Initialize test data
725
- token = 'test_token'
726
- username = 'test_user'
727
- pr_type = 'draft'
728
-
729
- resolver_output = ResolverOutput(
730
- issue=Issue(
731
- owner='test-owner',
732
- repo='test-repo',
733
- number=1,
734
- title='Issue 1',
735
- body='Body 1',
736
- closing_issues=[],
737
- review_threads=[
738
- ReviewThread(comment='review comment for feedback', files=[])
739
- ],
740
- thread_ids=['1'],
741
- head_branch='branch 1',
742
- ),
743
- issue_type='pr',
744
- instruction='Test instruction 1',
745
- base_commit='def456',
746
- git_patch='Test patch 1',
747
- history=[],
748
- metrics={},
749
- success=True,
750
- comment_success=None,
751
- result_explanation='[Test success 1]',
752
- error=None,
753
- )
754
-
755
- mock_update_existing_pull_request.return_value = (
756
- 'https://gitlab.com/test-owner/test-repo/-/merge_requests/1'
757
- )
758
- mock_initialize_repo.return_value = f'{mock_output_dir}/patches/pr_1'
759
-
760
- process_single_issue(
761
- mock_output_dir,
762
- resolver_output,
763
- token,
764
- username,
765
- ProviderType.GITLAB,
766
- pr_type,
767
- mock_llm_config,
768
- None,
769
- False,
770
- None,
771
- )
772
-
773
- mock_initialize_repo.assert_called_once_with(mock_output_dir, 1, 'pr', 'branch 1')
774
- mock_apply_patch.assert_called_once_with(
775
- f'{mock_output_dir}/patches/pr_1', resolver_output.git_patch
776
- )
777
- mock_make_commit.assert_called_once_with(
778
- f'{mock_output_dir}/patches/pr_1', resolver_output.issue, 'pr'
779
- )
780
- mock_update_existing_pull_request.assert_called_once_with(
781
- issue=resolver_output.issue,
782
- token=token,
783
- username=username,
784
- platform=ProviderType.GITLAB,
785
- patch_dir=f'{mock_output_dir}/patches/pr_1',
786
- additional_message='[Test success 1]',
787
- llm_config=mock_llm_config,
788
- base_domain='gitlab.com',
789
- )
790
-
791
-
792
- @patch('openhands.resolver.send_pull_request.initialize_repo')
793
- @patch('openhands.resolver.send_pull_request.apply_patch')
794
- @patch('openhands.resolver.send_pull_request.send_pull_request')
795
- @patch('openhands.resolver.send_pull_request.make_commit')
796
- def test_process_single_issue(
797
- mock_make_commit,
798
- mock_send_pull_request,
799
- mock_apply_patch,
800
- mock_initialize_repo,
801
- mock_output_dir,
802
- mock_llm_config,
803
- ):
804
- # Initialize test data
805
- token = 'test_token'
806
- username = 'test_user'
807
- pr_type = 'draft'
808
- platform = ProviderType.GITLAB
809
-
810
- resolver_output = ResolverOutput(
811
- issue=Issue(
812
- owner='test-owner',
813
- repo='test-repo',
814
- number=1,
815
- title='Issue 1',
816
- body='Body 1',
817
- ),
818
- issue_type='issue',
819
- instruction='Test instruction 1',
820
- base_commit='def456',
821
- git_patch='Test patch 1',
822
- history=[],
823
- metrics={},
824
- success=True,
825
- comment_success=None,
826
- result_explanation='Test success 1',
827
- error=None,
828
- )
829
-
830
- # Mock return value
831
- mock_send_pull_request.return_value = (
832
- 'https://gitlab.com/test-owner/test-repo/-/merge_requests/1'
833
- )
834
- mock_initialize_repo.return_value = f'{mock_output_dir}/patches/issue_1'
835
-
836
- # Call the function
837
- process_single_issue(
838
- mock_output_dir,
839
- resolver_output,
840
- token,
841
- username,
842
- platform,
843
- pr_type,
844
- mock_llm_config,
845
- None,
846
- False,
847
- None,
848
- )
849
-
850
- # Assert that the mocked functions were called with correct arguments
851
- mock_initialize_repo.assert_called_once_with(mock_output_dir, 1, 'issue', 'def456')
852
- mock_apply_patch.assert_called_once_with(
853
- f'{mock_output_dir}/patches/issue_1', resolver_output.git_patch
854
- )
855
- mock_make_commit.assert_called_once_with(
856
- f'{mock_output_dir}/patches/issue_1', resolver_output.issue, 'issue'
857
- )
858
- mock_send_pull_request.assert_called_once_with(
859
- issue=resolver_output.issue,
860
- token=token,
861
- username=username,
862
- platform=platform,
863
- patch_dir=f'{mock_output_dir}/patches/issue_1',
864
- pr_type=pr_type,
865
- fork_owner=None,
866
- additional_message=resolver_output.result_explanation,
867
- target_branch=None,
868
- reviewer=None,
869
- pr_title=None,
870
- base_domain='gitlab.com',
871
- )
872
-
873
-
874
- @patch('openhands.resolver.send_pull_request.initialize_repo')
875
- @patch('openhands.resolver.send_pull_request.apply_patch')
876
- @patch('openhands.resolver.send_pull_request.send_pull_request')
877
- @patch('openhands.resolver.send_pull_request.make_commit')
878
- def test_process_single_issue_unsuccessful(
879
- mock_make_commit,
880
- mock_send_pull_request,
881
- mock_apply_patch,
882
- mock_initialize_repo,
883
- mock_output_dir,
884
- mock_llm_config,
885
- ):
886
- # Initialize test data
887
- token = 'test_token'
888
- username = 'test_user'
889
- pr_type = 'draft'
890
-
891
- resolver_output = ResolverOutput(
892
- issue=Issue(
893
- owner='test-owner',
894
- repo='test-repo',
895
- number=1,
896
- title='Issue 1',
897
- body='Body 1',
898
- ),
899
- issue_type='issue',
900
- instruction='Test instruction 1',
901
- base_commit='def456',
902
- git_patch='Test patch 1',
903
- history=[],
904
- metrics={},
905
- success=False,
906
- comment_success=None,
907
- result_explanation='',
908
- error='Test error',
909
- )
910
-
911
- # Call the function
912
- process_single_issue(
913
- mock_output_dir,
914
- resolver_output,
915
- token,
916
- username,
917
- ProviderType.GITLAB,
918
- pr_type,
919
- mock_llm_config,
920
- None,
921
- False,
922
- None,
923
- )
924
-
925
- # Assert that none of the mocked functions were called
926
- mock_initialize_repo.assert_not_called()
927
- mock_apply_patch.assert_not_called()
928
- mock_make_commit.assert_not_called()
929
- mock_send_pull_request.assert_not_called()
930
-
931
-
932
- @patch('httpx.get')
933
- @patch('subprocess.run')
934
- def test_send_pull_request_branch_naming(
935
- mock_run, mock_get, mock_issue, mock_output_dir, mock_llm_config
936
- ):
937
- repo_path = os.path.join(mock_output_dir, 'repo')
938
-
939
- # Mock API responses
940
- mock_get.side_effect = [
941
- MagicMock(status_code=200), # First branch exists
942
- MagicMock(status_code=200), # Second branch exists
943
- MagicMock(status_code=404), # Third branch doesn't exist
944
- MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
945
- MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
946
- ]
947
-
948
- # Mock subprocess.run calls
949
- mock_run.side_effect = [
950
- MagicMock(returncode=0), # git checkout -b
951
- MagicMock(returncode=0), # git push
952
- ]
953
-
954
- # Call the function
955
- result = send_pull_request(
956
- issue=mock_issue,
957
- token='test-token',
958
- username='test-user',
959
- platform=ProviderType.GITLAB,
960
- patch_dir=repo_path,
961
- pr_type='branch',
962
- )
963
-
964
- # Assert API calls
965
- assert mock_get.call_count == 5
966
-
967
- # Check branch creation and push
968
- assert mock_run.call_count == 2
969
- checkout_call, push_call = mock_run.call_args_list
970
-
971
- assert checkout_call == call(
972
- ['git', '-C', repo_path, 'checkout', '-b', 'openhands-fix-issue-42-try3'],
973
- capture_output=True,
974
- text=True,
975
- )
976
- assert push_call == call(
977
- [
978
- 'git',
979
- '-C',
980
- repo_path,
981
- 'push',
982
- 'https://test-user:[email protected]/test-owner/test-repo.git',
983
- 'openhands-fix-issue-42-try3',
984
- ],
985
- capture_output=True,
986
- text=True,
987
- )
988
-
989
- # Check the result
990
- assert (
991
- result
992
- == 'https://gitlab.com/test-owner/test-repo/-/compare/main...openhands-fix-issue-42-try3'
993
- )
994
-
995
-
996
- @patch('openhands.resolver.send_pull_request.argparse.ArgumentParser')
997
- @patch('openhands.resolver.send_pull_request.process_single_issue')
998
- @patch('openhands.resolver.send_pull_request.load_single_resolver_output')
999
- @patch('openhands.resolver.send_pull_request.identify_token')
1000
- @patch('os.path.exists')
1001
- @patch('os.getenv')
1002
- def test_main(
1003
- mock_getenv,
1004
- mock_path_exists,
1005
- mock_identify_token,
1006
- mock_load_single_resolver_output,
1007
- mock_process_single_issue,
1008
- mock_parser,
1009
- ):
1010
- # Setup mock parser
1011
- mock_args = MagicMock()
1012
- mock_args.token = None
1013
- mock_args.username = 'mock_username'
1014
- mock_args.output_dir = '/mock/output'
1015
- mock_args.pr_type = 'draft'
1016
- mock_args.issue_number = '42'
1017
- mock_args.fork_owner = None
1018
- mock_args.send_on_failure = False
1019
- mock_args.llm_model = 'mock_model'
1020
- mock_args.llm_base_url = 'mock_url'
1021
- mock_args.llm_api_key = 'mock_key'
1022
- mock_args.target_branch = None
1023
- mock_args.reviewer = None
1024
- mock_args.pr_title = None
1025
- mock_args.selected_repo = None
1026
- mock_parser.return_value.parse_args.return_value = mock_args
1027
-
1028
- # Setup environment variables
1029
- mock_getenv.side_effect = (
1030
- lambda key, default=None: 'mock_token' if key == 'GITLAB_TOKEN' else default
1031
- )
1032
-
1033
- # Setup path exists
1034
- mock_path_exists.return_value = True
1035
-
1036
- # Setup mock resolver output
1037
- mock_resolver_output = MagicMock()
1038
- mock_load_single_resolver_output.return_value = mock_resolver_output
1039
-
1040
- mock_identify_token.return_value = ProviderType.GITLAB
1041
-
1042
- # Run main function
1043
- main()
1044
-
1045
- mock_identify_token.assert_called_with('mock_token', mock_args.base_domain)
1046
-
1047
- llm_config = LLMConfig(
1048
- model=mock_args.llm_model,
1049
- base_url=mock_args.llm_base_url,
1050
- api_key=mock_args.llm_api_key,
1051
- )
1052
-
1053
- # Use any_call instead of assert_called_with for more flexible matching
1054
- assert mock_process_single_issue.call_args == call(
1055
- '/mock/output',
1056
- mock_resolver_output,
1057
- 'mock_token',
1058
- 'mock_username',
1059
- ProviderType.GITLAB,
1060
- 'draft',
1061
- llm_config,
1062
- None,
1063
- False,
1064
- mock_args.target_branch,
1065
- mock_args.reviewer,
1066
- mock_args.pr_title,
1067
- ANY,
1068
- )
1069
-
1070
- # Other assertions
1071
- mock_parser.assert_called_once()
1072
- mock_getenv.assert_any_call('GITLAB_TOKEN')
1073
- mock_path_exists.assert_called_with('/mock/output')
1074
- mock_load_single_resolver_output.assert_called_with('/mock/output/output.jsonl', 42)
1075
-
1076
- # Test for invalid issue number
1077
- mock_args.issue_number = 'invalid'
1078
- with pytest.raises(ValueError):
1079
- main()
1080
-
1081
- # Test for invalid token
1082
- mock_args.issue_number = '42' # Reset to valid issue number
1083
- mock_getenv.side_effect = (
1084
- lambda key, default=None: None
1085
- ) # Return None for all env vars
1086
- with pytest.raises(ValueError, match='token is not set'):
1087
- main()
1088
-
1089
-
1090
- @patch('subprocess.run')
1091
- def test_make_commit_escapes_issue_title(mock_subprocess_run):
1092
- # Setup
1093
- repo_dir = '/path/to/repo'
1094
- issue = Issue(
1095
- owner='test-owner',
1096
- repo='test-repo',
1097
- number=42,
1098
- title='Issue with "quotes" and $pecial characters',
1099
- body='Test body',
1100
- )
1101
-
1102
- # Mock subprocess.run to return success for all calls
1103
- mock_subprocess_run.return_value = MagicMock(
1104
- returncode=0, stdout='sample output', stderr=''
1105
- )
1106
-
1107
- # Call the function
1108
- issue_type = 'issue'
1109
- make_commit(repo_dir, issue, issue_type)
1110
-
1111
- # Assert that subprocess.run was called with the correct arguments
1112
- calls = mock_subprocess_run.call_args_list
1113
- assert len(calls) == 4 # git config checks, git add, git commit
1114
-
1115
- # Check the git commit call
1116
- git_commit_call = calls[3][0][0]
1117
- expected_commit_message = (
1118
- 'Fix issue #42: Issue with "quotes" and $pecial characters'
1119
- )
1120
- assert [
1121
- 'git',
1122
- '-C',
1123
- '/path/to/repo',
1124
- 'commit',
1125
- '-m',
1126
- expected_commit_message,
1127
- ] == git_commit_call
1128
-
1129
-
1130
- @patch('subprocess.run')
1131
- def test_make_commit_no_changes(mock_subprocess_run):
1132
- # Setup
1133
- repo_dir = '/path/to/repo'
1134
- issue = Issue(
1135
- owner='test-owner',
1136
- repo='test-repo',
1137
- number=42,
1138
- title='Issue with no changes',
1139
- body='Test body',
1140
- )
1141
-
1142
- # Mock subprocess.run to simulate no changes in the repo
1143
- mock_subprocess_run.side_effect = [
1144
- MagicMock(returncode=0),
1145
- MagicMock(returncode=0),
1146
- MagicMock(returncode=1, stdout=''), # git status --porcelain (no changes)
1147
- ]
1148
-
1149
- with pytest.raises(
1150
- RuntimeError, match='ERROR: Openhands failed to make code changes.'
1151
- ):
1152
- make_commit(repo_dir, issue, 'issue')
1153
-
1154
- # Check that subprocess.run was called for checking git status and add, but not commit
1155
- assert mock_subprocess_run.call_count == 3
1156
- git_status_call = mock_subprocess_run.call_args_list[2][0][0]
1157
- assert f'git -C {repo_dir} status --porcelain' in git_status_call
1158
-
1159
-
1160
- def test_apply_patch_rename_directory(mock_output_dir):
1161
- # Create a sample directory structure
1162
- old_dir = os.path.join(mock_output_dir, 'prompts', 'resolve')
1163
- os.makedirs(old_dir)
1164
-
1165
- # Create test files
1166
- test_files = [
1167
- 'issue-success-check.jinja',
1168
- 'pr-feedback-check.jinja',
1169
- 'pr-thread-check.jinja',
1170
- ]
1171
- for filename in test_files:
1172
- file_path = os.path.join(old_dir, filename)
1173
- with open(file_path, 'w') as f:
1174
- f.write(f'Content of {filename}')
1175
-
1176
- # Create a patch that renames the directory
1177
- patch_content = """diff --git a/prompts/resolve/issue-success-check.jinja b/prompts/guess_success/issue-success-check.jinja
1178
- similarity index 100%
1179
- rename from prompts/resolve/issue-success-check.jinja
1180
- rename to prompts/guess_success/issue-success-check.jinja
1181
- diff --git a/prompts/resolve/pr-feedback-check.jinja b/prompts/guess_success/pr-feedback-check.jinja
1182
- similarity index 100%
1183
- rename from prompts/resolve/pr-feedback-check.jinja
1184
- rename to prompts/guess_success/pr-feedback-check.jinja
1185
- diff --git a/prompts/resolve/pr-thread-check.jinja b/prompts/guess_success/pr-thread-check.jinja
1186
- similarity index 100%
1187
- rename from prompts/resolve/pr-thread-check.jinja
1188
- rename to prompts/guess_success/pr-thread-check.jinja"""
1189
-
1190
- # Apply the patch
1191
- apply_patch(mock_output_dir, patch_content)
1192
-
1193
- # Check if files were moved correctly
1194
- new_dir = os.path.join(mock_output_dir, 'prompts', 'guess_success')
1195
- assert not os.path.exists(old_dir), 'Old directory still exists'
1196
- assert os.path.exists(new_dir), 'New directory was not created'
1197
-
1198
- # Check if all files were moved and content preserved
1199
- for filename in test_files:
1200
- old_path = os.path.join(old_dir, filename)
1201
- new_path = os.path.join(new_dir, filename)
1202
- assert not os.path.exists(old_path), f'Old file {filename} still exists'
1203
- assert os.path.exists(new_path), f'New file {filename} was not created'
1204
- with open(new_path, 'r') as f:
1205
- content = f.read()
1206
- assert content == f'Content of {filename}', f'Content mismatch for {filename}'
 
tests/unit/resolver/mock_output/output.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
tests/unit/resolver/mock_output/repo/src/App.css DELETED
@@ -1,42 +0,0 @@
1
- #root {
2
- max-width: 1280px;
3
- margin: 0 auto;
4
- padding: 2rem;
5
- text-align: center;
6
- }
7
-
8
- .logo {
9
- height: 6em;
10
- padding: 1.5em;
11
- will-change: filter;
12
- transition: filter 300ms;
13
- }
14
- .logo:hover {
15
- filter: drop-shadow(0 0 2em #646cffaa);
16
- }
17
- .logo.react:hover {
18
- filter: drop-shadow(0 0 2em #61dafbaa);
19
- }
20
-
21
- @keyframes logo-spin {
22
- from {
23
- transform: rotate(0deg);
24
- }
25
- to {
26
- transform: rotate(360deg);
27
- }
28
- }
29
-
30
- @media (prefers-reduced-motion: no-preference) {
31
- a:nth-of-type(2) .logo {
32
- animation: logo-spin infinite 20s linear;
33
- }
34
- }
35
-
36
- .card {
37
- padding: 2em;
38
- }
39
-
40
- .read-the-docs {
41
- color: #888;
42
- }
 
tests/unit/resolver/mock_output/repo/src/App.tsx DELETED
@@ -1,14 +0,0 @@
1
-
2
- import React from 'react'
3
- import './App.css'
4
- import PullRequestViewer from './PullRequestViewer'
5
-
6
- function App() {
7
- return (
8
- <div className="App">
9
- <PullRequestViewer />
10
- </div>
11
- )
12
- }
13
-
14
- export default App
 
tests/unit/resolver/mock_output/repo/src/PullRequestViewer.test.tsx DELETED
@@ -1,19 +0,0 @@
1
-
2
-
3
- import React from 'react';
4
- import { render, screen } from '@testing-library/react';
5
- import PullRequestViewer from './PullRequestViewer';
6
-
7
- describe('PullRequestViewer', () => {
8
- it('renders the component title', () => {
9
- render(<PullRequestViewer />);
10
- const titleElement = screen.getByText(/Pull Request Viewer/i);
11
- expect(titleElement).toBeInTheDocument();
12
- });
13
-
14
- it('renders the repository select dropdown', () => {
15
- render(<PullRequestViewer />);
16
- const selectElement = screen.getByRole('combobox', { name: /select a repository/i });
17
- expect(selectElement).toBeInTheDocument();
18
- });
19
- });
 
tests/unit/resolver/mock_output/repo/src/PullRequestViewer.tsx DELETED
@@ -1,112 +0,0 @@
-
-
-
- import React, { useState, useEffect } from 'react';
- import axios from 'axios';
- import { Octokit } from '@octokit/rest';
- import Select from 'react-select';
-
- const octokit = new Octokit({ auth: import.meta.env.VITE_GITHUB_TOKEN });
-
- interface PullRequest {
-   title: string;
-   html_url: string;
-   user: {
-     login: string;
-   };
- }
-
- interface Repo {
-   value: string;
-   label: string;
- }
-
- const PullRequestViewer: React.FC = () => {
-   const [repos, setRepos] = useState<Repo[]>([]);
-   const [selectedRepo, setSelectedRepo] = useState<Repo | null>(null);
-   const [pullRequests, setPullRequests] = useState<PullRequest[]>([]);
-
-   useEffect(() => {
-     const fetchRepos = async () => {
-       try {
-         const response = await octokit.repos.listForOrg({
-           org: 'OpenDevin',
-           type: 'all',
-         });
-         const repoOptions = response.data.map(repo => ({
-           value: repo.name,
-           label: repo.name,
-         }));
-         setRepos(repoOptions);
-       } catch (error) {
-         console.error('Error fetching repos:', error);
-       }
-     };
-     fetchRepos();
-   }, []);
-
-   useEffect(() => {
-     const fetchPullRequests = async () => {
-       if (selectedRepo) {
-         try {
-           let allPullRequests: PullRequest[] = [];
-           let page = 1;
-           let hasNextPage = true;
-
-           while (hasNextPage) {
-             const response = await octokit.pulls.list({
-               owner: 'OpenDevin',
-               repo: selectedRepo.value,
-               state: 'open',
-               per_page: 100,
-               page: page,
-             });
-
-             allPullRequests = [...allPullRequests, ...response.data];
-
-             if (response.data.length < 100) {
-               hasNextPage = false;
-             } else {
-               page++;
-             }
-           }
-
-           setPullRequests(allPullRequests);
-         } catch (error) {
-           console.error('Error fetching pull requests:', error);
-         }
-       }
-     };
-     fetchPullRequests();
-   }, [selectedRepo]);
-
-   return (
-     <div>
-       <h1>Pull Request Viewer</h1>
-       <Select
-         options={repos}
-         value={selectedRepo}
-         onChange={(option) => setSelectedRepo(option as Repo)}
-         placeholder="Select a repository"
-         aria-label="Select a repository"
-       />
-       {pullRequests.length > 0 ? (
-         <ul>
-           {pullRequests.map((pr) => (
-             <li key={pr.html_url}>
-               <a href={pr.html_url} target="_blank" rel="noopener noreferrer">
-                 {pr.title}
-               </a>
-               {' by '}
-               {pr.user.login}
-             </li>
-           ))}
-         </ul>
-       ) : (
-         <p>No open pull requests found.</p>
-       )}
-     </div>
-   );
- };
-
- export default PullRequestViewer;
 
tests/unit/resolver/test_issue_handler_factory.py DELETED
@@ -1,77 +0,0 @@
- from typing import Type
- from unittest.mock import MagicMock
-
- import pytest
- from pydantic import SecretStr
-
- from openhands.core.config import LLMConfig
- from openhands.integrations.provider import ProviderType
- from openhands.resolver.interfaces.github import GithubIssueHandler, GithubPRHandler
- from openhands.resolver.interfaces.gitlab import GitlabIssueHandler, GitlabPRHandler
- from openhands.resolver.issue_handler_factory import IssueHandlerFactory
- from openhands.resolver.interfaces.issue_definitions import (
-     ServiceContextIssue,
-     ServiceContextPR,
- )
-
-
- @pytest.fixture
- def llm_config():
-     return LLMConfig(
-         model='test-model',
-         api_key=SecretStr('test-key'),
-     )
-
-
- @pytest.fixture
- def factory_params(llm_config):
-     return {
-         'owner': 'test-owner',
-         'repo': 'test-repo',
-         'token': 'test-token',
-         'username': 'test-user',
-         'base_domain': 'github.com',
-         'llm_config': llm_config,
-     }
-
-
- test_cases = [
-     # platform, issue_type, expected_context_type, expected_handler_type
-     (ProviderType.GITHUB, 'issue', ServiceContextIssue, GithubIssueHandler),
-     (ProviderType.GITHUB, 'pr', ServiceContextPR, GithubPRHandler),
-     (ProviderType.GITLAB, 'issue', ServiceContextIssue, GitlabIssueHandler),
-     (ProviderType.GITLAB, 'pr', ServiceContextPR, GitlabPRHandler),
- ]
-
-
- @pytest.mark.parametrize(
-     'platform,issue_type,expected_context_type,expected_handler_type',
-     test_cases
- )
- def test_handler_creation(
-     factory_params,
-     platform: ProviderType,
-     issue_type: str,
-     expected_context_type: Type,
-     expected_handler_type: Type,
- ):
-     factory = IssueHandlerFactory(
-         **factory_params,
-         platform=platform,
-         issue_type=issue_type
-     )
-
-     handler = factory.create()
-
-     assert isinstance(handler, expected_context_type)
-     assert isinstance(handler._strategy, expected_handler_type)
-
- def test_invalid_issue_type(factory_params):
-     factory = IssueHandlerFactory(
-         **factory_params,
-         platform=ProviderType.GITHUB,
-         issue_type='invalid'
-     )
-
-     with pytest.raises(ValueError, match='Invalid issue type: invalid'):
-         factory.create()
 
tests/unit/resolver/test_issue_references.py DELETED
@@ -1,56 +0,0 @@
- from openhands.resolver.utils import extract_issue_references
-
-
- def test_extract_issue_references():
-     # Test basic issue reference
-     assert extract_issue_references('Fixes #123') == [123]
-
-     # Test multiple issue references
-     assert extract_issue_references('Fixes #123, #456') == [123, 456]
-
-     # Test issue references in code blocks should be ignored
-     assert extract_issue_references("""
-     Here's a code block:
-     ```python
-     # This is a comment with #123
-     def func():
-         pass # Another #456
-     ```
-     But this #789 should be extracted
-     """) == [789]
-
-     # Test issue references in inline code should be ignored
-     assert extract_issue_references(
-         'This `#123` should be ignored but #456 should be extracted'
-     ) == [456]
-     assert extract_issue_references(
-         'This `#123` should be ignored but #456 should be extracted'
-     ) == [456]
-
-     # Test issue references in URLs should be ignored
-     assert extract_issue_references(
-         'Check http://example.com/#123 but #456 should be extracted'
-     ) == [456]
-     assert extract_issue_references(
-         'Check http://example.com/#123 but #456 should be extracted'
-     ) == [456]
-
-     # Test issue references in markdown links should be extracted
-     assert extract_issue_references('[Link to #123](http://example.com) and #456') == [
-         123,
-         456,
-     ]
-     assert extract_issue_references('[Link to #123](http://example.com) and #456') == [
-         123,
-         456,
-     ]
-
-     # Test issue references with text around them
-     assert extract_issue_references('Issue #123 is fixed and #456 is pending') == [
-         123,
-         456,
-     ]
-     assert extract_issue_references('Issue #123 is fixed and #456 is pending') == [
-         123,
-         456,
-     ]
 
tests/unit/resolver/test_patch_apply.py DELETED
@@ -1,47 +0,0 @@
- from openhands.resolver.patching.apply import apply_diff
- from openhands.resolver.patching.patch import diffobj, parse_diff
-
-
- def test_patch_apply_with_empty_lines():
-     # The original file has no indentation and uses \n line endings
-     original_content = '# PR Viewer\n\nThis React application allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.\n\n## Setup'
-
-     # The patch has spaces at the start of each line and uses \n line endings
-     patch = """diff --git a/README.md b/README.md
- index b760a53..5071727 100644
- --- a/README.md
- +++ b/README.md
- @@ -1,3 +1,3 @@
- # PR Viewer
-
- -This React application allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.
- +This React application was created by Graham Neubig and OpenHands. It allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization."""
-
-     print('Original content lines:')
-     for i, line in enumerate(original_content.splitlines(), 1):
-         print(f'{i}: {repr(line)}')
-
-     print('\nPatch lines:')
-     for i, line in enumerate(patch.splitlines(), 1):
-         print(f'{i}: {repr(line)}')
-
-     changes = parse_diff(patch)
-     print('\nParsed changes:')
-     for change in changes:
-         print(
-             f'Change(old={change.old}, new={change.new}, line={repr(change.line)}, hunk={change.hunk})'
-         )
-     diff = diffobj(header=None, changes=changes, text=patch)
-
-     # Apply the patch
-     result = apply_diff(diff, original_content)
-
-     # The patch should be applied successfully
-     expected_result = [
-         '# PR Viewer',
-         '',
-         'This React application was created by Graham Neubig and OpenHands. It allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.',
-         '',
-         '## Setup',
-     ]
-     assert result == expected_result
 
tests/unit/resolver/test_resolve_issue.py DELETED
@@ -1,171 +0,0 @@
- from unittest import mock
-
- import pytest
-
- from openhands.core.config import SandboxConfig, OpenHandsConfig
- from openhands.events.action import CmdRunAction
- from openhands.resolver.issue_resolver import IssueResolver
-
-
- def assert_sandbox_config(
-     config: SandboxConfig,
-     base_container_image=SandboxConfig.model_fields['base_container_image'].default,
-     runtime_container_image='ghcr.io/all-hands-ai/runtime:mock-nikolaik',  # Default to mock version
-     local_runtime_url=SandboxConfig.model_fields['local_runtime_url'].default,
- ):
-     """Helper function to assert the properties of the SandboxConfig object."""
-     assert isinstance(config, SandboxConfig)
-     assert config.base_container_image == base_container_image
-     assert config.runtime_container_image == runtime_container_image
-     assert config.enable_auto_lint is False
-     assert config.use_host_network is False
-     assert config.timeout == 300
-     assert config.local_runtime_url == local_runtime_url
-
-
- def test_setup_sandbox_config_default():
-     """Test default configuration when no images provided and not experimental"""
-     with mock.patch('openhands.__version__', 'mock'):
-         openhands_config = OpenHandsConfig()
-
-         IssueResolver.update_sandbox_config(
-             openhands_config=openhands_config,
-             base_container_image=None,
-             runtime_container_image=None,
-             is_experimental=False,
-         )
-
-         assert_sandbox_config(
-             openhands_config.sandbox, runtime_container_image='ghcr.io/all-hands-ai/runtime:mock-nikolaik'
-         )
-
-
- def test_setup_sandbox_config_both_images():
-     """Test that providing both container images raises ValueError"""
-     with pytest.raises(
-         ValueError, match='Cannot provide both runtime and base container images.'
-     ):
-         openhands_config = OpenHandsConfig()
-
-         IssueResolver.update_sandbox_config(
-             openhands_config=openhands_config,
-             base_container_image='base-image',
-             runtime_container_image='runtime-image',
-             is_experimental=False,
-         )
-
-
- def test_setup_sandbox_config_base_only():
-     """Test configuration when only base_container_image is provided"""
-     base_image = 'custom-base-image'
-     openhands_config = OpenHandsConfig()
-
-     IssueResolver.update_sandbox_config(
-         openhands_config=openhands_config,
-         base_container_image=base_image,
-         runtime_container_image=None,
-         is_experimental=False,
-     )
-
-     assert_sandbox_config(
-         openhands_config.sandbox, base_container_image=base_image, runtime_container_image=None
-     )
-
-
- def test_setup_sandbox_config_runtime_only():
-     """Test configuration when only runtime_container_image is provided"""
-     runtime_image = 'custom-runtime-image'
-     openhands_config = OpenHandsConfig()
-
-     IssueResolver.update_sandbox_config(
-         openhands_config=openhands_config,
-         base_container_image=None,
-         runtime_container_image=runtime_image,
-         is_experimental=False,
-     )
-
-     assert_sandbox_config(openhands_config.sandbox, runtime_container_image=runtime_image)
-
-
- def test_setup_sandbox_config_experimental():
-     """Test configuration when experimental mode is enabled"""
-     with mock.patch('openhands.__version__', 'mock'):
-         openhands_config = OpenHandsConfig()
-
-         IssueResolver.update_sandbox_config(
-             openhands_config=openhands_config,
-             base_container_image=None,
-             runtime_container_image=None,
-             is_experimental=True,
-         )
-
-         assert_sandbox_config(openhands_config.sandbox, runtime_container_image=None)
-
-
- @mock.patch('openhands.resolver.issue_resolver.os.getuid', return_value=0)
- @mock.patch('openhands.resolver.issue_resolver.get_unique_uid', return_value=1001)
- def test_setup_sandbox_config_gitlab_ci(mock_get_unique_uid, mock_getuid):
-     """Test GitLab CI specific configuration when running as root"""
-     with mock.patch('openhands.__version__', 'mock'):
-         with mock.patch.object(IssueResolver, 'GITLAB_CI', True):
-             openhands_config = OpenHandsConfig()
-
-             IssueResolver.update_sandbox_config(
-                 openhands_config=openhands_config,
-                 base_container_image=None,
-                 runtime_container_image=None,
-                 is_experimental=False,
-             )
-
-             assert_sandbox_config(openhands_config.sandbox, local_runtime_url='http://localhost')
-
-
- @mock.patch('openhands.resolver.issue_resolver.os.getuid', return_value=1000)
- def test_setup_sandbox_config_gitlab_ci_non_root(mock_getuid):
-     """Test GitLab CI configuration when not running as root"""
-     with mock.patch('openhands.__version__', 'mock'):
-         with mock.patch.object(IssueResolver, 'GITLAB_CI', True):
-             openhands_config = OpenHandsConfig()
-
-             IssueResolver.update_sandbox_config(
-                 openhands_config=openhands_config,
-                 base_container_image=None,
-                 runtime_container_image=None,
-                 is_experimental=False,
-             )
-
-             assert_sandbox_config(openhands_config.sandbox, local_runtime_url='http://localhost')
-
-
- @mock.patch('openhands.events.observation.CmdOutputObservation')
- @mock.patch('openhands.runtime.base.Runtime')
- def test_initialize_runtime_runs_setup_script_and_git_hooks(
-     mock_runtime, mock_cmd_output
- ):
-     """Test that initialize_runtime calls maybe_run_setup_script and maybe_setup_git_hooks"""
-
-     # Create a minimal resolver instance with just the methods we need
-     class MinimalResolver:
-         def initialize_runtime(self, runtime):
-             # This is the method we're testing
-             action = CmdRunAction(command='git config --global core.pager ""')
-             runtime.run_action(action)
-
-             # Run setup script if it exists
-             runtime.maybe_run_setup_script()
-
-             # Setup git hooks if they exist
-             runtime.maybe_setup_git_hooks()
-
-     resolver = MinimalResolver()
-
-     # Mock the runtime's run_action method to return a successful CmdOutputObservation
-     mock_cmd_output.return_value.exit_code = 0
-     mock_runtime.run_action.return_value = mock_cmd_output.return_value
-
-     # Call the method
-     resolver.initialize_runtime(mock_runtime)
-
-     # Verify that both methods were called
-     mock_runtime.maybe_run_setup_script.assert_called_once()
-     mock_runtime.maybe_setup_git_hooks.assert_called_once()
 
tests/unit/test_acompletion.py DELETED
@@ -1,196 +0,0 @@
- import asyncio
- from contextlib import contextmanager
- from unittest.mock import AsyncMock, MagicMock, patch
-
- import pytest
-
- from openhands.core.config import load_openhands_config
- from openhands.core.exceptions import UserCancelledError
- from openhands.llm.async_llm import AsyncLLM
- from openhands.llm.llm import LLM
- from openhands.llm.streaming_llm import StreamingLLM
-
- config = load_openhands_config()
-
-
- @pytest.fixture
- def test_llm():
-     return _get_llm(LLM)
-
-
- def _get_llm(type_: type[LLM]):
-     with _patch_http():
-         return type_(config=config.get_llm_config())
-
-
- @pytest.fixture
- def mock_response():
-     return [
-         {'choices': [{'delta': {'content': 'This is a'}}]},
-         {'choices': [{'delta': {'content': ' test'}}]},
-         {'choices': [{'delta': {'content': ' message.'}}]},
-         {'choices': [{'delta': {'content': ' It is'}}]},
-         {'choices': [{'delta': {'content': ' a bit'}}]},
-         {'choices': [{'delta': {'content': ' longer'}}]},
-         {'choices': [{'delta': {'content': ' than'}}]},
-         {'choices': [{'delta': {'content': ' the'}}]},
-         {'choices': [{'delta': {'content': ' previous'}}]},
-         {'choices': [{'delta': {'content': ' one,'}}]},
-         {'choices': [{'delta': {'content': ' but'}}]},
-         {'choices': [{'delta': {'content': ' hopefully'}}]},
-         {'choices': [{'delta': {'content': ' still'}}]},
-         {'choices': [{'delta': {'content': ' short'}}]},
-         {'choices': [{'delta': {'content': ' enough.'}}]},
-     ]
-
-
- @contextmanager
- def _patch_http():
-     with patch('openhands.llm.llm.httpx.get', MagicMock()) as mock_http:
-         mock_http.json.return_value = {
-             'data': [
-                 {'model_name': 'some_model'},
-                 {'model_name': 'another_model'},
-             ]
-         }
-         yield
-
-
- @pytest.mark.asyncio
- async def test_acompletion_non_streaming():
-     with patch.object(AsyncLLM, '_call_acompletion') as mock_call_acompletion:
-         mock_response = {
-             'choices': [{'message': {'content': 'This is a test message.'}}]
-         }
-         mock_call_acompletion.return_value = mock_response
-         test_llm = _get_llm(AsyncLLM)
-         response = await test_llm.async_completion(
-             messages=[{'role': 'user', 'content': 'Hello!'}],
-             stream=False,
-             drop_params=True,
-         )
-         # Assertions for non-streaming completion
-         assert response['choices'][0]['message']['content'] != ''
-
-
- @pytest.mark.asyncio
- async def test_acompletion_streaming(mock_response):
-     with patch.object(StreamingLLM, '_call_acompletion') as mock_call_acompletion:
-         mock_call_acompletion.return_value.__aiter__.return_value = iter(mock_response)
-         test_llm = _get_llm(StreamingLLM)
-         async for chunk in test_llm.async_streaming_completion(
-             messages=[{'role': 'user', 'content': 'Hello!'}], stream=True
-         ):
-             print(f'Chunk: {chunk["choices"][0]["delta"]["content"]}')
-             # Assertions for streaming completion
-             assert chunk['choices'][0]['delta']['content'] in [
-                 r['choices'][0]['delta']['content'] for r in mock_response
-             ]
-
-
- @pytest.mark.asyncio
- async def test_completion(test_llm):
-     with patch.object(LLM, 'completion') as mock_completion:
-         mock_completion.return_value = {
-             'choices': [{'message': {'content': 'This is a test message.'}}]
-         }
-         response = test_llm.completion(messages=[{'role': 'user', 'content': 'Hello!'}])
-         assert response['choices'][0]['message']['content'] == 'This is a test message.'
-
-
- @pytest.mark.asyncio
- @pytest.mark.parametrize('cancel_delay', [0.1, 0.3, 0.5, 0.7, 0.9])
- async def test_async_completion_with_user_cancellation(cancel_delay):
-     cancel_event = asyncio.Event()
-
-     async def mock_on_cancel_requested():
-         is_set = cancel_event.is_set()
-         print(f'Cancel requested: {is_set}')
-         return is_set
-
-     async def mock_acompletion(*args, **kwargs):
-         print('Starting mock_acompletion')
-         for i in range(20):  # Increased iterations for longer running task
-             print(f'mock_acompletion iteration {i}')
-             await asyncio.sleep(0.1)
-             if await mock_on_cancel_requested():
-                 print('Cancellation detected in mock_acompletion')
-                 raise UserCancelledError('LLM request cancelled by user')
-         print('Completing mock_acompletion without cancellation')
-         return {'choices': [{'message': {'content': 'This is a test message.'}}]}
-
-     with patch.object(
-         AsyncLLM, '_call_acompletion', new_callable=AsyncMock
-     ) as mock_call_acompletion:
-         mock_call_acompletion.side_effect = mock_acompletion
-         test_llm = _get_llm(AsyncLLM)
-
-         async def cancel_after_delay():
-             print(f'Starting cancel_after_delay with delay {cancel_delay}')
-             await asyncio.sleep(cancel_delay)
-             print('Setting cancel event')
-             cancel_event.set()
-
-         with pytest.raises(UserCancelledError):
-             await asyncio.gather(
-                 test_llm.async_completion(
-                     messages=[{'role': 'user', 'content': 'Hello!'}],
-                     stream=False,
-                 ),
-                 cancel_after_delay(),
-             )
-
-         # Ensure the mock was called
-         mock_call_acompletion.assert_called_once()
-
-
- @pytest.mark.asyncio
- @pytest.mark.parametrize('cancel_after_chunks', [1, 3, 5, 7, 9])
- async def test_async_streaming_completion_with_user_cancellation(cancel_after_chunks):
-     cancel_requested = False
-
-     test_messages = [
-         'This is ',
-         'a test ',
-         'message ',
-         'with ',
-         'multiple ',
-         'chunks ',
-         'to ',
-         'simulate ',
-         'a ',
-         'longer ',
-         'streaming ',
-         'response.',
-     ]
-
-     async def mock_acompletion(*args, **kwargs):
-         for i, content in enumerate(test_messages):
-             yield {'choices': [{'delta': {'content': content}}]}
-             if i + 1 == cancel_after_chunks:
-                 nonlocal cancel_requested
-                 cancel_requested = True
-             if cancel_requested:
-                 raise UserCancelledError('LLM request cancelled by user')
-             await asyncio.sleep(0.05)  # Simulate some delay between chunks
-
-     with patch.object(
-         AsyncLLM, '_call_acompletion', new_callable=AsyncMock
-     ) as mock_call_acompletion:
-         mock_call_acompletion.return_value = mock_acompletion()
-         test_llm = _get_llm(StreamingLLM)
-
-         received_chunks = []
-         with pytest.raises(UserCancelledError):
-             async for chunk in test_llm.async_streaming_completion(
-                 messages=[{'role': 'user', 'content': 'Hello!'}], stream=True
-             ):
-                 received_chunks.append(chunk['choices'][0]['delta']['content'])
-                 print(f'Chunk: {chunk["choices"][0]["delta"]["content"]}')
-
-         # Assert that we received the expected number of chunks before cancellation
-         assert len(received_chunks) == cancel_after_chunks
-         assert received_chunks == test_messages[:cancel_after_chunks]
-
-         # Ensure the mock was called
-         mock_call_acompletion.assert_called_once()