Delete tests
This view is limited to 50 files because it contains too many changes.
- tests/runtime/README.md +0 -60
- tests/runtime/conftest.py +0 -300
- tests/runtime/test_aci_edit.py +0 -733
- tests/runtime/test_bash.py +0 -1462
- tests/runtime/test_browsergym_envs.py +0 -73
- tests/runtime/test_browsing.py +0 -213
- tests/runtime/test_docker_images.py +0 -96
- tests/runtime/test_env_vars.py +0 -120
- tests/runtime/test_glob_and_grep.py +0 -303
- tests/runtime/test_ipython.py +0 -382
- tests/runtime/test_llm_based_edit.py +0 -413
- tests/runtime/test_mcp_action.py +0 -362
- tests/runtime/test_microagent.py +0 -443
- tests/runtime/test_replay.py +0 -161
- tests/runtime/test_runtime_resource.py +0 -115
- tests/runtime/test_setup.py +0 -84
- tests/runtime/test_stress_remote_runtime.py +0 -483
- tests/runtime/trajs/basic.json +0 -202
- tests/runtime/trajs/basic_gui_mode.json +0 -631
- tests/runtime/trajs/basic_interactions.json +0 -128
- tests/runtime/trajs/wrong_initial_state.json +0 -454
- tests/runtime/utils/test_system_stats.py +0 -60
- tests/test_fileops.py +0 -66
- tests/unit/README.md +0 -29
- tests/unit/core/config/test_config_utils.py +0 -168
- tests/unit/frontend/test_translation_completeness.py +0 -33
- tests/unit/resolver/github/test_guess_success.py +0 -202
- tests/unit/resolver/github/test_issue_handler.py +0 -645
- tests/unit/resolver/github/test_issue_handler_error_handling.py +0 -281
- tests/unit/resolver/github/test_pr_handler_guess_success.py +0 -672
- tests/unit/resolver/github/test_pr_title_escaping.py +0 -166
- tests/unit/resolver/github/test_resolve_issues.py +0 -1035
- tests/unit/resolver/github/test_send_pull_request.py +0 -1304
- tests/unit/resolver/gitlab/test_gitlab_guess_success.py +0 -202
- tests/unit/resolver/gitlab/test_gitlab_issue_handler.py +0 -683
- tests/unit/resolver/gitlab/test_gitlab_issue_handler_error_handling.py +0 -283
- tests/unit/resolver/gitlab/test_gitlab_pr_handler_guess_success.py +0 -672
- tests/unit/resolver/gitlab/test_gitlab_pr_title_escaping.py +0 -166
- tests/unit/resolver/gitlab/test_gitlab_resolve_issues.py +0 -1000
- tests/unit/resolver/gitlab/test_gitlab_send_pull_request.py +0 -1206
- tests/unit/resolver/mock_output/output.jsonl +0 -0
- tests/unit/resolver/mock_output/repo/src/App.css +0 -42
- tests/unit/resolver/mock_output/repo/src/App.tsx +0 -14
- tests/unit/resolver/mock_output/repo/src/PullRequestViewer.test.tsx +0 -19
- tests/unit/resolver/mock_output/repo/src/PullRequestViewer.tsx +0 -112
- tests/unit/resolver/test_issue_handler_factory.py +0 -77
- tests/unit/resolver/test_issue_references.py +0 -56
- tests/unit/resolver/test_patch_apply.py +0 -47
- tests/unit/resolver/test_resolve_issue.py +0 -171
- tests/unit/test_acompletion.py +0 -196
tests/runtime/README.md
DELETED
@@ -1,60 +0,0 @@
## Runtime Tests

This folder contains integration tests that verify the functionality of OpenHands' runtime environments and their interactions with various tools and features.

### What are Runtime Tests?

Runtime tests focus on testing:
- Tool interactions within a runtime environment (bash commands, browsing, file operations)
- Environment setup and configuration
- Resource management and cleanup
- Browser-based operations and file viewing capabilities
- IPython/Jupyter integration
- Environment variables and configuration handling

The tests can be run against different runtime environments (Docker, Local, Remote, Runloop, or Daytona) by setting the TEST_RUNTIME environment variable. By default, tests run using the Docker runtime.

### How are they different from Unit Tests?

While unit tests in `tests/unit/` focus on testing individual components in isolation, runtime tests verify:
1. Integration between components
2. Actual execution of commands in different runtime environments
3. System-level interactions (file system, network, browser)
4. Environment setup and teardown
5. Tool functionality in real runtime contexts

### Running the Tests

Run all runtime tests:

```bash
poetry run pytest ./tests/runtime
```

Run a specific test file:

```bash
poetry run pytest ./tests/runtime/test_bash.py
```

Run a specific test:

```bash
poetry run pytest ./tests/runtime/test_bash.py::test_bash_command_env
```

For verbose output, add the `-v` flag (more verbose: `-vv` and `-vvv`):

```bash
poetry run pytest -v ./tests/runtime/test_bash.py
```

### Environment Variables

The runtime tests can be configured using environment variables:
- `TEST_IN_CI`: Set to 'True' when running in a CI environment
- `TEST_RUNTIME`: Specify the runtime to test ('docker', 'local', 'remote', 'runloop', 'daytona')
- `RUN_AS_OPENHANDS`: Set to 'True' to run tests as the openhands user (default), 'False' to run as root
- `SANDBOX_BASE_CONTAINER_IMAGE`: Specify a custom base container image for the Docker runtime

For more details on pytest usage, see the [pytest documentation](https://docs.pytest.org/en/latest/contents.html).
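For instance, these variables combine with the pytest commands above; a minimal sketch, assuming a local Docker daemon is available and reusing the default base image that `conftest.py` falls back to (adjust the values for your setup):

```bash
# Run the bash tests against the Docker runtime as root,
# overriding the sandbox's base container image.
TEST_RUNTIME=docker \
RUN_AS_OPENHANDS=False \
SANDBOX_BASE_CONTAINER_IMAGE=nikolaik/python-nodejs:python3.12-nodejs22 \
poetry run pytest -v ./tests/runtime/test_bash.py
```
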
tests/runtime/conftest.py
DELETED
@@ -1,300 +0,0 @@
import os
import random
import shutil
import stat
import time

import pytest
from pytest import TempPathFactory

from openhands.core.config import MCPConfig, OpenHandsConfig, load_openhands_config
from openhands.core.logger import openhands_logger as logger
from openhands.events import EventStream
from openhands.runtime.base import Runtime
from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
from openhands.runtime.impl.daytona.daytona_runtime import DaytonaRuntime
from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
from openhands.runtime.impl.local.local_runtime import LocalRuntime
from openhands.runtime.impl.remote.remote_runtime import RemoteRuntime
from openhands.runtime.impl.runloop.runloop_runtime import RunloopRuntime
from openhands.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
from openhands.storage import get_file_store
from openhands.utils.async_utils import call_async_from_sync

TEST_IN_CI = os.getenv('TEST_IN_CI', 'False').lower() in ['true', '1', 'yes']
TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'docker').lower()
RUN_AS_OPENHANDS = os.getenv('RUN_AS_OPENHANDS', 'True').lower() in ['true', '1', 'yes']
test_mount_path = ''
project_dir = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
sandbox_test_folder = '/workspace'


def _get_runtime_sid(runtime: Runtime) -> str:
    logger.debug(f'\nruntime.sid: {runtime.sid}')
    return runtime.sid


def _get_host_folder(runtime: Runtime) -> str:
    return runtime.config.workspace_mount_path


def _remove_folder(folder: str) -> bool:
    success = False
    if folder and os.path.isdir(folder):
        try:
            os.rmdir(folder)
            success = True
        except OSError:
            try:
                shutil.rmtree(folder)
                success = True
            except OSError:
                pass
    logger.debug(f'\nCleanup: `{folder}`: ' + ('[OK]' if success else '[FAILED]'))
    return success


def _close_test_runtime(runtime: Runtime) -> None:
    if isinstance(runtime, DockerRuntime):
        runtime.close(rm_all_containers=False)
    else:
        runtime.close()
    time.sleep(1)


def _reset_cwd() -> None:
    global project_dir
    # Try to change back to project directory
    try:
        os.chdir(project_dir)
        logger.info(f'Changed back to project directory `{project_dir}`')
    except Exception as e:
        logger.error(f'Failed to change back to project directory: {e}')


# *****************************************************************************
# *****************************************************************************


@pytest.fixture(autouse=True)
def print_method_name(request):
    print(
        '\n\n########################################################################'
    )
    print(f'Running test: {request.node.name}')
    print(
        '########################################################################\n\n'
    )


@pytest.fixture
def temp_dir(tmp_path_factory: TempPathFactory, request) -> str:
    """Creates a unique temporary directory.

    Upon finalization, the temporary directory and its content is removed.
    The cleanup function is also called upon KeyboardInterrupt.

    Parameters:
    - tmp_path_factory (TempPathFactory): A TempPathFactory class

    Returns:
    - str: The temporary directory path that was created
    """
    temp_dir = tmp_path_factory.mktemp(
        'rt_' + str(random.randint(100000, 999999)), numbered=False
    )

    logger.info(f'\n*** {request.node.name}\n>> temp folder: {temp_dir}\n')

    # Set permissions to ensure the directory is writable and deletable
    os.chmod(temp_dir, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)  # 0777 permissions

    def cleanup():
        global project_dir
        os.chdir(project_dir)
        _remove_folder(temp_dir)

    request.addfinalizer(cleanup)

    return str(temp_dir)


# Depending on TEST_RUNTIME, feed the appropriate box class(es) to the test.
def get_runtime_classes() -> list[type[Runtime]]:
    runtime = TEST_RUNTIME
    if runtime.lower() == 'docker' or runtime.lower() == 'eventstream':
        return [DockerRuntime]
    elif runtime.lower() == 'local':
        return [LocalRuntime]
    elif runtime.lower() == 'remote':
        return [RemoteRuntime]
    elif runtime.lower() == 'runloop':
        return [RunloopRuntime]
    elif runtime.lower() == 'daytona':
        return [DaytonaRuntime]
    elif runtime.lower() == 'cli':
        return [CLIRuntime]
    else:
        raise ValueError(f'Invalid runtime: {runtime}')


def get_run_as_openhands() -> list[bool]:
    print(
        '\n\n########################################################################'
    )
    print('USER: ' + ('openhands' if RUN_AS_OPENHANDS else 'root'))
    print(
        '########################################################################\n\n'
    )
    return [RUN_AS_OPENHANDS]


@pytest.fixture(scope='module')  # for xdist
def runtime_setup_module():
    _reset_cwd()
    yield
    _reset_cwd()


@pytest.fixture(scope='session')  # not for xdist
def runtime_setup_session():
    _reset_cwd()
    yield
    _reset_cwd()


# This assures that all tests run together per runtime, not alternating between them,
# which causes errors (especially outside GitHub actions).
@pytest.fixture(scope='module', params=get_runtime_classes())
def runtime_cls(request):
    time.sleep(1)
    return request.param


# TODO: We will change this to `run_as_user` when `ServerRuntime` is deprecated,
# since `DockerRuntime` supports running as an arbitrary user.
@pytest.fixture(scope='module', params=get_run_as_openhands())
def run_as_openhands(request):
    time.sleep(1)
    return request.param


@pytest.fixture(scope='module', params=None)
def base_container_image(request):
    time.sleep(1)
    env_image = os.environ.get('SANDBOX_BASE_CONTAINER_IMAGE')
    if env_image:
        request.param = env_image
    else:
        if not hasattr(request, 'param'):  # prevent runtime AttributeError
            request.param = None
        if request.param is None and hasattr(request.config, 'sandbox'):
            try:
                request.param = request.config.sandbox.getoption(
                    '--base_container_image'
                )
            except ValueError:
                request.param = None
        if request.param is None:
            request.param = pytest.param(
                'nikolaik/python-nodejs:python3.12-nodejs22',
                'golang:1.23-bookworm',
            )
    print(f'Container image: {request.param}')
    return request.param


def _load_runtime(
    temp_dir,
    runtime_cls,
    run_as_openhands: bool = True,
    enable_auto_lint: bool = False,
    base_container_image: str | None = None,
    browsergym_eval_env: str | None = None,
    use_workspace: bool | None = None,
    force_rebuild_runtime: bool = False,
    runtime_startup_env_vars: dict[str, str] | None = None,
    docker_runtime_kwargs: dict[str, str] | None = None,
    override_mcp_config: MCPConfig | None = None,
) -> tuple[Runtime, OpenHandsConfig]:
    sid = 'rt_' + str(random.randint(100000, 999999))

    # AgentSkills need to be initialized **before** Jupyter,
    # otherwise Jupyter will not access the proper dependencies installed by AgentSkills
    plugins = [AgentSkillsRequirement(), JupyterRequirement()]

    config = load_openhands_config()
    config.run_as_openhands = run_as_openhands
    config.sandbox.force_rebuild_runtime = force_rebuild_runtime
    config.sandbox.keep_runtime_alive = False
    config.sandbox.docker_runtime_kwargs = docker_runtime_kwargs
    # Folder where all tests create their own folder
    global test_mount_path
    if use_workspace:
        test_mount_path = os.path.join(config.workspace_base, 'rt')
    elif temp_dir is not None:
        test_mount_path = temp_dir
    else:
        test_mount_path = None
    config.workspace_base = test_mount_path
    config.workspace_mount_path = test_mount_path

    # Mounting folder specific for this test inside the sandbox
    config.workspace_mount_path_in_sandbox = f'{sandbox_test_folder}'
    print('\nPaths used:')
    print(f'use_host_network: {config.sandbox.use_host_network}')
    print(f'workspace_base: {config.workspace_base}')
    print(f'workspace_mount_path: {config.workspace_mount_path}')
    print(
        f'workspace_mount_path_in_sandbox: {config.workspace_mount_path_in_sandbox}\n'
    )

    config.sandbox.browsergym_eval_env = browsergym_eval_env
    config.sandbox.enable_auto_lint = enable_auto_lint
    if runtime_startup_env_vars is not None:
        config.sandbox.runtime_startup_env_vars = runtime_startup_env_vars

    if base_container_image is not None:
        config.sandbox.base_container_image = base_container_image
        config.sandbox.runtime_container_image = None

    if override_mcp_config is not None:
        config.mcp = override_mcp_config

    file_store = get_file_store(
        config.file_store,
        config.file_store_path,
        config.file_store_web_hook_url,
        config.file_store_web_hook_headers,
    )
    event_stream = EventStream(sid, file_store)

    runtime = runtime_cls(
        config=config,
        event_stream=event_stream,
        sid=sid,
        plugins=plugins,
    )

    # For CLIRuntime, the tests' assertions should be based on the physical workspace path,
    # not the logical "/workspace". So, we adjust config.workspace_mount_path_in_sandbox
    # to reflect the actual physical path used by CLIRuntime's OHEditor.
    if isinstance(runtime, CLIRuntime):
        config.workspace_mount_path_in_sandbox = str(runtime.workspace_root)
        logger.info(
            f'Adjusted workspace_mount_path_in_sandbox for CLIRuntime to: {config.workspace_mount_path_in_sandbox}'
        )

    call_async_from_sync(runtime.connect)
    time.sleep(2)
    return runtime, runtime.config


# Export necessary functions
__all__ = [
    '_load_runtime',
    '_get_host_folder',
    '_remove_folder',
]
tests/runtime/test_aci_edit.py
DELETED
@@ -1,733 +0,0 @@
"""Editor-related tests for the DockerRuntime."""

import os
from unittest.mock import MagicMock

from conftest import _close_test_runtime, _load_runtime

from openhands.core.logger import openhands_logger as logger
from openhands.events.action import FileEditAction, FileWriteAction
from openhands.runtime.action_execution_server import _execute_file_editor
from openhands.runtime.impl.cli.cli_runtime import CLIRuntime


def test_view_file(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Create test file
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='This is a test file.\nThis file is for testing purposes.',
            path=test_file,
        )
        obs = runtime.run_action(action)

        # Test view command
        action = FileEditAction(
            command='view',
            path=test_file,
        )
        obs = runtime.run_action(action)

        assert f"Here's the result of running `cat -n` on {test_file}:" in obs.content
        assert '1\tThis is a test file.' in obs.content
        assert '2\tThis file is for testing purposes.' in obs.content

    finally:
        _close_test_runtime(runtime)


def test_view_directory(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Create test file
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='This is a test file.\nThis file is for testing purposes.',
            path=test_file,
        )
        obs = runtime.run_action(action)

        # Test view command
        action = FileEditAction(
            command='view',
            path=config.workspace_mount_path_in_sandbox,
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert (
            obs.content
            == f"""Here's the files and directories up to 2 levels deep in {config.workspace_mount_path_in_sandbox}, excluding hidden items:
{config.workspace_mount_path_in_sandbox}/
{config.workspace_mount_path_in_sandbox}/test.txt"""
        )

    finally:
        _close_test_runtime(runtime)


def test_create_file(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        new_file = os.path.join(config.workspace_mount_path_in_sandbox, 'new_file.txt')
        action = FileEditAction(
            command='create',
            path=new_file,
            file_text='New file content',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'File created successfully' in obs.content

        # Verify file content
        action = FileEditAction(
            command='view',
            path=new_file,
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'New file content' in obs.content

    finally:
        _close_test_runtime(runtime)


def test_create_file_with_empty_content(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        new_file = os.path.join(config.workspace_mount_path_in_sandbox, 'new_file.txt')
        action = FileEditAction(
            command='create',
            path=new_file,
            file_text='',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'File created successfully' in obs.content

        # Verify file content
        action = FileEditAction(
            command='view',
            path=new_file,
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert '1\t' in obs.content

    finally:
        _close_test_runtime(runtime)


def test_create_with_none_file_text(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        new_file = os.path.join(
            config.workspace_mount_path_in_sandbox, 'none_content.txt'
        )
        action = FileEditAction(
            command='create',
            path=new_file,
            file_text=None,
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert (
            obs.content
            == 'ERROR:\nParameter `file_text` is required for command: create.'
        )
    finally:
        _close_test_runtime(runtime)


def test_str_replace(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Create test file
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='This is a test file.\nThis file is for testing purposes.',
            path=test_file,
        )
        runtime.run_action(action)

        # Test str_replace command
        action = FileEditAction(
            command='str_replace',
            path=test_file,
            old_str='test file',
            new_str='sample file',
        )
        obs = runtime.run_action(action)
        assert f'The file {test_file} has been edited' in obs.content

        # Verify file content
        action = FileEditAction(
            command='view',
            path=test_file,
        )
        obs = runtime.run_action(action)
        assert 'This is a sample file.' in obs.content

    finally:
        _close_test_runtime(runtime)


def test_str_replace_multi_line(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='This is a test file.\nThis file is for testing purposes.',
            path=test_file,
        )
        runtime.run_action(action)

        # Test str_replace command
        action = FileEditAction(
            command='str_replace',
            path=test_file,
            old_str='This is a test file.\nThis file is for testing purposes.',
            new_str='This is a sample file.\nThis file is for testing purposes.',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert f'The file {test_file} has been edited.' in obs.content
        assert 'This is a sample file.' in obs.content
        assert 'This file is for testing purposes.' in obs.content

    finally:
        _close_test_runtime(runtime)


def test_str_replace_multi_line_with_tabs(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileEditAction(
            command='create',
            path=test_file,
            file_text='def test():\n\tprint("Hello, World!")',
        )
        runtime.run_action(action)

        # Test str_replace command
        action = FileEditAction(
            command='str_replace',
            path=test_file,
            old_str='def test():\n\tprint("Hello, World!")',
            new_str='def test():\n\tprint("Hello, Universe!")',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert (
            obs.content
            == f"""The file {test_file} has been edited. Here's the result of running `cat -n` on a snippet of {test_file}:
1\tdef test():
2\t\tprint("Hello, Universe!")
Review the changes and make sure they are as expected. Edit the file again if necessary."""
        )

    finally:
        _close_test_runtime(runtime)


def test_str_replace_error_multiple_occurrences(
    temp_dir, runtime_cls, run_as_openhands
):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='This is a test file.\nThis file is for testing purposes.',
            path=test_file,
        )
        runtime.run_action(action)

        action = FileEditAction(
            command='str_replace', path=test_file, old_str='test', new_str='sample'
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Multiple occurrences of old_str `test`' in obs.content
        assert '[1, 2]' in obs.content  # Should show both line numbers
    finally:
        _close_test_runtime(runtime)


def test_str_replace_error_multiple_multiline_occurrences(
    temp_dir, runtime_cls, run_as_openhands
):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        # Create a file with two identical multi-line blocks
        multi_block = """def example():
    print("Hello")
    return True"""
        content = f"{multi_block}\n\nprint('separator')\n\n{multi_block}"
        action = FileWriteAction(
            content=content,
            path=test_file,
        )
        runtime.run_action(action)

        # Test str_replace command
        action = FileEditAction(
            command='str_replace',
            path=test_file,
            old_str=multi_block,
            new_str='def new():\n    print("World")',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Multiple occurrences of old_str' in obs.content
        assert '[1, 7]' in obs.content  # Should show correct starting line numbers

    finally:
        _close_test_runtime(runtime)


def test_str_replace_nonexistent_string(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine 2',
            path=test_file,
        )
        runtime.run_action(action)
        action = FileEditAction(
            command='str_replace',
            path=test_file,
            old_str='Non-existent Line',
            new_str='New Line',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'No replacement was performed' in obs.content
        assert (
            f'old_str `Non-existent Line` did not appear verbatim in {test_file}'
            in obs.content
        )
    finally:
        _close_test_runtime(runtime)


def test_str_replace_with_empty_new_str(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine to remove\nLine 3',
            path=test_file,
        )
        runtime.run_action(action)
        action = FileEditAction(
            command='str_replace',
            path=test_file,
            old_str='Line to remove\n',
            new_str='',
        )
        obs = runtime.run_action(action)
        assert 'Line to remove' not in obs.content
        assert 'Line 1' in obs.content
        assert 'Line 3' in obs.content

    finally:
        _close_test_runtime(runtime)


def test_str_replace_with_empty_old_str(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine 2\nLine 3',
            path=test_file,
        )
        runtime.run_action(action)
        action = FileEditAction(
            command='str_replace',
            path=test_file,
            old_str='',
            new_str='New string',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        if isinstance(runtime, CLIRuntime):
            # CLIRuntime with a 3-line file without a trailing newline reports 3 occurrences for an empty old_str
            assert (
                'No replacement was performed. Multiple occurrences of old_str `` in lines [1, 2, 3]. Please ensure it is unique.'
                in obs.content
            )
        else:
            # Other runtimes might behave differently (e.g., implicitly add a newline, leading to 4 matches)
            # TODO: Why do they have 4 lines?
            assert (
                'No replacement was performed. Multiple occurrences of old_str `` in lines [1, 2, 3, 4]. Please ensure it is unique.'
                in obs.content
            )
    finally:
        _close_test_runtime(runtime)


def test_str_replace_with_none_old_str(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine 2\nLine 3',
            path=test_file,
        )
        runtime.run_action(action)

        action = FileEditAction(
            command='str_replace',
            path=test_file,
            old_str=None,
            new_str='new content',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'old_str' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_insert(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Create test file
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine 2',
            path=test_file,
        )
        runtime.run_action(action)

        # Test insert command
        action = FileEditAction(
            command='insert',
            path=test_file,
            insert_line=1,
            new_str='Inserted line',
        )
        obs = runtime.run_action(action)
        assert f'The file {test_file} has been edited' in obs.content

        # Verify file content
        action = FileEditAction(
            command='view',
            path=test_file,
        )
        obs = runtime.run_action(action)
        assert 'Line 1' in obs.content
        assert 'Inserted line' in obs.content
        assert 'Line 2' in obs.content

    finally:
        _close_test_runtime(runtime)


def test_insert_invalid_line(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine 2',
            path=test_file,
        )
        runtime.run_action(action)
        action = FileEditAction(
            command='insert',
            path=test_file,
            insert_line=10,
            new_str='Invalid Insert',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Invalid `insert_line` parameter' in obs.content
        assert 'It should be within the range of allowed values' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_insert_with_empty_string(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine 2',
            path=test_file,
        )
        runtime.run_action(action)
        action = FileEditAction(
            command='insert',
            path=test_file,
            insert_line=1,
            new_str='',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert '1\tLine 1' in obs.content
        assert '2\t\n' in obs.content
        assert '3\tLine 2' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_insert_with_none_new_str(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine 2',
            path=test_file,
        )
        runtime.run_action(action)

        action = FileEditAction(
            command='insert',
            path=test_file,
            insert_line=1,
            new_str=None,
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'ERROR' in obs.content
        assert 'Parameter `new_str` is required for command: insert' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_undo_edit(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Create test file
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='This is a test file.',
            path=test_file,
        )
        runtime.run_action(action)

        # Make an edit
        action = FileEditAction(
            command='str_replace',
            path=test_file,
            old_str='test',
            new_str='sample',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'This is a sample file.' in obs.content

        # Undo the edit
        action = FileEditAction(
            command='undo_edit',
            path=test_file,
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Last edit to' in obs.content
        assert 'This is a test file.' in obs.content

        # Verify file content
        action = FileEditAction(
            command='view',
            path=test_file,
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'This is a test file.' in obs.content

    finally:
        _close_test_runtime(runtime)


def test_validate_path_invalid(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        invalid_file = os.path.join(
            config.workspace_mount_path_in_sandbox, 'nonexistent.txt'
        )
        action = FileEditAction(
            command='view',
            path=invalid_file,
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Invalid `path` parameter' in obs.content
        assert f'The path {invalid_file} does not exist' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_create_existing_file_error(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine 2',
            path=test_file,
        )
        runtime.run_action(action)
        action = FileEditAction(
            command='create',
            path=test_file,
            file_text='New content',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'File already exists' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_str_replace_missing_old_str(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine 2',
            path=test_file,
        )
        runtime.run_action(action)
        action = FileEditAction(
            command='str_replace',
            path=test_file,
            old_str='',
            new_str='sample',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert (
            'No replacement was performed. Multiple occurrences of old_str ``'
            in obs.content
        )
    finally:
        _close_test_runtime(runtime)


def test_str_replace_new_str_and_old_str_same(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine 2',
            path=test_file,
        )
        runtime.run_action(action)
        action = FileEditAction(
            command='str_replace',
            path=test_file,
            old_str='test file',
            new_str='test file',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert (
            'No replacement was performed. `new_str` and `old_str` must be different.'
            in obs.content
        )
    finally:
        _close_test_runtime(runtime)


def test_insert_missing_line_param(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
        action = FileWriteAction(
            content='Line 1\nLine 2',
            path=test_file,
        )
        runtime.run_action(action)
        action = FileEditAction(
            command='insert',
            path=test_file,
            new_str='Missing insert line',
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Parameter `insert_line` is required for command: insert' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_undo_edit_no_history_error(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        empty_file = os.path.join(config.workspace_mount_path_in_sandbox, 'empty.txt')
        action = FileWriteAction(
            content='',
            path=empty_file,
        )
        runtime.run_action(action)

        action = FileEditAction(
            command='undo_edit',
            path=empty_file,
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'No edit history found for' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_view_large_file_with_truncation(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Create a large file to trigger truncation
        large_file = os.path.join(
            config.workspace_mount_path_in_sandbox, 'large_test.txt'
        )
        large_content = 'Line 1\n' * 16000  # 16000 lines should trigger truncation
        action = FileWriteAction(
            content=large_content,
            path=large_file,
        )
        runtime.run_action(action)

        action = FileEditAction(
            command='view',
            path=large_file,
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert (
            'Due to the max output limit, only part of this file has been shown to you.'
            in obs.content
        )
    finally:
        _close_test_runtime(runtime)


def test_insert_line_string_conversion():
    """Test that insert_line is properly converted from string to int.

    This test reproduces issue #8369 Example 2 where a string value for insert_line
    causes a TypeError in the editor.
    """
    # Mock the OHEditor
    mock_editor = MagicMock()
    mock_editor.return_value = MagicMock(
        error=None, output='Success', old_content=None, new_content=None
    )

    # Test with string insert_line
    result, _ = _execute_file_editor(
        editor=mock_editor,
        command='insert',
        path='/test/path.py',
        insert_line='185',  # String instead of int
        new_str='test content',
    )

    # Verify the editor was called with the correct parameters (insert_line converted to int)
    mock_editor.assert_called_once()
    args, kwargs = mock_editor.call_args
    assert isinstance(kwargs['insert_line'], int)
    assert kwargs['insert_line'] == 185
    assert result == 'Success'
tests/runtime/test_bash.py
DELETED
@@ -1,1462 +0,0 @@
|
|
1 |
-
"""Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
|
2 |
-
|
3 |
-
import os
|
4 |
-
import sys
|
5 |
-
import time
|
6 |
-
from pathlib import Path
|
7 |
-
|
8 |
-
import pytest
|
9 |
-
from conftest import (
|
10 |
-
_close_test_runtime,
|
11 |
-
_load_runtime,
|
12 |
-
)
|
13 |
-
|
14 |
-
from openhands.core.logger import openhands_logger as logger
|
15 |
-
from openhands.events.action import CmdRunAction
|
16 |
-
from openhands.events.observation import CmdOutputObservation, ErrorObservation
|
17 |
-
from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
|
18 |
-
from openhands.runtime.impl.local.local_runtime import LocalRuntime
|
19 |
-
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
|
20 |
-
|
21 |
-
|
22 |
-
def get_timeout_suffix(timeout_seconds):
|
23 |
-
"""Helper function to generate the expected timeout suffix."""
|
24 |
-
return (
|
25 |
-
f'[The command timed out after {timeout_seconds} seconds. '
|
26 |
-
f'{TIMEOUT_MESSAGE_TEMPLATE}]'
|
27 |
-
)
|
28 |
-
|
29 |
-
|
30 |
-
# ============================================================================================================================
|
31 |
-
# Bash-specific tests
|
32 |
-
# ============================================================================================================================
|
33 |
-
|
34 |
-
|
35 |
-
# Helper function to determine if running on Windows
|
36 |
-
def is_windows():
|
37 |
-
return sys.platform == 'win32'
|
38 |
-
|
39 |
-
|
40 |
-
def _run_cmd_action(runtime, custom_command: str):
|
41 |
-
action = CmdRunAction(command=custom_command)
|
42 |
-
logger.info(action, extra={'msg_type': 'ACTION'})
|
43 |
-
obs = runtime.run_action(action)
|
44 |
-
assert isinstance(obs, (CmdOutputObservation, ErrorObservation))
|
45 |
-
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
46 |
-
return obs
|
47 |
-
|
48 |
-
|
49 |
-
# Get platform-appropriate command
|
50 |
-
def get_platform_command(linux_cmd, windows_cmd):
|
51 |
-
return windows_cmd if is_windows() else linux_cmd
|
52 |
-
|
53 |
-
|
54 |
-
def test_bash_server(temp_dir, runtime_cls, run_as_openhands):
|
55 |
-
runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
|
56 |
-
try:
|
57 |
-
# Use python -u for unbuffered output, potentially helping capture initial output on Windows
|
58 |
-
action = CmdRunAction(command='python -u -m http.server 8081')
|
59 |
-
action.set_hard_timeout(1)
|
60 |
-
obs = runtime.run_action(action)
|
61 |
-
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
62 |
-
assert isinstance(obs, CmdOutputObservation)
|
63 |
-
assert obs.exit_code == -1
|
64 |
-
assert 'Serving HTTP on' in obs.content
|
65 |
-
|
66 |
-
if runtime_cls == CLIRuntime:
|
67 |
-
assert '[The command timed out after 1.0 seconds.]' in obs.metadata.suffix
|
68 |
-
else:
|
69 |
-
assert get_timeout_suffix(1.0) in obs.metadata.suffix
|
70 |
-
|
71 |
-
action = CmdRunAction(command='C-c', is_input=True)
|
72 |
-
action.set_hard_timeout(30)
|
73 |
-
obs_interrupt = runtime.run_action(action)
|
74 |
-
logger.info(obs_interrupt, extra={'msg_type': 'OBSERVATION'})
|
75 |
-
|
76 |
-
if runtime_cls == CLIRuntime:
|
77 |
-
assert isinstance(obs_interrupt, ErrorObservation)
|
78 |
-
assert (
|
79 |
-
"CLIRuntime does not support interactive input from the agent (e.g., 'C-c'). The command 'C-c' was not sent to any process."
|
80 |
-
in obs_interrupt.content
|
81 |
-
)
|
82 |
-
assert obs_interrupt.error_id == 'AGENT_ERROR$BAD_ACTION'
|
83 |
-
else:
|
84 |
-
assert isinstance(obs_interrupt, CmdOutputObservation)
|
85 |
-
assert obs_interrupt.exit_code == 0
|
86 |
-
if not is_windows():
|
87 |
-
# Linux/macOS behavior
|
88 |
-
assert 'Keyboard interrupt received, exiting.' in obs_interrupt.content
|
89 |
-
assert (
|
90 |
-
config.workspace_mount_path_in_sandbox
|
91 |
-
in obs_interrupt.metadata.working_dir
|
92 |
-
)
|
93 |
-
|
94 |
-
# Verify the server is actually stopped by trying to start another one
|
95 |
-
# on the same port (regardless of OS)
|
96 |
-
action = CmdRunAction(command='ls')
|
97 |
-
action.set_hard_timeout(1)
|
98 |
-
obs = runtime.run_action(action)
|
99 |
-
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
100 |
-
assert isinstance(obs, CmdOutputObservation)
|
101 |
-
assert obs.exit_code == 0
|
102 |
-
# Check that the interrupt message is NOT present in subsequent output
|
103 |
-
assert 'Keyboard interrupt received, exiting.' not in obs.content
|
104 |
-
# Check working directory remains correct after interrupt handling
|
105 |
-
if runtime_cls == CLIRuntime:
|
106 |
-
# For CLIRuntime, working_dir is the absolute host path
|
107 |
-
assert obs.metadata.working_dir == config.workspace_base
|
108 |
-
else:
|
109 |
-
# For other runtimes (e.g., Docker), it's relative to or contains the sandbox path
|
110 |
-
assert config.workspace_mount_path_in_sandbox in obs.metadata.working_dir
|
111 |
-
|
112 |
-
# run it again!
|
113 |
-
action = CmdRunAction(command='python -u -m http.server 8081')
|
114 |
-
action.set_hard_timeout(1)
|
115 |
-
obs = runtime.run_action(action)
|
116 |
-
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
117 |
-
assert isinstance(obs, CmdOutputObservation)
|
118 |
-
assert obs.exit_code == -1
|
119 |
-
assert 'Serving HTTP on' in obs.content
|
120 |
-
|
121 |
-
finally:
|
122 |
-
_close_test_runtime(runtime)
|
123 |
-
|
124 |
-
|
125 |
-
def test_bash_background_server(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    server_port = 8081
    try:
        # Start the server; expect it to time out (i.e., run in the background)
        action = CmdRunAction(f'python3 -m http.server {server_port} &')
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert isinstance(obs, CmdOutputObservation)

        if runtime_cls == CLIRuntime:
            # The '&' does not detach cleanly; the PTY session remains active.
            # When the main command ends, the server may receive SIGHUP.
            assert obs.exit_code == 0

            # Give the server a moment to be ready
            time.sleep(1)

            # `curl --fail` exits non-zero if the connection fails or the server returns an error.
            # Use a short connect timeout as the server is expected to be down.
            curl_action = CmdRunAction(
                f'curl --fail --connect-timeout 1 http://localhost:{server_port}'
            )
            curl_obs = runtime.run_action(curl_action)
            logger.info(curl_obs, extra={'msg_type': 'OBSERVATION'})
            assert isinstance(curl_obs, CmdOutputObservation)
            assert curl_obs.exit_code != 0

            # Confirm with pkill (CLIRuntime is assumed non-Windows here).
            # pkill returns 1 if no processes were matched.
            kill_action = CmdRunAction('pkill -f "http.server"')
            kill_obs = runtime.run_action(kill_action)
            logger.info(kill_obs, extra={'msg_type': 'OBSERVATION'})
            assert isinstance(kill_obs, CmdOutputObservation)
            # For CLIRuntime, bash -c "cmd &" exits quickly, orphaning "cmd".
            # CLIRuntime's timeout tries to kill the already-exited bash -c.
            # The orphaned http.server continues running.
            # So, pkill should find and kill the server.
            assert kill_obs.exit_code == 0
        else:
            assert obs.exit_code == 0

            # Give the server a moment to be ready
            time.sleep(1)

            # Verify the server is running by curling it
            if is_windows():
                curl_action = CmdRunAction(
                    f'Invoke-WebRequest -Uri http://localhost:{server_port} -UseBasicParsing | Select-Object -ExpandProperty Content'
                )
            else:
                curl_action = CmdRunAction(f'curl http://localhost:{server_port}')
            curl_obs = runtime.run_action(curl_action)
            logger.info(curl_obs, extra={'msg_type': 'OBSERVATION'})
            assert isinstance(curl_obs, CmdOutputObservation)
            assert curl_obs.exit_code == 0
            # Check for content typical of a python http.server directory listing
            assert 'Directory listing for' in curl_obs.content

            # Kill the server
            if is_windows():
                # This assumes a PowerShell context if LocalRuntime is used on Windows.
                kill_action = CmdRunAction('Get-Job | Stop-Job')
            else:
                kill_action = CmdRunAction('pkill -f "http.server"')
            kill_obs = runtime.run_action(kill_action)
            logger.info(kill_obs, extra={'msg_type': 'OBSERVATION'})
            assert isinstance(kill_obs, CmdOutputObservation)
            assert kill_obs.exit_code == 0

    finally:
        _close_test_runtime(runtime)


def test_multiline_commands(temp_dir, runtime_cls):
    runtime, config = _load_runtime(temp_dir, runtime_cls)
    try:
        if is_windows():
            # Windows PowerShell version using backticks for line continuation
            obs = _run_cmd_action(runtime, 'Write-Output `\n "foo"')
            assert obs.exit_code == 0, 'The exit code should be 0.'
            assert 'foo' in obs.content

            # test multiline output
            obs = _run_cmd_action(runtime, 'Write-Output "hello`nworld"')
            assert obs.exit_code == 0, 'The exit code should be 0.'
            assert 'hello\nworld' in obs.content

            # test whitespace
            obs = _run_cmd_action(runtime, 'Write-Output "a`n`n`nz"')
            assert obs.exit_code == 0, 'The exit code should be 0.'
            assert '\n\n\n' in obs.content
        else:
            # Original Linux bash version
            # single multiline command
            obs = _run_cmd_action(runtime, 'echo \\\n -e "foo"')
            assert obs.exit_code == 0, 'The exit code should be 0.'
            assert 'foo' in obs.content

            # test multiline echo
            obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
            assert obs.exit_code == 0, 'The exit code should be 0.'
            assert 'hello\nworld' in obs.content

            # test whitespace
            obs = _run_cmd_action(runtime, 'echo -e "a\\n\\n\\nz"')
            assert obs.exit_code == 0, 'The exit code should be 0.'
            assert '\n\n\n' in obs.content
    finally:
        _close_test_runtime(runtime)


@pytest.mark.skipif(
    is_windows(), reason='Test relies on Linux bash-specific complex commands'
)
def test_complex_commands(temp_dir, runtime_cls, run_as_openhands):
    cmd = """count=0; tries=0; while [ $count -lt 3 ]; do result=$(echo "Heads"); tries=$((tries+1)); echo "Flip $tries: $result"; if [ "$result" = "Heads" ]; then count=$((count+1)); else count=0; fi; done; echo "Got 3 heads in a row after $tries flips!";"""

    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        obs = _run_cmd_action(runtime, cmd)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0, 'The exit code should be 0.'
        assert 'Got 3 heads in a row after 3 flips!' in obs.content

    finally:
        _close_test_runtime(runtime)

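# PS2 is bash's secondary prompt (default "> "), printed when a command
# continues onto another line; the test below checks that it never leaks
# into the captured output.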
def test_no_ps2_in_output(temp_dir, runtime_cls, run_as_openhands):
    """Test that the PS2 sign is not added to the output of a multiline command."""
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        if is_windows():
            obs = _run_cmd_action(runtime, 'Write-Output "hello`nworld"')
        else:
            obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
        assert obs.exit_code == 0, 'The exit code should be 0.'

        assert 'hello\nworld' in obs.content
        assert '>' not in obs.content
    finally:
        _close_test_runtime(runtime)


@pytest.mark.skipif(
    is_windows(), reason='Test uses Linux-specific bash loops and sed commands'
)
def test_multiline_command_loop(temp_dir, runtime_cls):
    # https://github.com/All-Hands-AI/OpenHands/issues/3143
    init_cmd = """mkdir -p _modules && \
for month in {01..04}; do
    for day in {01..05}; do
        touch "_modules/2024-${month}-${day}-sample.md"
    done
done && echo "created files"
"""
    follow_up_cmd = """for file in _modules/*.md; do
    new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
    mv "$file" "$new_date"
done && echo "success"
"""
    runtime, config = _load_runtime(temp_dir, runtime_cls)
    try:
        obs = _run_cmd_action(runtime, init_cmd)
        assert obs.exit_code == 0, 'The exit code should be 0.'
        assert 'created files' in obs.content

        obs = _run_cmd_action(runtime, follow_up_cmd)
        assert obs.exit_code == 0, 'The exit code should be 0.'
        assert 'success' in obs.content
    finally:
        _close_test_runtime(runtime)


@pytest.mark.skipif(
    os.getenv('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime uses bash -c which handles newline-separated commands. This test expects rejection. See test_cliruntime_multiple_newline_commands.',
)
def test_multiple_multiline_commands(temp_dir, runtime_cls, run_as_openhands):
    if is_windows():
        cmds = [
            'Get-ChildItem',
            'Write-Output "hello`nworld"',
            """Write-Output "hello it's me\"""",
            """Write-Output `
('hello ' + `
'world')""",
            """Write-Output 'hello\nworld\nare\nyou\nthere?'""",
            """Write-Output 'hello\nworld\nare\nyou\n\nthere?'""",
            """Write-Output 'hello\nworld "'""",  # Escape the trailing double quote
        ]
    else:
        cmds = [
            'ls -l',
            'echo -e "hello\nworld"',
            """echo -e "hello it's me\"""",
            """echo \\
-e 'hello' \\
world""",
            """echo -e 'hello\\nworld\\nare\\nyou\\nthere?'""",
            """echo -e 'hello\nworld\nare\nyou\n\nthere?'""",
            """echo -e 'hello\nworld "'""",
        ]
    joined_cmds = '\n'.join(cmds)

    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # First test that running multiple commands at once fails
        obs = _run_cmd_action(runtime, joined_cmds)
        assert isinstance(obs, ErrorObservation)
        assert 'Cannot execute multiple commands at once' in obs.content

        # Now run each command individually and verify they work
        results = []
        for cmd in cmds:
            obs = _run_cmd_action(runtime, cmd)
            assert isinstance(obs, CmdOutputObservation)
            assert obs.exit_code == 0
            results.append(obs.content)

        # Verify all expected outputs are present
        if is_windows():
            assert '.git_config' in results[0]  # Get-ChildItem
        else:
            assert 'total 0' in results[0]  # ls -l
        assert 'hello\nworld' in results[1]  # echo -e "hello\nworld"
        assert "hello it's me" in results[2]  # echo -e "hello it\'s me"
        assert 'hello world' in results[3]  # echo -e 'hello' world
        assert (
            'hello\nworld\nare\nyou\nthere?' in results[4]
        )  # echo -e 'hello\nworld\nare\nyou\nthere?'
        assert (
            'hello\nworld\nare\nyou\n\nthere?' in results[5]
        )  # echo -e with literal newlines
        assert 'hello\nworld "' in results[6]  # echo -e with quote
    finally:
        _close_test_runtime(runtime)

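# Counterpart to test_multiple_multiline_commands above: CLIRuntime hands the
# whole string to `bash -c`, so newline-separated commands execute
# sequentially instead of being rejected.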
def test_cliruntime_multiple_newline_commands(temp_dir, run_as_openhands):
    # This test is specific to CLIRuntime
    runtime_cls = CLIRuntime
    if is_windows():
        # Minimal check for Windows if CLIRuntime were to support it robustly with PowerShell for this.
        # For now, this test primarily targets the bash -c behavior on non-Windows.
        pytest.skip(
            'CLIRuntime newline command test primarily for non-Windows bash behavior'
        )
        # cmds = [
        #     'Get-ChildItem -Name .git_config',  # Simpler command
        #     'Write-Output "hello`nworld"'
        # ]
        # expected_outputs = ['.git_config', 'hello\nworld']
    else:
        cmds = [
            'echo "hello"',  # A command that will always work
            'echo -e "hello\nworld"',
            """echo -e "hello it's me\"""",
        ]
        expected_outputs = [
            'hello',  # Simple string output
            'hello\nworld',
            "hello it's me",
        ]  # Simplified expectations
    joined_cmds = '\n'.join(cmds)

    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        obs = _run_cmd_action(runtime, joined_cmds)
        assert isinstance(obs, CmdOutputObservation)
        assert obs.exit_code == 0
        # Check that parts of each command's expected output are present
        for expected_part in expected_outputs:
            assert expected_part in obs.content
    finally:
        _close_test_runtime(runtime)


def test_cmd_run(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        if is_windows():
            # Windows PowerShell version
            obs = _run_cmd_action(
                runtime, f'Get-ChildItem -Path {config.workspace_mount_path_in_sandbox}'
            )
            assert obs.exit_code == 0

            obs = _run_cmd_action(runtime, 'Get-ChildItem')
            assert obs.exit_code == 0

            obs = _run_cmd_action(runtime, 'New-Item -ItemType Directory -Path test')
            assert obs.exit_code == 0

            obs = _run_cmd_action(runtime, 'Get-ChildItem')
            assert obs.exit_code == 0
            assert 'test' in obs.content

            obs = _run_cmd_action(runtime, 'New-Item -ItemType File -Path test/foo.txt')
            assert obs.exit_code == 0

            obs = _run_cmd_action(runtime, 'Get-ChildItem test')
            assert obs.exit_code == 0
            assert 'foo.txt' in obs.content

            # clean up (assign obs so the assert below checks this command,
            # not the previous one)
            obs = _run_cmd_action(runtime, 'Remove-Item -Recurse -Force test')
            assert obs.exit_code == 0
        else:
            # Unix version
            obs = _run_cmd_action(
                runtime, f'ls -l {config.workspace_mount_path_in_sandbox}'
            )
            assert obs.exit_code == 0

            obs = _run_cmd_action(runtime, 'ls -l')
            assert obs.exit_code == 0
            assert 'total 0' in obs.content

            obs = _run_cmd_action(runtime, 'mkdir test')
            assert obs.exit_code == 0

            obs = _run_cmd_action(runtime, 'ls -l')
            assert obs.exit_code == 0
            if (
                run_as_openhands
                and runtime_cls != CLIRuntime
                and runtime_cls != LocalRuntime
            ):
                assert 'openhands' in obs.content
            elif runtime_cls == LocalRuntime or runtime_cls == CLIRuntime:
                assert 'root' not in obs.content and 'openhands' not in obs.content
            else:
                assert 'root' in obs.content
            assert 'test' in obs.content

            obs = _run_cmd_action(runtime, 'touch test/foo.txt')
            assert obs.exit_code == 0

            obs = _run_cmd_action(runtime, 'ls -l test')
            assert obs.exit_code == 0
            assert 'foo.txt' in obs.content

            # clean up: this is needed, since CI will not be
            # run as root, and this test may leave a file
            # owned by root
            obs = _run_cmd_action(runtime, 'rm -rf test')
            assert obs.exit_code == 0
    finally:
        _close_test_runtime(runtime)


@pytest.mark.skipif(
    sys.platform != 'win32' and os.getenv('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime runs as the host user, so ~ is the host home. This test assumes a sandboxed user.',
)
def test_run_as_user_correct_home_dir(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        if is_windows():
            # Windows PowerShell version
            obs = _run_cmd_action(runtime, 'cd $HOME && Get-Location')
            assert obs.exit_code == 0
            # Check for Windows-style home paths
            if runtime_cls == LocalRuntime:
                assert (
                    os.getenv('USERPROFILE') in obs.content
                    or os.getenv('HOME') in obs.content
                )
            # For non-local runtimes, we are less concerned with precise paths
        else:
            # Original Linux version
            obs = _run_cmd_action(runtime, 'cd ~ && pwd')
            assert obs.exit_code == 0
            if runtime_cls == LocalRuntime:
                assert os.getenv('HOME') in obs.content
            elif run_as_openhands:
                assert '/home/openhands' in obs.content
            else:
                assert '/root' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_multi_cmd_run_in_single_line(temp_dir, runtime_cls):
    runtime, config = _load_runtime(temp_dir, runtime_cls)
    try:
        if is_windows():
            # Windows PowerShell version (&& requires PowerShell 7+)
            obs = _run_cmd_action(runtime, 'Get-Location && Get-ChildItem')
            assert obs.exit_code == 0
            assert config.workspace_mount_path_in_sandbox in obs.content
            assert '.git_config' in obs.content
        else:
            # Original Linux version using &&
            obs = _run_cmd_action(runtime, 'pwd && ls -l')
            assert obs.exit_code == 0
            assert config.workspace_mount_path_in_sandbox in obs.content
            assert 'total 0' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_stateful_cmd(temp_dir, runtime_cls):
    runtime, config = _load_runtime(temp_dir, runtime_cls)
    try:
        if is_windows():
            # Windows PowerShell version
            obs = _run_cmd_action(
                runtime, 'New-Item -ItemType Directory -Path test -Force'
            )
            assert obs.exit_code == 0, 'The exit code should be 0.'

            obs = _run_cmd_action(runtime, 'Set-Location test')
            assert obs.exit_code == 0, 'The exit code should be 0.'

            obs = _run_cmd_action(runtime, 'Get-Location')
            assert obs.exit_code == 0, 'The exit code should be 0.'
            # Account for both forward and backward slashes in the path
            norm_path = config.workspace_mount_path_in_sandbox.replace(
                '\\', '/'
            ).replace('//', '/')
            test_path = f'{norm_path}/test'.replace('//', '/')
            assert test_path in obs.content.replace('\\', '/')
        else:
            # Original Linux version
            obs = _run_cmd_action(runtime, 'mkdir -p test')
            assert obs.exit_code == 0, 'The exit code should be 0.'

            if runtime_cls == CLIRuntime:
                # For CLIRuntime, test CWD change and command execution within a single action,
                # as the CWD is enforced in the workspace.
                obs = _run_cmd_action(runtime, 'cd test && pwd')
            else:
                # For other runtimes, test stateful CWD change across actions
                obs = _run_cmd_action(runtime, 'cd test')
                assert obs.exit_code == 0, 'The exit code should be 0 for cd test.'
                obs = _run_cmd_action(runtime, 'pwd')

            assert obs.exit_code == 0, (
                'The exit code for the pwd command (or combined command) should be 0.'
            )
            assert (
                f'{config.workspace_mount_path_in_sandbox}/test' in obs.content.strip()
            )
    finally:
        _close_test_runtime(runtime)


def test_failed_cmd(temp_dir, runtime_cls):
    runtime, config = _load_runtime(temp_dir, runtime_cls)
    try:
        obs = _run_cmd_action(runtime, 'non_existing_command')
        assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
    finally:
        _close_test_runtime(runtime)

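# --- Host-side fixtures shared by the copy_to / copy_from tests below ---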
def _create_test_file(host_temp_dir):
    # Single file
    with open(os.path.join(host_temp_dir, 'test_file.txt'), 'w') as f:
        f.write('Hello, World!')


def test_copy_single_file(temp_dir, runtime_cls):
    runtime, config = _load_runtime(temp_dir, runtime_cls)
    try:
        sandbox_dir = config.workspace_mount_path_in_sandbox
        sandbox_file = os.path.join(sandbox_dir, 'test_file.txt')
        _create_test_file(temp_dir)
        runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir)

        if is_windows():
            obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}')
            assert obs.exit_code == 0
            assert 'test_file.txt' in obs.content

            obs = _run_cmd_action(runtime, f'Get-Content {sandbox_file}')
            assert obs.exit_code == 0
            assert 'Hello, World!' in obs.content
        else:
            obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
            assert obs.exit_code == 0
            assert 'test_file.txt' in obs.content

            obs = _run_cmd_action(runtime, f'cat {sandbox_file}')
            assert obs.exit_code == 0
            assert 'Hello, World!' in obs.content
    finally:
        _close_test_runtime(runtime)


def _create_host_test_dir_with_files(test_dir):
    logger.debug(f'creating `{test_dir}`')
    if not os.path.isdir(test_dir):
        os.makedirs(test_dir, exist_ok=True)
    logger.debug('creating test files in `test_dir`')
    with open(os.path.join(test_dir, 'file1.txt'), 'w') as f:
        f.write('File 1 content')
    with open(os.path.join(test_dir, 'file2.txt'), 'w') as f:
        f.write('File 2 content')


def test_copy_directory_recursively(temp_dir, runtime_cls):
    runtime, config = _load_runtime(temp_dir, runtime_cls)

    sandbox_dir = config.workspace_mount_path_in_sandbox
    try:
        temp_dir_copy = os.path.join(temp_dir, 'test_dir')
        # We need a separate directory, since temp_dir is mounted to /workspace
        _create_host_test_dir_with_files(temp_dir_copy)

        runtime.copy_to(temp_dir_copy, sandbox_dir, recursive=True)

        if is_windows():
            obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}')
            assert obs.exit_code == 0
            assert 'test_dir' in obs.content
            assert 'file1.txt' not in obs.content
            assert 'file2.txt' not in obs.content

            obs = _run_cmd_action(
                runtime, f'Get-ChildItem -Path {sandbox_dir}/test_dir'
            )
            assert obs.exit_code == 0
            assert 'file1.txt' in obs.content
            assert 'file2.txt' in obs.content

            obs = _run_cmd_action(
                runtime, f'Get-Content {sandbox_dir}/test_dir/file1.txt'
            )
            assert obs.exit_code == 0
            assert 'File 1 content' in obs.content
        else:
            obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
            assert obs.exit_code == 0
            assert 'test_dir' in obs.content
            assert 'file1.txt' not in obs.content
            assert 'file2.txt' not in obs.content

            obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}/test_dir')
            assert obs.exit_code == 0
            assert 'file1.txt' in obs.content
            assert 'file2.txt' in obs.content

            obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_dir/file1.txt')
            assert obs.exit_code == 0
            assert 'File 1 content' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_copy_to_non_existent_directory(temp_dir, runtime_cls):
    runtime, config = _load_runtime(temp_dir, runtime_cls)
    try:
        sandbox_dir = config.workspace_mount_path_in_sandbox
        _create_test_file(temp_dir)
        runtime.copy_to(
            os.path.join(temp_dir, 'test_file.txt'), f'{sandbox_dir}/new_dir'
        )

        obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/new_dir/test_file.txt')
        assert obs.exit_code == 0
        assert 'Hello, World!' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_overwrite_existing_file(temp_dir, runtime_cls):
    runtime, config = _load_runtime(temp_dir, runtime_cls)
    try:
        sandbox_dir = config.workspace_mount_path_in_sandbox
        sandbox_file = os.path.join(sandbox_dir, 'test_file.txt')

        if is_windows():
            # Check initial state
            obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}')
            assert obs.exit_code == 0
            assert 'test_file.txt' not in obs.content

            # Create an empty file
            obs = _run_cmd_action(
                runtime, f'New-Item -ItemType File -Path {sandbox_file} -Force'
            )
            assert obs.exit_code == 0

            # Verify file exists and is empty
            obs = _run_cmd_action(runtime, f'Get-ChildItem -Path {sandbox_dir}')
            assert obs.exit_code == 0
            assert 'test_file.txt' in obs.content

            obs = _run_cmd_action(runtime, f'Get-Content {sandbox_file}')
            assert obs.exit_code == 0
            assert obs.content.strip() == ''  # Empty file
            assert 'Hello, World!' not in obs.content

            # Create host file and copy to overwrite
            _create_test_file(temp_dir)
            runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir)

            # Verify file content is overwritten
            obs = _run_cmd_action(runtime, f'Get-Content {sandbox_file}')
            assert obs.exit_code == 0
            assert 'Hello, World!' in obs.content
        else:
            # Original Linux version
            obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
            assert obs.exit_code == 0
            assert 'test_file.txt' not in obs.content  # Check initial state

            obs = _run_cmd_action(runtime, f'touch {sandbox_file}')
            assert obs.exit_code == 0

            obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
            assert obs.exit_code == 0
            assert 'test_file.txt' in obs.content

            obs = _run_cmd_action(runtime, f'cat {sandbox_file}')
            assert obs.exit_code == 0
            assert obs.content.strip() == ''  # Empty file
            assert 'Hello, World!' not in obs.content

            _create_test_file(temp_dir)
            runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir)

            obs = _run_cmd_action(runtime, f'cat {sandbox_file}')
            assert obs.exit_code == 0
            assert 'Hello, World!' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_copy_non_existent_file(temp_dir, runtime_cls):
    runtime, config = _load_runtime(temp_dir, runtime_cls)
    try:
        sandbox_dir = config.workspace_mount_path_in_sandbox
        with pytest.raises(FileNotFoundError):
            runtime.copy_to(
                os.path.join(sandbox_dir, 'non_existent_file.txt'),
                f'{sandbox_dir}/should_not_exist.txt',
            )

        obs = _run_cmd_action(runtime, f'ls {sandbox_dir}/should_not_exist.txt')
        assert obs.exit_code != 0  # File should not exist
    finally:
        _close_test_runtime(runtime)


def test_copy_from_directory(temp_dir, runtime_cls):
    runtime, config = _load_runtime(temp_dir, runtime_cls)
    sandbox_dir = config.workspace_mount_path_in_sandbox
    try:
        temp_dir_copy = os.path.join(temp_dir, 'test_dir')
        # We need a separate directory, since temp_dir is mounted to /workspace
        _create_host_test_dir_with_files(temp_dir_copy)

        # Initial state
        runtime.copy_to(temp_dir_copy, sandbox_dir, recursive=True)

        path_to_copy_from = f'{sandbox_dir}/test_dir'
        result = runtime.copy_from(path=path_to_copy_from)

        # Result is returned as a path
        assert isinstance(result, Path)

        if result.exists() and not is_windows():
            result.unlink()
    finally:
        _close_test_runtime(runtime)

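# Background for the chown below: recent git versions refuse to operate on a
# repository owned by a different user ("fatal: detected dubious ownership")
# unless ownership is corrected or safe.directory is configured.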
@pytest.mark.skipif(
    is_windows(), reason='Test uses Linux-specific file permissions and sudo commands'
)
def test_git_operation(temp_dir, runtime_cls):
    # Do not mount the workspace, since a workspace mounted by tests will be
    # owned by root, while the user_id we get via os.getuid() is different
    # from root, which causes permission issues.
    runtime, config = _load_runtime(
        temp_dir=temp_dir,
        use_workspace=False,
        runtime_cls=runtime_cls,
        # Need to use a non-root user to expose issues
        run_as_openhands=True,
    )
    # This will happen if the runtime's permissions are not properly configured:
    # fatal: detected dubious ownership in repository at config.workspace_mount_path_in_sandbox
    try:
        if runtime_cls != LocalRuntime and runtime_cls != CLIRuntime:
            # on a local machine, passwordless sudo will probably not be available
            obs = _run_cmd_action(runtime, 'sudo chown -R openhands:root .')
            assert obs.exit_code == 0

        # check the ownership of the current directory
        obs = _run_cmd_action(runtime, 'ls -alh .')
        assert obs.exit_code == 0
        # drwx--S--- 2 openhands root   64 Aug  7 23:32 .
        # drwxr-xr-x 1 root      root 4.0K Aug  7 23:33 ..
        for line in obs.content.split('\n'):
            if runtime_cls == LocalRuntime or runtime_cls == CLIRuntime:
                continue  # skip these checks

            if ' ..' in line:
                # parent directory should be owned by root
                assert 'root' in line
                assert 'openhands' not in line
            elif ' .' in line:
                # current directory should be owned by openhands
                # and its group should be root
                assert 'openhands' in line
                assert 'root' in line

        # make sure all git operations are allowed
        obs = _run_cmd_action(runtime, 'git init')
        assert obs.exit_code == 0

        # create a file
        obs = _run_cmd_action(runtime, 'echo "hello" > test_file.txt')
        assert obs.exit_code == 0

        if runtime_cls == LocalRuntime or runtime_cls == CLIRuntime:
            # set the git author config in CI only, not on a local machine
            logger.info('Setting git config author')
            obs = _run_cmd_action(
                runtime,
                'git config user.name "openhands" && git config user.email "[email protected]"',
            )
            assert obs.exit_code == 0

        # List the current git config settings (should be empty or just what was set)
        obs = _run_cmd_action(runtime, 'git config --list')
        assert obs.exit_code == 0

        # git add
        obs = _run_cmd_action(runtime, 'git add test_file.txt')
        assert obs.exit_code == 0

        # git diff
        obs = _run_cmd_action(runtime, 'git diff --no-color --cached')
        assert obs.exit_code == 0
        assert 'b/test_file.txt' in obs.content
        assert '+hello' in obs.content

        # git commit
        obs = _run_cmd_action(runtime, 'git commit -m "test commit"')
        assert obs.exit_code == 0
    finally:
        _close_test_runtime(runtime)


def test_python_version(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        obs = runtime.run_action(CmdRunAction(command='python --version'))

        assert isinstance(obs, CmdOutputObservation), (
            'The observation should be a CmdOutputObservation.'
        )
        assert obs.exit_code == 0, 'The exit code should be 0.'
        assert 'Python 3' in obs.content, 'The output should contain "Python 3".'
    finally:
        _close_test_runtime(runtime)


def test_pwd_property(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Create a subdirectory and verify pwd updates
        obs = _run_cmd_action(runtime, 'mkdir -p random_dir')
        assert obs.exit_code == 0

        obs = _run_cmd_action(runtime, 'cd random_dir && pwd')
        assert obs.exit_code == 0
        assert 'random_dir' in obs.content
    finally:
        _close_test_runtime(runtime)


def test_basic_command(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        if is_windows():
            # Test simple command
            obs = _run_cmd_action(runtime, "Write-Output 'hello world'")
            assert 'hello world' in obs.content
            assert obs.exit_code == 0

            # Test command with error
            obs = _run_cmd_action(runtime, 'nonexistent_command')
            assert obs.exit_code != 0
            assert 'not recognized' in obs.content or 'command not found' in obs.content

            # Test command with special characters
            obs = _run_cmd_action(
                runtime, 'Write-Output "hello world with`nspecial chars"'
            )
            assert 'hello world with\nspecial chars' in obs.content
            assert obs.exit_code == 0

            # Test multiple commands in sequence
            obs = _run_cmd_action(
                runtime,
                'Write-Output "first" && Write-Output "second" && Write-Output "third"',
            )
            assert 'first' in obs.content
            assert 'second' in obs.content
            assert 'third' in obs.content
            assert obs.exit_code == 0
        else:
            # Original Linux version
            # Test simple command
            obs = _run_cmd_action(runtime, "echo 'hello world'")
            assert 'hello world' in obs.content
            assert obs.exit_code == 0

            # Test command with error (127 is bash's "command not found" status)
            obs = _run_cmd_action(runtime, 'nonexistent_command')
            assert obs.exit_code == 127
            assert 'nonexistent_command: command not found' in obs.content

            # Test command with special characters
            obs = _run_cmd_action(
                runtime, "echo 'hello world with\nspecial chars'"
            )
            assert 'hello world with\nspecial chars' in obs.content
            assert obs.exit_code == 0

            # Test multiple commands in sequence
            obs = _run_cmd_action(
                runtime, 'echo "first" && echo "second" && echo "third"'
            )
            assert 'first' in obs.content
            assert 'second' in obs.content
            assert 'third' in obs.content
            assert obs.exit_code == 0
    finally:
        _close_test_runtime(runtime)

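# Interactive-command protocol: NO_CHANGE_TIMEOUT_SECONDS=1 makes the runtime
# return a soft-timeout observation while `read` is waiting on stdin, and a
# follow-up action with is_input=True is written to the running process
# instead of being executed as a new command.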
@pytest.mark.skipif(
    is_windows(), reason='PowerShell does not support interactive commands'
)
@pytest.mark.skipif(
    os.getenv('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not support interactive commands from the agent.',
)
def test_interactive_command(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(
        temp_dir,
        runtime_cls,
        run_as_openhands,
        runtime_startup_env_vars={'NO_CHANGE_TIMEOUT_SECONDS': '1'},
    )
    try:
        # Test interactive command
        action = CmdRunAction('read -p "Enter name: " name && echo "Hello $name"')
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        # This should trigger the SOFT timeout, so there is no need to set a hard timeout
        assert 'Enter name:' in obs.content
        assert '[The command has no new output after 1 seconds.' in obs.metadata.suffix

        action = CmdRunAction('John', is_input=True)
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Hello John' in obs.content
        assert '[The command completed with exit code 0.]' in obs.metadata.suffix

        # Test multiline command input with a here document
        action = CmdRunAction("""cat << EOF
line 1
line 2
EOF""")
        obs = runtime.run_action(action)
        assert 'line 1\nline 2' in obs.content
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert '[The command completed with exit code 0.]' in obs.metadata.suffix
        assert obs.exit_code == 0
    finally:
        _close_test_runtime(runtime)


@pytest.mark.skipif(
    is_windows(),
    reason='Test relies on Linux-specific commands like seq and bash for loops',
)
def test_long_output(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Generate a long output
        action = CmdRunAction('for i in $(seq 1 5000); do echo "Line $i"; done')
        action.set_hard_timeout(10)
        obs = runtime.run_action(action)
        assert obs.exit_code == 0
        assert 'Line 1' in obs.content
        assert 'Line 5000' in obs.content
    finally:
        _close_test_runtime(runtime)


@pytest.mark.skipif(
    is_windows(),
    reason='Test relies on Linux-specific commands like seq and bash for loops',
)
@pytest.mark.skipif(
    os.getenv('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not truncate command output.',
)
def test_long_output_exceed_history_limit(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Generate a long output
        action = CmdRunAction('for i in $(seq 1 50000); do echo "Line $i"; done')
        action.set_hard_timeout(30)
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0
        assert 'Previous command outputs are truncated' in obs.metadata.prefix
        assert 'Line 40000' in obs.content
        assert 'Line 50000' in obs.content
    finally:
        _close_test_runtime(runtime)


@pytest.mark.skipif(
    is_windows(), reason='Test uses Linux-specific temp directory and bash for loops'
)
def test_long_output_from_nested_directories(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Create nested directories with many files
        setup_cmd = 'mkdir -p /tmp/test_dir && cd /tmp/test_dir && for i in $(seq 1 100); do mkdir -p "folder_$i"; for j in $(seq 1 100); do touch "folder_$i/file_$j.txt"; done; done'
        setup_action = CmdRunAction(setup_cmd.strip())
        setup_action.set_hard_timeout(60)
        obs = runtime.run_action(setup_action)
        assert obs.exit_code == 0

        # List the directory structure recursively
        action = CmdRunAction('ls -R /tmp/test_dir')
        action.set_hard_timeout(60)
        obs = runtime.run_action(action)
        assert obs.exit_code == 0

        # Verify the output contains the expected files
        assert 'folder_1' in obs.content
        assert 'file_1.txt' in obs.content
        assert 'folder_100' in obs.content
        assert 'file_100.txt' in obs.content
    finally:
        _close_test_runtime(runtime)


@pytest.mark.skipif(
    is_windows(),
    reason='Test uses Linux-specific commands like find and grep with complex syntax',
)
def test_command_backslash(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Create a file with the content "implemented_function"
        action = CmdRunAction(
            'mkdir -p /tmp/test_dir && echo "implemented_function" > /tmp/test_dir/file_1.txt'
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0

        # Reproduce an issue we ran into during evaluation:
        #   find /workspace/sympy__sympy__1.0 -type f -exec grep -l "implemented_function" {} \;
        #   find: missing argument to `-exec'
        # --> This is unexpected output due to incorrect escaping of \;
        # This tests for the correct escaping of \;
        action = CmdRunAction(
            'find /tmp/test_dir -type f -exec grep -l "implemented_function" {} \\;'
        )
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.exit_code == 0
        assert '/tmp/test_dir/file_1.txt' in obs.content
    finally:
        _close_test_runtime(runtime)

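# The stress test below runs three phases per iteration: probe tmux and
# action_execution_server memory, drive a soft-timeout interactive prompt to
# completion, then force a hard timeout on a long-output command and recover
# the terminal with Ctrl+C.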
@pytest.mark.skipif(
    is_windows(), reason='Test uses Linux-specific ps aux, awk, and grep commands'
)
@pytest.mark.skipif(
    os.getenv('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not support interactive commands from the agent.',
)
def test_stress_long_output_with_soft_and_hard_timeout(
    temp_dir, runtime_cls, run_as_openhands
):
    runtime, config = _load_runtime(
        temp_dir,
        runtime_cls,
        run_as_openhands,
        runtime_startup_env_vars={'NO_CHANGE_TIMEOUT_SECONDS': '1'},
        docker_runtime_kwargs={
            'cpu_period': 100000,  # 100ms
            'cpu_quota': 100000,  # Can use 100ms out of each 100ms period (1 CPU)
            'mem_limit': '4G',  # 4 GB of memory
        },
    )
    try:
        # Run a command that generates long output multiple times
        for i in range(10):
            start_time = time.time()

            # Check tmux memory usage (in KB)
            mem_action = CmdRunAction(
                'ps aux | awk \'{printf "%8.1f KB %s\\n", $6, $0}\' | sort -nr | grep "/usr/bin/tmux" | grep -v grep | awk \'{print $1}\''
            )
            mem_obs = runtime.run_action(mem_action)
            assert mem_obs.exit_code == 0
            logger.info(
                f'Tmux memory usage (iteration {i}): {mem_obs.content.strip()} KB'
            )

            # Check action_execution_server memory usage
            mem_action = CmdRunAction(
                'ps aux | awk \'{printf "%8.1f KB %s\\n", $6, $0}\' | sort -nr | grep "action_execution_server" | grep "/openhands/poetry" | grep -v grep | awk \'{print $1}\''
            )
            mem_obs = runtime.run_action(mem_action)
            assert mem_obs.exit_code == 0
            logger.info(
                f'Action execution server memory usage (iteration {i}): {mem_obs.content.strip()} KB'
            )

            # Test the soft timeout
            action = CmdRunAction(
                'read -p "Do you want to continue? [Y/n] " answer; if [[ $answer == "Y" ]]; then echo "Proceeding with operation..."; echo "Operation completed successfully!"; else echo "Operation cancelled."; exit 1; fi'
            )
            obs = runtime.run_action(action)
            assert 'Do you want to continue?' in obs.content
            assert obs.exit_code == -1  # Command is still running, waiting for input

            # Send the confirmation
            action = CmdRunAction('Y', is_input=True)
            obs = runtime.run_action(action)
            assert 'Proceeding with operation...' in obs.content
            assert 'Operation completed successfully!' in obs.content
            assert obs.exit_code == 0
            assert '[The command completed with exit code 0.]' in obs.metadata.suffix

            # Test the hard timeout w/ long output
            # Generate long output with 1000 asterisks per line
            action = CmdRunAction(
                f'export i={i}; for j in $(seq 1 100); do echo "Line $j - Iteration $i - $(printf \'%1000s\' | tr " " "*")"; sleep 1; done'
            )
            action.set_hard_timeout(2)
            obs = runtime.run_action(action)

            # Verify the output
            assert obs.exit_code == -1
            assert f'Line 1 - Iteration {i}' in obs.content
            # assert f'Line 1000 - Iteration {i}' in obs.content
            # assert '[The command completed with exit code 0.]' in obs.metadata.suffix

            # Because the hard timeout is triggered, the terminal will be in a
            # weird state where it will not accept any new commands.
            obs = runtime.run_action(CmdRunAction('ls'))
            assert obs.exit_code == -1
            assert 'The previous command is still running' in obs.metadata.suffix

            # We need to send a Ctrl+C to reset the terminal.
            obs = runtime.run_action(CmdRunAction('C-c', is_input=True))
            assert obs.exit_code == 130
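            # 130 = 128 + SIGINT(2): the shell convention for a process
            # terminated by Ctrl+C.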

            # Now make sure the terminal is in a good state
            obs = runtime.run_action(CmdRunAction('ls'))
            assert obs.exit_code == 0

            duration = time.time() - start_time
            logger.info(f'Completed iteration {i} in {duration:.2f} seconds')

    finally:
        _close_test_runtime(runtime)

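# Output-continuation protocol: after a timeout, an empty CmdRunAction polls
# the still-running command for its next batch of output until the command
# finally completes.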
@pytest.mark.skipif(
    os.getenv('TEST_RUNTIME') == 'cli',
    reason='FIXME: CLIRuntime does not watch previously timed-out commands except for getting full output a short time after timeout.',
)
def test_command_output_continuation(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        if is_windows():
            # Windows PowerShell version
            action = CmdRunAction(
                '1..5 | ForEach-Object { Write-Output $_; Start-Sleep 3 }'
            )
            action.set_hard_timeout(2.5)
            obs = runtime.run_action(action)
            assert obs.content.strip() == '1'
            assert obs.metadata.prefix == ''
            assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix

            # Continue watching the output
            action = CmdRunAction('')
            action.set_hard_timeout(2.5)
            obs = runtime.run_action(action)
            assert (
                '[Below is the output of the previous command.]' in obs.metadata.prefix
            )
            assert obs.content.strip() == '2'
            assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix

            # Continue until completion
            for expected in ['3', '4', '5']:
                action = CmdRunAction('')
                action.set_hard_timeout(2.5)
                obs = runtime.run_action(action)
                assert (
                    '[Below is the output of the previous command.]'
                    in obs.metadata.prefix
                )
                assert obs.content.strip() == expected
                assert (
                    '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
                )

            # Final empty command to complete
            action = CmdRunAction('')
            obs = runtime.run_action(action)
            assert '[The command completed with exit code 0.]' in obs.metadata.suffix
        else:
            # Original Linux version
            # Start a command that produces output slowly
            action = CmdRunAction('for i in {1..5}; do echo $i; sleep 3; done')
            action.set_hard_timeout(2.5)
            obs = runtime.run_action(action)
            assert obs.content.strip() == '1'
            assert obs.metadata.prefix == ''
            assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix

            # Continue watching the output
            action = CmdRunAction('')
            action.set_hard_timeout(2.5)
            obs = runtime.run_action(action)
            assert (
                '[Below is the output of the previous command.]' in obs.metadata.prefix
            )
            assert obs.content.strip() == '2'
            assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix

            # Continue until completion
            for expected in ['3', '4', '5']:
                action = CmdRunAction('')
                action.set_hard_timeout(2.5)
                obs = runtime.run_action(action)
                assert (
                    '[Below is the output of the previous command.]'
                    in obs.metadata.prefix
                )
                assert obs.content.strip() == expected
                assert (
                    '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
                )

            # Final empty command to complete
            action = CmdRunAction('')
            obs = runtime.run_action(action)
            assert '[The command completed with exit code 0.]' in obs.metadata.suffix
    finally:
        _close_test_runtime(runtime)


@pytest.mark.skipif(
    os.getenv('TEST_RUNTIME') == 'cli',
    reason='FIXME: CLIRuntime does not implement empty command behavior.',
)
def test_long_running_command_follow_by_execute(
    temp_dir, runtime_cls, run_as_openhands
):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        if is_windows():
            action = CmdRunAction('1..3 | ForEach-Object { Write-Output $_; sleep 3 }')
        else:
            # Test command that produces output slowly
            action = CmdRunAction('for i in {1..3}; do echo $i; sleep 3; done')

        action.set_hard_timeout(2.5)
        obs = runtime.run_action(action)
        assert '1' in obs.content  # First number should appear before the timeout
        assert obs.metadata.exit_code == -1  # -1 indicates the command is still running
        assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
        assert obs.metadata.prefix == ''

        # Continue watching the output
        action = CmdRunAction('')
        action.set_hard_timeout(2.5)
        obs = runtime.run_action(action)
        assert '2' in obs.content
        assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
        assert '[The command timed out after 2.5 seconds.' in obs.metadata.suffix
        assert obs.metadata.exit_code == -1  # -1 indicates the command is still running

        # Test a command that produces no output
        action = CmdRunAction('sleep 15')
        action.set_hard_timeout(2.5)
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert '3' not in obs.content
        assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
        assert 'The previous command is still running' in obs.metadata.suffix
        assert obs.metadata.exit_code == -1  # -1 indicates the command is still running

        # Finally continue again
        action = CmdRunAction('')
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert '3' in obs.content
        assert '[The command completed with exit code 0.]' in obs.metadata.suffix
    finally:
        _close_test_runtime(runtime)


@pytest.mark.skipif(
    os.getenv('TEST_RUNTIME') == 'cli',
    reason='FIXME: CLIRuntime does not implement empty command behavior.',
)
def test_empty_command_errors(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Test an empty command without a previous command - the behavior
        # should be the same on all platforms
        obs = runtime.run_action(CmdRunAction(''))
        assert isinstance(obs, CmdOutputObservation)
        assert (
            'ERROR: No previous running command to retrieve logs from.' in obs.content
        )
    finally:
        _close_test_runtime(runtime)


@pytest.mark.skipif(
    is_windows(), reason='PowerShell does not support interactive commands'
)
@pytest.mark.skipif(
    os.getenv('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not support interactive commands from the agent.',
)
def test_python_interactive_input(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Test a Python program that asks for input - same for both platforms
        python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""

        # Start Python with the interactive script
        # For both platforms we can use the same command
        obs = runtime.run_action(CmdRunAction(f'python -c "{python_script}"'))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Enter your name:' in obs.content
        assert obs.metadata.exit_code == -1  # -1 indicates the command is still running

        # Send the first input (name)
        obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Enter your age:' in obs.content
        assert obs.metadata.exit_code == -1

        # Send the second input (age)
        obs = runtime.run_action(CmdRunAction('25', is_input=True))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Hello Alice, you are 25 years old' in obs.content
        assert obs.metadata.exit_code == 0
        assert '[The command completed with exit code 0.]' in obs.metadata.suffix
    finally:
        _close_test_runtime(runtime)

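# Same interaction as above, but first confirms that text sent with
# is_input=False is rejected while a previous command is still running.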
@pytest.mark.skipif(
    is_windows(), reason='PowerShell does not support interactive commands'
)
@pytest.mark.skipif(
    os.getenv('TEST_RUNTIME') == 'cli',
    reason='CLIRuntime does not support interactive commands from the agent.',
)
def test_python_interactive_input_without_set_input(
    temp_dir, runtime_cls, run_as_openhands
):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # Test a Python program that asks for input
        python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""

        # Start Python with the interactive script
        obs = runtime.run_action(CmdRunAction(f'python -c "{python_script}"'))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Enter your name:' in obs.content
        assert obs.metadata.exit_code == -1  # -1 indicates the command is still running

        # Send the first input (name)
        obs = runtime.run_action(CmdRunAction('Alice', is_input=False))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Enter your age:' not in obs.content
        assert (
            'Your command "Alice" is NOT executed. The previous command is still running'
            in obs.metadata.suffix
        )
        assert obs.metadata.exit_code == -1

        # Try again, now with is_input=True
        obs = runtime.run_action(CmdRunAction('Alice', is_input=True))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Enter your age:' in obs.content
        assert obs.metadata.exit_code == -1

        obs = runtime.run_action(CmdRunAction('25', is_input=True))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert 'Hello Alice, you are 25 years old' in obs.content
        assert obs.metadata.exit_code == 0
        assert '[The command completed with exit code 0.]' in obs.metadata.suffix
    finally:
        _close_test_runtime(runtime)

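# Verifies that the echoed command line itself (e.g. `git remote -v`) is
# stripped from the captured output.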
def test_bash_remove_prefix(temp_dir, runtime_cls, run_as_openhands):
    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
    try:
        # create a git repo - same for both platforms
        action = CmdRunAction(
            'git init && git remote add origin https://github.com/All-Hands-AI/OpenHands'
        )
        obs = runtime.run_action(action)
        # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.metadata.exit_code == 0

        # Check git remote - same for both platforms
        obs = runtime.run_action(CmdRunAction('git remote -v'))
        # logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert obs.metadata.exit_code == 0
        assert 'https://github.com/All-Hands-AI/OpenHands' in obs.content
        assert 'git remote -v' not in obs.content
    finally:
        _close_test_runtime(runtime)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/runtime/test_browsergym_envs.py
DELETED
@@ -1,73 +0,0 @@
-import json
-
-import pytest
-
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action.browse import BrowseInteractiveAction
-from openhands.events.observation.browse import BrowserOutputObservation
-from tests.runtime.conftest import _close_test_runtime, _load_runtime
-
-
-def has_miniwob():
-    try:
-        import importlib.util
-
-        # try to find this browser environment, if it was installed
-        spec = importlib.util.find_spec('browsergym.miniwob')
-        if spec is None:
-            return False
-
-        # try to import this environment
-        importlib.util.module_from_spec(spec)
-        return True
-    except ImportError:
-        return False
-
-
-@pytest.mark.skipif(
-    not has_miniwob(),
-    reason='Requires browsergym-miniwob package to be installed',
-)
-def test_browsergym_eval_env(runtime_cls, temp_dir):
-    runtime, config = _load_runtime(
-        temp_dir,
-        runtime_cls=runtime_cls,
-        run_as_openhands=False,  # need root permission to access file
-        base_container_image='xingyaoww/od-eval-miniwob:v1.0',
-        browsergym_eval_env='browsergym/miniwob.choose-list',
-        force_rebuild_runtime=True,
-    )
-    from openhands.runtime.browser.browser_env import (
-        BROWSER_EVAL_GET_GOAL_ACTION,
-        BROWSER_EVAL_GET_REWARDS_ACTION,
-    )
-
-    # Test browse
-    action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_GOAL_ACTION)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-    assert isinstance(obs, BrowserOutputObservation)
-    assert not obs.error
-    assert 'Select' in obs.content
-    assert 'from the list and click Submit' in obs.content
-
-    # Make sure the browser can produce observation in eval env
-    action = BrowseInteractiveAction(browser_actions='noop()')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert (
-        obs.url.strip()
-        == 'file:///miniwob-plusplus/miniwob/html/miniwob/choose-list.html'
-    )
-
-    # Make sure the rewards are working
-    action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert json.loads(obs.content) == [0.0]
-
-    _close_test_runtime(runtime)
tests/runtime/test_browsing.py
DELETED
@@ -1,213 +0,0 @@
-"""Browsing-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
-
-import os
-
-import pytest
-from conftest import _close_test_runtime, _load_runtime
-
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action import (
-    BrowseInteractiveAction,
-    BrowseURLAction,
-    CmdRunAction,
-)
-from openhands.events.observation import (
-    BrowserOutputObservation,
-    CmdOutputObservation,
-)
-
-# ============================================================================================================================
-# Browsing tests, without evaluation (poetry install --without evaluation)
-# For eval environments, tests need to run with poetry install
-# ============================================================================================================================
-
-
-@pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') == 'cli',
-    reason='CLIRuntime does not support browsing actions',
-)
-def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-
-    # Test browse
-    action_cmd = CmdRunAction(command='python3 -m http.server 8000 > server.log 2>&1 &')
-    logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_cmd)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-    assert isinstance(obs, CmdOutputObservation)
-    assert obs.exit_code == 0
-    assert '[1]' in obs.content
-
-    action_cmd = CmdRunAction(command='sleep 3 && cat server.log')
-    logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_cmd)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-
-    action_browse = BrowseURLAction(url='http://localhost:8000')
-    logger.info(action_browse, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_browse)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-    assert isinstance(obs, BrowserOutputObservation)
-    assert 'http://localhost:8000' in obs.url
-    assert not obs.error
-    assert obs.open_pages_urls == ['http://localhost:8000/']
-    assert obs.active_page_index == 0
-    assert obs.last_browser_action == 'goto("http://localhost:8000")'
-    assert obs.last_browser_action_error == ''
-    assert 'Directory listing for /' in obs.content
-    assert 'server.log' in obs.content
-
-    # clean up
-    action = CmdRunAction(command='rm -rf server.log')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-
-    _close_test_runtime(runtime)
-
-
-@pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') == 'cli',
-    reason='CLIRuntime does not support browsing actions',
-)
-def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        # Create a PDF file using reportlab in the host environment
-        from reportlab.lib.pagesizes import letter
-        from reportlab.pdfgen import canvas
-
-        pdf_path = os.path.join(temp_dir, 'test_document.pdf')
-        pdf_content = 'This is test content for PDF reading test'
-
-        c = canvas.Canvas(pdf_path, pagesize=letter)
-        # Add more content to make the PDF more robust
-        c.drawString(100, 750, pdf_content)
-        c.drawString(100, 700, 'Additional line for PDF structure')
-        c.drawString(100, 650, 'Third line to ensure valid PDF')
-        # Explicitly set PDF version and ensure proper structure
-        c.setPageCompression(0)  # Disable compression for simpler structure
-        c.save()
-
-        # Copy the PDF to the sandbox
-        sandbox_dir = config.workspace_mount_path_in_sandbox
-        runtime.copy_to(pdf_path, sandbox_dir)
-
-        # Start HTTP server
-        action_cmd = CmdRunAction(command='ls -alh')
-        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action_cmd)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert isinstance(obs, CmdOutputObservation)
-        assert obs.exit_code == 0
-        assert 'test_document.pdf' in obs.content
-
-        # Get server url
-        action_cmd = CmdRunAction(command='cat /tmp/oh-server-url')
-        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action_cmd)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert obs.exit_code == 0
-        server_url = obs.content.strip()
-
-        # Browse to the PDF file
-        pdf_url = f'{server_url}/view?path=/workspace/test_document.pdf'
-        action_browse = BrowseInteractiveAction(browser_actions=f'goto("{pdf_url}")')
-        logger.info(action_browse, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action_browse)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-        # Verify the browser observation
-        assert isinstance(obs, BrowserOutputObservation)
-        observation_text = str(obs)
-        assert '[Action executed successfully.]' in observation_text
-        assert 'Canvas' in observation_text
-        assert (
-            'Screenshot saved to: /workspace/.browser_screenshots/screenshot_'
-            in observation_text
-        )
-
-        # Check the /workspace/.browser_screenshots folder
-        action_cmd = CmdRunAction(command='ls /workspace/.browser_screenshots')
-        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action_cmd)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert isinstance(obs, CmdOutputObservation)
-        assert obs.exit_code == 0
-        assert 'screenshot_' in obs.content
-        assert '.png' in obs.content
-    finally:
-        _close_test_runtime(runtime)
-
-
-@pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') == 'cli',
-    reason='CLIRuntime does not support browsing actions',
-)
-def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        # Create a PNG file using PIL in the host environment
-        from PIL import Image, ImageDraw
-
-        png_path = os.path.join(temp_dir, 'test_image.png')
-        # Create a simple image with text
-        img = Image.new('RGB', (400, 200), color=(255, 255, 255))
-        d = ImageDraw.Draw(img)
-        text = 'This is a test PNG image'
-        d.text((20, 80), text, fill=(0, 0, 0))
-        img.save(png_path)
-
-        # Copy the PNG to the sandbox
-        sandbox_dir = config.workspace_mount_path_in_sandbox
-        runtime.copy_to(png_path, sandbox_dir)
-
-        # Verify the file exists in the sandbox
-        action_cmd = CmdRunAction(command='ls -alh')
-        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action_cmd)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert isinstance(obs, CmdOutputObservation)
-        assert obs.exit_code == 0
-        assert 'test_image.png' in obs.content
-
-        # Get server url
-        action_cmd = CmdRunAction(command='cat /tmp/oh-server-url')
-        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action_cmd)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert obs.exit_code == 0
-        server_url = obs.content.strip()
-
-        # Browse to the PNG file
-        png_url = f'{server_url}/view?path=/workspace/test_image.png'
-        action_browse = BrowseInteractiveAction(browser_actions=f'goto("{png_url}")')
-        logger.info(action_browse, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action_browse)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-        # Verify the browser observation
-        assert isinstance(obs, BrowserOutputObservation)
-        observation_text = str(obs)
-        assert '[Action executed successfully.]' in observation_text
-        assert 'File Viewer - test_image.png' in observation_text
-        assert (
-            'Screenshot saved to: /workspace/.browser_screenshots/screenshot_'
-            in observation_text
-        )
-
-        # Check the /workspace/.browser_screenshots folder
-        action_cmd = CmdRunAction(command='ls /workspace/.browser_screenshots')
-        logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action_cmd)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert isinstance(obs, CmdOutputObservation)
-        assert obs.exit_code == 0
-        assert 'screenshot_' in obs.content
-        assert '.png' in obs.content
-    finally:
-        _close_test_runtime(runtime)
tests/runtime/test_docker_images.py
DELETED
@@ -1,96 +0,0 @@
-"""Image-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
-
-import os
-
-import pytest
-from conftest import _close_test_runtime, _load_runtime
-
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action import CmdRunAction
-
-# ============================================================================================================================
-# Image-specific tests
-# ============================================================================================================================
-
-# Skip all tests in this file if running with CLIRuntime or LocalRuntime,
-# as these tests are specific to Docker images.
-pytestmark = pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') in ['cli', 'local'],
-    reason='Image tests are specific to DockerRuntime and not applicable to CLIRuntime or LocalRuntime.',
-)
-
-
-def test_bash_python_version(temp_dir, runtime_cls, base_container_image):
-    """Make sure Python is available in bash."""
-    if base_container_image not in [
-        'python:3.12-bookworm',
-    ]:
-        pytest.skip('This test is only for python-related images')
-
-    runtime, config = _load_runtime(
-        temp_dir, runtime_cls, base_container_image=base_container_image
-    )
-
-    action = CmdRunAction(command='which python')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-
-    action = CmdRunAction(command='python --version')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-    assert 'Python 3.12' in obs.content  # Check for specific version
-
-    action = CmdRunAction(command='pip --version')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-    assert 'pip' in obs.content  # Check that pip is available
-
-    _close_test_runtime(runtime)
-
-
-def test_nodejs_22_version(temp_dir, runtime_cls, base_container_image):
-    """Make sure Node.js is available in bash."""
-    if base_container_image not in [
-        'node:22-bookworm',
-    ]:
-        pytest.skip('This test is only for nodejs-related images')
-
-    runtime, config = _load_runtime(
-        temp_dir, runtime_cls, base_container_image=base_container_image
-    )
-
-    action = CmdRunAction(command='node --version')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-    assert 'v22' in obs.content  # Check for specific version
-
-    _close_test_runtime(runtime)
-
-
-def test_go_version(temp_dir, runtime_cls, base_container_image):
-    """Make sure Go is available in bash."""
-    if base_container_image not in [
-        'golang:1.23-bookworm',
-    ]:
-        pytest.skip('This test is only for go-related images')
-
-    runtime, config = _load_runtime(
-        temp_dir, runtime_cls, base_container_image=base_container_image
-    )
-
-    action = CmdRunAction(command='go version')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-    assert 'go1.23' in obs.content  # Check for specific version
-
-    _close_test_runtime(runtime)
tests/runtime/test_env_vars.py
DELETED
@@ -1,120 +0,0 @@
-"""Env vars related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
-
-import os
-from unittest.mock import patch
-
-import pytest
-from conftest import _close_test_runtime, _load_runtime
-
-from openhands.events.action import CmdRunAction
-from openhands.events.observation import CmdOutputObservation
-
-# ============================================================================================================================
-# Environment variables tests
-# ============================================================================================================================
-
-
-def test_env_vars_os_environ(temp_dir, runtime_cls, run_as_openhands):
-    with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
-        runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-
-        obs: CmdOutputObservation = runtime.run_action(CmdRunAction(command='env'))
-        print(obs)
-
-        obs: CmdOutputObservation = runtime.run_action(
-            CmdRunAction(command='echo $FOOBAR')
-        )
-        print(obs)
-        assert obs.exit_code == 0, 'The exit code should be 0.'
-        assert obs.content.strip().split('\n\r')[0].strip() == 'BAZ', (
-            f'Output: [{obs.content}] for {runtime_cls}'
-        )
-
-        _close_test_runtime(runtime)
-
-
-def test_env_vars_runtime_operations(temp_dir, runtime_cls):
-    runtime, config = _load_runtime(temp_dir, runtime_cls)
-
-    # Test adding single env var
-    runtime.add_env_vars({'QUUX': 'abc"def'})
-    obs = runtime.run_action(CmdRunAction(command='echo $QUUX'))
-    assert (
-        obs.exit_code == 0 and obs.content.strip().split('\r\n')[0].strip() == 'abc"def'
-    )
-
-    # Test adding multiple env vars
-    runtime.add_env_vars({'FOOBAR': 'xyz'})
-    obs = runtime.run_action(CmdRunAction(command='echo $QUUX $FOOBAR'))
-    assert (
-        obs.exit_code == 0
-        and obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz'
-    )
-
-    # Test adding empty dict
-    prev_env = runtime.run_action(CmdRunAction(command='env')).content
-    runtime.add_env_vars({})
-    current_env = runtime.run_action(CmdRunAction(command='env')).content
-    assert prev_env == current_env
-
-    # Test overwriting env vars
-    runtime.add_env_vars({'QUUX': 'new_value'})
-    obs = runtime.run_action(CmdRunAction(command='echo $QUUX'))
-    assert (
-        obs.exit_code == 0
-        and obs.content.strip().split('\r\n')[0].strip() == 'new_value'
-    )
-
-    _close_test_runtime(runtime)
-
-
-def test_env_vars_added_by_config(temp_dir, runtime_cls):
-    runtime, config = _load_runtime(
-        temp_dir,
-        runtime_cls,
-        runtime_startup_env_vars={'ADDED_ENV_VAR': 'added_value'},
-    )
-
-    # Test adding single env var
-    obs = runtime.run_action(CmdRunAction(command='echo $ADDED_ENV_VAR'))
-    assert (
-        obs.exit_code == 0
-        and obs.content.strip().split('\r\n')[0].strip() == 'added_value'
-    )
-    _close_test_runtime(runtime)
-
-
-@pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') in ['cli', 'local'],
-    reason='This test is specific to DockerRuntime and its pause/resume persistence',
-)
-def test_docker_runtime_env_vars_persist_after_restart(temp_dir):
-    from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
-
-    runtime, config = _load_runtime(temp_dir, DockerRuntime)
-
-    # Add a test environment variable
-    runtime.add_env_vars({'GITHUB_TOKEN': 'test_token'})
-
-    # Verify the variable is set in current session
-    obs = runtime.run_action(CmdRunAction(command='echo $GITHUB_TOKEN'))
-    assert obs.exit_code == 0
-    assert obs.content.strip().split('\r\n')[0].strip() == 'test_token'
-
-    # Verify the variable is added to .bashrc
-    obs = runtime.run_action(
-        CmdRunAction(command='grep "^export GITHUB_TOKEN=" ~/.bashrc')
-    )
-    assert obs.exit_code == 0
-    assert 'export GITHUB_TOKEN=' in obs.content
-
-    # Test pause/resume cycle
-    runtime.pause()
-    runtime.resume()
-
-    # Verify the variable persists after restart
-    obs = runtime.run_action(CmdRunAction(command='echo $GITHUB_TOKEN'))
-    assert obs.exit_code == 0
-    assert obs.content.strip().split('\r\n')[0].strip() == 'test_token'
-
-    _close_test_runtime(runtime)
tests/runtime/test_glob_and_grep.py
DELETED
@@ -1,303 +0,0 @@
-"""Tests for the command helper functions in function_calling.py."""
-
-import os
-
-import pytest
-from conftest import (
-    _close_test_runtime,
-    _load_runtime,
-)
-
-from openhands.agenthub.readonly_agent.function_calling import (
-    glob_to_cmdrun,
-    grep_to_cmdrun,
-)
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action import CmdRunAction
-from openhands.events.observation import CmdOutputObservation, ErrorObservation
-
-# Skip all tests in this file if running with CLIRuntime,
-# as they depend on `rg` (ripgrep) which is not guaranteed to be available.
-# The underlying ReadOnlyAgent tools (GrepTool, GlobTool) also currently depend on `rg`.
-# TODO: implement a fallback version of these tools that uses `find` and `grep`.
-pytestmark = pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') == 'cli',
-    reason="CLIRuntime: ReadOnlyAgent's GrepTool/GlobTool tests require `rg` (ripgrep), which may not be installed.",
-)
-
-
-def _run_cmd_action(runtime, custom_command: str):
-    action = CmdRunAction(command=custom_command)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    assert isinstance(obs, (CmdOutputObservation, ErrorObservation))
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    return obs
-
-
-def test_grep_to_cmdrun_basic():
-    """Test basic pattern with no special characters."""
-    cmd = grep_to_cmdrun('function', 'src')
-    assert 'rg -li function' in cmd
-    assert 'Below are the execution results' in cmd
-
-    # With include parameter
-    cmd = grep_to_cmdrun('error', 'src', '*.js')
-    assert 'rg -li error' in cmd
-    assert "--glob '*.js'" in cmd
-    assert 'Below are the execution results' in cmd
-
-
-def test_grep_to_cmdrun_quotes(temp_dir, runtime_cls, run_as_openhands):
-    """Test patterns with different types of quotes."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        # Double quotes in pattern
-        cmd = grep_to_cmdrun(r'const message = "Hello"', '/workspace')
-        assert 'rg -li' in cmd
-
-        # Verify command works by executing it on a test file
-        setup_cmd = 'echo \'const message = "Hello";\' > /workspace/test_quotes.js'
-        obs = _run_cmd_action(runtime, setup_cmd)
-        assert obs.exit_code == 0
-
-        obs = _run_cmd_action(runtime, cmd)
-        assert obs.exit_code == 0
-        assert '/workspace/test_quotes.js' in obs.content
-
-        # Single quotes in pattern
-        cmd = grep_to_cmdrun("function\\('test'\\)", '/workspace')
-        assert 'rg -li' in cmd
-
-        setup_cmd = 'echo "function(\'test\') {}" > /workspace/test_quotes2.js'
-        obs = _run_cmd_action(runtime, setup_cmd)
-        assert obs.exit_code == 0
-
-        obs = _run_cmd_action(runtime, cmd)
-        assert obs.exit_code == 0
-        assert '/workspace/test_quotes2.js' in obs.content
-    finally:
-        _close_test_runtime(runtime)
-
-
-def test_grep_to_cmdrun_special_chars(runtime_cls, run_as_openhands, temp_dir):
-    """Test patterns with special shell characters."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        # Create test directory and files with special pattern content
-        setup_cmd = """
-        mkdir -p /workspace/test_special_patterns && \
-        echo "testing x && y || z pattern" > /workspace/test_special_patterns/logical.txt && \
-        echo "function() { return x; }" > /workspace/test_special_patterns/function.txt && \
-        echo "using \\$variable here" > /workspace/test_special_patterns/dollar.txt && \
-        echo "using \\`backticks\\` here" > /workspace/test_special_patterns/backticks.txt && \
-        echo "line with \\n newline chars" > /workspace/test_special_patterns/newline.txt && \
-        echo "matching *.js wildcard" > /workspace/test_special_patterns/wildcard.txt && \
-        echo "testing x > y redirection" > /workspace/test_special_patterns/redirect.txt && \
-        echo "testing a | b pipe" > /workspace/test_special_patterns/pipe.txt && \
-        echo "line with #comment" > /workspace/test_special_patterns/comment.txt && \
-        echo "CSS \\!important rule" > /workspace/test_special_patterns/bang.txt
-        """
-        obs = _run_cmd_action(runtime, setup_cmd)
-        assert obs.exit_code == 0, 'Failed to set up test files'
-
-        special_patterns = [
-            r'x && y \|\| z',  # Shell logical operators (escaping pipe)
-            r'function\(\) \{ return x; \}',  # Properly escaped braces and parentheses
-            r'\$variable',  # Dollar sign
-            # r"`backticks`",  # Backticks
-            r'\\n newline',  # Escaped characters
-            r'\*\.js',  # Wildcards (escaped)
-            r'x > y',  # Redirection
-            r'a \| b',  # Pipe (escaped)
-            r'#comment',  # Hash
-            # r"!important",  # Bang
-        ]
-
-        for pattern in special_patterns:
-            # Generate the grep command using our helper function
-            cmd = grep_to_cmdrun(pattern, '/workspace/test_special_patterns')
-            assert 'rg -li' in cmd
-            assert 'Below are the execution results of the search command:' in cmd
-
-            # Execute the command
-            obs = _run_cmd_action(runtime, cmd)
-
-            # Verify the command executed successfully
-            assert 'command not found' not in obs.content
-            assert 'syntax error' not in obs.content
-            assert 'unexpected' not in obs.content
-
-            # Check that the pattern was found in the appropriate file
-            if '&&' in pattern:
-                assert 'logical.txt' in obs.content
-            elif 'function' in pattern:
-                assert 'function.txt' in obs.content
-            elif '$variable' in pattern:
-                assert 'dollar.txt' in obs.content
-            # elif "backticks" in pattern:
-            #     assert "backticks.txt" in obs.content
-            elif '\\n newline' in pattern:
-                assert 'newline.txt' in obs.content
-            elif '*' in pattern:
-                assert 'wildcard.txt' in obs.content
-            elif '>' in pattern:
-                assert 'redirect.txt' in obs.content
-            elif '|' in pattern:
-                assert 'pipe.txt' in obs.content
-            elif '#comment' in pattern:
-                assert 'comment.txt' in obs.content
-            # elif "!important" in pattern:
-            #     assert "bang.txt" in obs.content
-    finally:
-        _close_test_runtime(runtime)
-
-
-def test_grep_to_cmdrun_paths_with_spaces(runtime_cls, run_as_openhands, temp_dir):
-    """Test paths with spaces and special characters."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        # Create test files with content in paths with spaces
-        setup_cmd = """
-        mkdir -p "src/my project" "test files/unit tests" "src/special$chars" "path with spaces and $pecial ch@rs" && \
-        echo "function searchablePattern() { return true; }" > "src/my project/test.js" && \
-        echo "function testFunction() { return 42; }" > "test files/unit tests/test.js" && \
-        echo "function specialFunction() { return null; }" > "src/special$chars/test.js" && \
-        echo "function weirdFunction() { return []; }" > "path with spaces and $pecial ch@rs/test.js"
-        """
-        obs = _run_cmd_action(runtime, setup_cmd)
-        assert obs.exit_code == 0, 'Failed to set up test files'
-
-        special_paths = [
-            'src/my project',
-            'test files/unit tests',
-        ]
-
-        for path in special_paths:
-            # Generate grep command and execute it
-            cmd = grep_to_cmdrun('function', path)
-            assert 'rg -li' in cmd
-
-            obs = _run_cmd_action(runtime, cmd)
-            assert obs.exit_code == 0, f'Grep command failed for path: {path}'
-            assert 'function' in obs.content, (
-                f'Expected pattern not found in output for path: {path}'
-            )
-
-            # Verify the actual file was found
-            if path == 'src/my project':
-                assert 'src/my project/test.js' in obs.content
-            elif path == 'test files/unit tests':
-                assert 'test files/unit tests/test.js' in obs.content
-    finally:
-        _close_test_runtime(runtime)
-
-
-def test_glob_to_cmdrun_basic():
-    """Test basic glob patterns."""
-    cmd = glob_to_cmdrun('*.js', 'src')
-    assert "rg --files src -g '*.js'" in cmd
-    assert 'head -n 100' in cmd
-    assert 'echo "Below are the execution results of the glob command:' in cmd
-
-    # Default path
-    cmd = glob_to_cmdrun('*.py')
-    assert "rg --files . -g '*.py'" in cmd
-    assert 'head -n 100' in cmd
-    assert 'echo "Below are the execution results of the glob command:' in cmd
-
-
-def test_glob_to_cmdrun_special_patterns(runtime_cls, run_as_openhands, temp_dir):
-    """Test glob patterns with special characters."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        # Create test files matching the patterns we'll test
-        setup_cmd = r"""
-        mkdir -p src/components src/utils && \
-        touch src/file1.js src/file2.js src/file9.js && \
-        touch src/components/comp.jsx src/components/comp.tsx && \
-        touch src/$special-file.js && \
-        touch src/temp1.js src/temp2.js && \
-        touch src/file.js src/file.ts src/file.jsx && \
-        touch "src/weird\`file\`.js" && \
-        touch "src/file with spaces.js"
-        """
-        obs = _run_cmd_action(runtime, setup_cmd)
-        assert obs.exit_code == 0, 'Failed to set up test files'
-
-        special_patterns = [
-            '**/*.js',  # Double glob
-            '**/{*.jsx,*.tsx}',  # Braces
-            'file[0-9].js',  # Character class
-            'temp?.js',  # Single character wildcard
-            'file.{js,ts,jsx}',  # Multiple extensions
-            'file with spaces.js',  # Spaces
-        ]
-
-        for pattern in special_patterns:
-            cmd = glob_to_cmdrun(pattern, 'src')
-            logger.info(f'Command: {cmd}')
-            # Execute the command
-            obs = _run_cmd_action(runtime, cmd)
-            assert obs.exit_code == 0, f'Glob command failed for pattern: {pattern}'
-
-            # Verify expected files are found
-            if pattern == '**/*.js':
-                assert 'file1.js' in obs.content
-                assert 'file2.js' in obs.content
-            elif pattern == '**/{*.jsx,*.tsx}':
-                assert 'comp.jsx' in obs.content
-                assert 'comp.tsx' in obs.content
-            elif pattern == 'file[0-9].js':
-                assert 'file1.js' in obs.content
-                assert 'file2.js' in obs.content
-                assert 'file9.js' in obs.content
-            elif pattern == 'temp?.js':
-                assert 'temp1.js' in obs.content
-                assert 'temp2.js' in obs.content
-            elif pattern == 'file.{js,ts,jsx}':
-                assert 'file.js' in obs.content
-                assert 'file.ts' in obs.content
-                assert 'file.jsx' in obs.content
-            elif pattern == 'file with spaces.js':
-                assert 'file with spaces.js' in obs.content
-    finally:
-        _close_test_runtime(runtime)
-
-
-def test_glob_to_cmdrun_paths_with_spaces(runtime_cls, run_as_openhands, temp_dir):
-    """Test paths with spaces and special characters for glob command."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        # Create test directories with spaces and special characters
-        setup_cmd = """
-        mkdir -p "project files/src" "test results/unit tests" "weird$path/code" "path with spaces and $pecial ch@rs" && \
-        touch "project files/src/file1.js" "project files/src/file2.js" && \
-        touch "test results/unit tests/test1.js" "test results/unit tests/test2.js" && \
-        touch "weird$path/code/weird1.js" "weird$path/code/weird2.js" && \
-        touch "path with spaces and $pecial ch@rs/special1.js" "path with spaces and $pecial ch@rs/special2.js"
-        """
-        obs = _run_cmd_action(runtime, setup_cmd)
-        assert obs.exit_code == 0, 'Failed to set up test files'
-
-        special_paths = [
-            'project files/src',
-            'test results/unit tests',
-        ]
-
-        for path in special_paths:
-            cmd = glob_to_cmdrun('*.js', path)
-
-            # Execute the command
-            obs = _run_cmd_action(runtime, cmd)
-            assert obs.exit_code == 0, f'Glob command failed for path: {path}'
-
-            # Verify expected files are found in each path
-            if path == 'project files/src':
-                assert 'file1.js' in obs.content
-                assert 'file2.js' in obs.content
-            elif path == 'test results/unit tests':
-                assert 'test1.js' in obs.content
-                assert 'test2.js' in obs.content
-    finally:
-        _close_test_runtime(runtime)
tests/runtime/test_ipython.py
DELETED
@@ -1,382 +0,0 @@
-"""Test the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
-
-import os
-
-import pytest
-from conftest import (
-    TEST_IN_CI,
-    _close_test_runtime,
-    _load_runtime,
-)
-
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action import (
-    CmdRunAction,
-    FileReadAction,
-    FileWriteAction,
-    IPythonRunCellAction,
-)
-from openhands.events.observation import (
-    CmdOutputObservation,
-    ErrorObservation,
-    FileReadObservation,
-    FileWriteObservation,
-    IPythonRunCellObservation,
-)
-
-# ============================================================================================================================
-# ipython-specific tests
-# ============================================================================================================================
-
-
-@pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') == 'cli',
-    reason='CLIRuntime does not support full IPython/Jupyter kernel features or return IPythonRunCellObservation',
-)
-def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-
-    # Test run command
-    action_cmd = CmdRunAction(command='ls -l')
-    logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_cmd)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-    assert isinstance(obs, CmdOutputObservation)
-    assert obs.exit_code == 0
-    assert 'total 0' in obs.content
-
-    # Test run ipython
-    test_code = "print('Hello, `World`!\\n')"
-    action_ipython = IPythonRunCellAction(code=test_code)
-    logger.info(action_ipython, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_ipython)
-    assert isinstance(obs, IPythonRunCellObservation)
-
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.content.strip() == (
-        'Hello, `World`!\n'
-        '[Jupyter current working directory: /workspace]\n'
-        '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
-    )
-
-    # Test read file (file should not exist)
-    action_read = FileReadAction(path='hello.sh')
-    logger.info(action_read, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_read)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert isinstance(obs, ErrorObservation)
-    assert 'File not found' in obs.content
-
-    # Test write file
-    action_write = FileWriteAction(content='echo "Hello, World!"', path='hello.sh')
-    logger.info(action_write, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_write)
-    assert isinstance(obs, FileWriteObservation)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-    assert obs.content == ''
-    # event stream runtime will always use absolute path
-    assert obs.path == '/workspace/hello.sh'
-
-    # Test read file (file should exist)
-    action_read = FileReadAction(path='hello.sh')
-    logger.info(action_read, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_read)
-    assert isinstance(obs, FileReadObservation), (
-        'The observation should be a FileReadObservation.'
-    )
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-    assert obs.content == 'echo "Hello, World!"\n'
-    assert obs.path == '/workspace/hello.sh'
-
-    # clean up
-    action = CmdRunAction(command='rm -rf hello.sh')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-
-    _close_test_runtime(runtime)
-
-
-@pytest.mark.skipif(
-    TEST_IN_CI != 'True',
-    reason='This test is not working in WSL (file ownership)',
-)
-@pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') == 'cli',
-    reason='CLIRuntime does not support full IPython/Jupyter kernel features or return IPythonRunCellObservation',
-)
-def test_ipython_multi_user(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-
-    # Test run ipython
-    # get username
-    test_code = "import os; print(os.environ['USER'])"
-    action_ipython = IPythonRunCellAction(code=test_code)
-    logger.info(action_ipython, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_ipython)
-    assert isinstance(obs, IPythonRunCellObservation)
-
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    if run_as_openhands:
-        assert 'openhands' in obs.content
-    else:
-        assert 'root' in obs.content
-
-    # print the current working directory
-    test_code = 'import os; print(os.getcwd())'
-    action_ipython = IPythonRunCellAction(code=test_code)
-    logger.info(action_ipython, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_ipython)
-    assert isinstance(obs, IPythonRunCellObservation)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert (
-        obs.content.strip()
-        == (
-            '/workspace\n'
-            '[Jupyter current working directory: /workspace]\n'
-            '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
-        ).strip()
-    )
-
-    # write a file
-    test_code = "with open('test.txt', 'w') as f: f.write('Hello, world!')"
-    action_ipython = IPythonRunCellAction(code=test_code)
-    logger.info(action_ipython, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_ipython)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert isinstance(obs, IPythonRunCellObservation)
-    assert (
-        obs.content.strip()
-        == (
-            '[Code executed successfully with no output]\n'
-            '[Jupyter current working directory: /workspace]\n'
-            '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
-        ).strip()
-    )
-
-    # check file owner via bash
-    action = CmdRunAction(command='ls -alh test.txt')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-    if run_as_openhands:
-        # -rw-r--r-- 1 openhands root 13 Jul 28 03:53 test.txt
-        assert 'openhands' in obs.content.split('\r\n')[0]
-    else:
-        # -rw-r--r-- 1 root root 13 Jul 28 03:53 test.txt
-        assert 'root' in obs.content.split('\r\n')[0]
-
-    # clean up
-    action = CmdRunAction(command='rm -rf test')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-
-    _close_test_runtime(runtime)
-
-
-@pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') == 'cli',
-    reason='CLIRuntime does not support full IPython/Jupyter kernel features or return IPythonRunCellObservation',
-)
-def test_ipython_simple(temp_dir, runtime_cls):
-    runtime, config = _load_runtime(temp_dir, runtime_cls)
-
-    # Test run ipython
-    # get username
-    test_code = 'print(1)'
-    action_ipython = IPythonRunCellAction(code=test_code)
-    logger.info(action_ipython, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_ipython)
-    assert isinstance(obs, IPythonRunCellObservation)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert (
-        obs.content.strip()
-        == (
-            '1\n'
-            '[Jupyter current working directory: /workspace]\n'
-            '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
-        ).strip()
-    )
-
-    _close_test_runtime(runtime)
-
-
-@pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') == 'cli',
-    reason='CLIRuntime does not support full IPython/Jupyter kernel features or return IPythonRunCellObservation',
-)
-def test_ipython_chdir(temp_dir, runtime_cls):
-    """Test that os.chdir correctly handles paths with slashes."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls)
-
-    # Create a test directory and get its absolute path
-    test_code = """
-import os
-os.makedirs('test_dir', exist_ok=True)
-abs_path = os.path.abspath('test_dir')
-print(abs_path)
-"""
-    action_ipython = IPythonRunCellAction(code=test_code)
-    logger.info(action_ipython, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_ipython)
-    assert isinstance(obs, IPythonRunCellObservation)
-    test_dir_path = obs.content.split('\n')[0].strip()
-    logger.info(f'test_dir_path: {test_dir_path}')
-    assert test_dir_path  # Verify we got a valid path
-
-    # Change to the test directory using its absolute path
-    test_code = f"""
-import os
-os.chdir(r'{test_dir_path}')
-print(os.getcwd())
-"""
-    action_ipython = IPythonRunCellAction(code=test_code)
-    logger.info(action_ipython, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_ipython)
-    assert isinstance(obs, IPythonRunCellObservation)
-    current_dir = obs.content.split('\n')[0].strip()
-    assert current_dir == test_dir_path  # Verify we changed to the correct directory
-
-    # Clean up
-    test_code = """
-import os
-import shutil
-shutil.rmtree('test_dir', ignore_errors=True)
-"""
-    action_ipython = IPythonRunCellAction(code=test_code)
-    logger.info(action_ipython, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_ipython)
-    assert isinstance(obs, IPythonRunCellObservation)
-
-    _close_test_runtime(runtime)
-
-
-@pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') == 'cli',
-    reason='CLIRuntime does not support IPython magics like %pip or return IPythonRunCellObservation',
-)
-def test_ipython_package_install(temp_dir, runtime_cls, run_as_openhands):
-    """Make sure that cd in bash also update the current working directory in ipython."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-
-    # It should error out since pymsgbox is not installed
-    action = IPythonRunCellAction(code='import pymsgbox')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert "ModuleNotFoundError: No module named 'pymsgbox'" in obs.content
-
-    # Install pymsgbox in Jupyter
-    action = IPythonRunCellAction(code='%pip install pymsgbox==1.0.9')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert (
-        'Successfully installed pymsgbox-1.0.9' in obs.content
-        or '[Package installed successfully]' in obs.content
-    )
-
-    action = IPythonRunCellAction(code='import pymsgbox')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    # import should not error out
-    assert obs.content.strip() == (
-        '[Code executed successfully with no output]\n'
-        '[Jupyter current working directory: /workspace]\n'
-        '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python]'
-    )
-
-    _close_test_runtime(runtime)
-
-
-@pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') == 'cli',
-    reason='CLIRuntime does not support sudo with password prompts if the user has not enabled passwordless sudo',
-)
-def test_ipython_file_editor_permissions_as_openhands(temp_dir, runtime_cls):
-    """Test file editor permission behavior when running as different users."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands=True)
-
-    # Create a file owned by root with restricted permissions
-    action = CmdRunAction(
-        command='sudo touch /root/test.txt && sudo chmod 600 /root/test.txt'
-    )
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-
-    # Try to view the file as openhands user - should fail with permission denied
-    test_code = "print(file_editor(command='view', path='/root/test.txt'))"
-    action = IPythonRunCellAction(code=test_code)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert 'Permission denied' in obs.content
-
-    # Try to edit the file as openhands user - should fail with permission denied
-    test_code = "print(file_editor(command='str_replace', path='/root/test.txt', old_str='', new_str='test'))"
-    action = IPythonRunCellAction(code=test_code)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert 'Permission denied' in obs.content
-
-    # Try to create a file in root directory - should fail with permission denied
-    test_code = (
-        "print(file_editor(command='create', path='/root/new.txt', file_text='test'))"
-    )
-    action = IPythonRunCellAction(code=test_code)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert 'Permission denied' in obs.content
-
-    # Try to use file editor in openhands sandbox directory - should work
-    test_code = """
-# Create file
-print(file_editor(command='create', path='/workspace/test.txt', file_text='Line 1\\nLine 2\\nLine 3'))
-
-# View file
-print(file_editor(command='view', path='/workspace/test.txt'))
-
-# Edit file
-print(file_editor(command='str_replace', path='/workspace/test.txt', old_str='Line 2', new_str='New Line 2'))
-
-# Undo edit
-print(file_editor(command='undo_edit', path='/workspace/test.txt'))
-"""
-    action = IPythonRunCellAction(code=test_code)
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert 'File created successfully' in obs.content
-    assert 'Line 1' in obs.content
-    assert 'Line 2' in obs.content
-    assert 'Line 3' in obs.content
-    assert 'New Line 2' in obs.content
-    assert 'Last edit to' in obs.content
-    assert 'undone successfully' in obs.content
-
-    # Clean up
-    action = CmdRunAction(command='rm -f /workspace/test.txt')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-
-    action = CmdRunAction(command='sudo rm -f /root/test.txt')
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
380 |
-
assert obs.exit_code == 0
|
381 |
-
|
382 |
-
_close_test_runtime(runtime)
|
|
|
|
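
For reference, the `Permission denied` assertions in the deleted test above come down to ordinary POSIX file-mode behavior. A minimal standalone sketch of the same check with plain Python and pytest follows; the test name and path are illustrative, it uses chmod 000 on the caller's own file instead of a root-owned chmod 600 file, and it skips under root, since root bypasses mode bits.

import os
import tempfile

import pytest


def test_unreadable_file_raises_permission_error():
    if os.geteuid() == 0:
        pytest.skip('root bypasses file permission bits')
    with tempfile.TemporaryDirectory() as d:
        path = os.path.join(d, 'restricted.txt')
        with open(path, 'w') as f:
            f.write('secret')
        os.chmod(path, 0o000)  # drop all permission bits for the owner too
        try:
            # Reading a file we may not access should fail with EACCES.
            with pytest.raises(PermissionError):
                open(path).read()
        finally:
            os.chmod(path, 0o600)  # restore so the tempdir can be cleaned up
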
tests/runtime/test_llm_based_edit.py
DELETED
@@ -1,413 +0,0 @@
-"""Edit-related tests for the DockerRuntime."""
-
-import os
-
-import pytest
-from conftest import TEST_IN_CI, _close_test_runtime, _load_runtime
-from openhands_aci.utils.diff import get_diff
-
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action import FileEditAction, FileReadAction
-from openhands.events.observation import FileEditObservation
-
-ORGINAL = """from flask import Flask
-app = Flask(__name__)
-
-@app.route('/')
-def index():
-    numbers = list(range(1, 11))
-    return str(numbers)
-
-if __name__ == '__main__':
-    app.run(port=5000)
-"""
-
-
-@pytest.mark.skipif(
-    TEST_IN_CI != 'True',
-    reason='This test requires LLM to run.',
-)
-def test_edit_from_scratch(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        action = FileEditAction(
-            content=ORGINAL,
-            start=-1,
-            path=os.path.join('/workspace', 'app.py'),
-        )
-        logger.info(action, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-        assert isinstance(obs, FileEditObservation), (
-            'The observation should be a FileEditObservation.'
-        )
-
-        action = FileReadAction(
-            path=os.path.join('/workspace', 'app.py'),
-        )
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert obs.content.strip() == ORGINAL.strip()
-
-    finally:
-        _close_test_runtime(runtime)
-
-
-EDIT = """# above stays the same
-@app.route('/')
-def index():
-    numbers = list(range(1, 11))
-    return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
-# below stays the same
-"""
-
-
-@pytest.mark.skipif(
-    TEST_IN_CI != 'True',
-    reason='This test requires LLM to run.',
-)
-def test_edit(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        action = FileEditAction(
-            content=ORGINAL,
-            path=os.path.join('/workspace', 'app.py'),
-        )
-        logger.info(action, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-        assert isinstance(obs, FileEditObservation), (
-            'The observation should be a FileEditObservation.'
-        )
-
-        action = FileReadAction(
-            path=os.path.join('/workspace', 'app.py'),
-        )
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert obs.content.strip() == ORGINAL.strip()
-
-        action = FileEditAction(
-            content=EDIT,
-            path=os.path.join('/workspace', 'app.py'),
-        )
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert (
-            obs.content.strip()
-            == (
-                '--- /workspace/app.py\n'
-                '+++ /workspace/app.py\n'
-                '@@ -4,7 +4,7 @@\n'
-                " @app.route('/')\n"
-                ' def index():\n'
-                '     numbers = list(range(1, 11))\n'
-                '-    return str(numbers)\n'
-                "+    return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'\n"
-                '\n'
-                " if __name__ == '__main__':\n"
-                '     app.run(port=5000)\n'
-            ).strip()
-        )
-    finally:
-        _close_test_runtime(runtime)
-
-
-ORIGINAL_LONG = '\n'.join([f'This is line {i}' for i in range(1, 1000)])
-EDIT_LONG = """
-This is line 100 + 10
-This is line 101 + 10
-"""
-
-
-@pytest.mark.skipif(
-    TEST_IN_CI != 'True',
-    reason='This test requires LLM to run.',
-)
-def test_edit_long_file(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        action = FileEditAction(
-            content=ORIGINAL_LONG,
-            path=os.path.join('/workspace', 'app.py'),
-            start=-1,
-        )
-        logger.info(action, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-        assert isinstance(obs, FileEditObservation), (
-            'The observation should be a FileEditObservation.'
-        )
-
-        action = FileReadAction(
-            path=os.path.join('/workspace', 'app.py'),
-        )
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert obs.content.strip() == ORIGINAL_LONG.strip()
-
-        action = FileEditAction(
-            content=EDIT_LONG,
-            path=os.path.join('/workspace', 'app.py'),
-            start=100,
-            end=200,
-        )
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert (
-            obs.content.strip()
-            == (
-                '--- /workspace/app.py\n'
-                '+++ /workspace/app.py\n'
-                '@@ -97,8 +97,8 @@\n'
-                ' This is line 97\n'
-                ' This is line 98\n'
-                ' This is line 99\n'
-                '-This is line 100\n'
-                '-This is line 101\n'
-                '+This is line 100 + 10\n'
-                '+This is line 101 + 10\n'
-                ' This is line 102\n'
-                ' This is line 103\n'
-                ' This is line 104\n'
-            ).strip()
-        )
-    finally:
-        _close_test_runtime(runtime)
-
-
-# ======================================================================================
-# Test FileEditObservation (things that are displayed to the agent)
-# ======================================================================================
-
-
-def test_edit_obs_insert_only():
-    EDIT_LONG_INSERT_ONLY = (
-        '\n'.join([f'This is line {i}' for i in range(1, 100)])
-        + EDIT_LONG
-        + '\n'.join([f'This is line {i}' for i in range(100, 1000)])
-    )
-
-    diff = get_diff(ORIGINAL_LONG, EDIT_LONG_INSERT_ONLY, '/workspace/app.py')
-    obs = FileEditObservation(
-        content=diff,
-        path='/workspace/app.py',
-        prev_exist=True,
-        old_content=ORIGINAL_LONG,
-        new_content=EDIT_LONG_INSERT_ONLY,
-    )
-    assert (
-        str(obs).strip()
-        == """
-[Existing file /workspace/app.py is edited with 1 changes.]
-[begin of edit 1 / 1]
-(content before edit)
-98|This is line 98
-99|This is line 99
-100|This is line 100
-101|This is line 101
-(content after edit)
-98|This is line 98
-99|This is line 99
-+100|This is line 100 + 10
-+101|This is line 101 + 10
-102|This is line 100
-103|This is line 101
-[end of edit 1 / 1]
-""".strip()
-    )
-
-
-def test_edit_obs_replace():
-    _new_content = (
-        '\n'.join([f'This is line {i}' for i in range(1, 100)])
-        + EDIT_LONG
-        + '\n'.join([f'This is line {i}' for i in range(102, 1000)])
-    )
-
-    diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py')
-    obs = FileEditObservation(
-        content=diff,
-        path='/workspace/app.py',
-        prev_exist=True,
-        old_content=ORIGINAL_LONG,
-        new_content=_new_content,
-    )
-    print(str(obs))
-    assert (
-        str(obs).strip()
-        == """
-[Existing file /workspace/app.py is edited with 1 changes.]
-[begin of edit 1 / 1]
-(content before edit)
-98|This is line 98
-99|This is line 99
--100|This is line 100
--101|This is line 101
-102|This is line 102
-103|This is line 103
-(content after edit)
-98|This is line 98
-99|This is line 99
-+100|This is line 100 + 10
-+101|This is line 101 + 10
-102|This is line 102
-103|This is line 103
-[end of edit 1 / 1]
-""".strip()
-    )
-
-
-def test_edit_obs_replace_with_empty_line():
-    _new_content = (
-        '\n'.join([f'This is line {i}' for i in range(1, 100)])
-        + '\n'
-        + EDIT_LONG
-        + '\n'.join([f'This is line {i}' for i in range(102, 1000)])
-    )
-
-    diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py')
-    obs = FileEditObservation(
-        content=diff,
-        path='/workspace/app.py',
-        prev_exist=True,
-        old_content=ORIGINAL_LONG,
-        new_content=_new_content,
-    )
-    print(str(obs))
-    assert (
-        str(obs).strip()
-        == """
-[Existing file /workspace/app.py is edited with 1 changes.]
-[begin of edit 1 / 1]
-(content before edit)
-98|This is line 98
-99|This is line 99
--100|This is line 100
--101|This is line 101
-102|This is line 102
-103|This is line 103
-(content after edit)
-98|This is line 98
-99|This is line 99
-+100|
-+101|This is line 100 + 10
-+102|This is line 101 + 10
-103|This is line 102
-104|This is line 103
-[end of edit 1 / 1]
-""".strip()
-    )
-
-
-def test_edit_obs_multiple_edits():
-    _new_content = (
-        '\n'.join([f'This is line {i}' for i in range(1, 50)])
-        + '\nbalabala\n'
-        + '\n'.join([f'This is line {i}' for i in range(50, 100)])
-        + EDIT_LONG
-        + '\n'.join([f'This is line {i}' for i in range(102, 1000)])
-    )
-
-    diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py')
-    obs = FileEditObservation(
-        content=diff,
-        path='/workspace/app.py',
-        prev_exist=True,
-        old_content=ORIGINAL_LONG,
-        new_content=_new_content,
-    )
-    assert (
-        str(obs).strip()
-        == """
-[Existing file /workspace/app.py is edited with 2 changes.]
-[begin of edit 1 / 2]
-(content before edit)
-48|This is line 48
-49|This is line 49
-50|This is line 50
-51|This is line 51
-(content after edit)
-48|This is line 48
-49|This is line 49
-+50|balabala
-51|This is line 50
-52|This is line 51
-[end of edit 1 / 2]
--------------------------
-[begin of edit 2 / 2]
-(content before edit)
-98|This is line 98
-99|This is line 99
--100|This is line 100
--101|This is line 101
-102|This is line 102
-103|This is line 103
-(content after edit)
-99|This is line 98
-100|This is line 99
-+101|This is line 100 + 10
-+102|This is line 101 + 10
-103|This is line 102
-104|This is line 103
-[end of edit 2 / 2]
-""".strip()
-    )
-
-
-def test_edit_visualize_failed_edit():
-    _new_content = (
-        '\n'.join([f'This is line {i}' for i in range(1, 50)])
-        + '\nbalabala\n'
-        + '\n'.join([f'This is line {i}' for i in range(50, 100)])
-        + EDIT_LONG
-        + '\n'.join([f'This is line {i}' for i in range(102, 1000)])
-    )
-
-    diff = get_diff(ORIGINAL_LONG, _new_content, '/workspace/app.py')
-    obs = FileEditObservation(
-        content=diff,
-        path='/workspace/app.py',
-        prev_exist=True,
-        old_content=ORIGINAL_LONG,
-        new_content=_new_content,
-    )
-    assert (
-        obs.visualize_diff(change_applied=False).strip()
-        == """
-[Changes are NOT applied to /workspace/app.py - Here's how the file looks like if changes are applied.]
-[begin of ATTEMPTED edit 1 / 2]
-(content before ATTEMPTED edit)
-48|This is line 48
-49|This is line 49
-50|This is line 50
-51|This is line 51
-(content after ATTEMPTED edit)
-48|This is line 48
-49|This is line 49
-+50|balabala
-51|This is line 50
-52|This is line 51
-[end of ATTEMPTED edit 1 / 2]
--------------------------
-[begin of ATTEMPTED edit 2 / 2]
-(content before ATTEMPTED edit)
-98|This is line 98
-99|This is line 99
--100|This is line 100
--101|This is line 101
-102|This is line 102
-103|This is line 103
-(content after ATTEMPTED edit)
-99|This is line 98
-100|This is line 99
-+101|This is line 100 + 10
-+102|This is line 101 + 10
-103|This is line 102
-104|This is line 103
-[end of ATTEMPTED edit 2 / 2]
-""".strip()
-    )
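
The expected strings asserted in the deleted file above are standard unified diffs. As a point of comparison, a minimal sketch of producing such a diff string with the standard library follows; get_diff in openhands_aci may differ in details, so difflib here is a stand-in, and the file path is illustrative.

import difflib

old = '\n'.join(f'This is line {i}' for i in range(1, 8))
new = old.replace('This is line 4', 'This is line 4 + 10')

diff = '\n'.join(
    difflib.unified_diff(
        old.splitlines(),
        new.splitlines(),
        fromfile='/workspace/app.py',
        tofile='/workspace/app.py',
        lineterm='',  # keep header lines free of trailing newlines
    )
)
# Prints '--- /workspace/app.py', '+++ /workspace/app.py', '@@ -1,7 +1,7 @@',
# then context lines, '-This is line 4', '+This is line 4 + 10'.
print(diff)
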
tests/runtime/test_mcp_action.py
DELETED
@@ -1,362 +0,0 @@
-"""Bash-related tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
-
-import json
-import os
-import socket
-import time
-
-import docker
-import pytest
-from conftest import (
-    _load_runtime,
-)
-
-import openhands
-from openhands.core.config import MCPConfig
-from openhands.core.config.mcp_config import MCPSSEServerConfig, MCPStdioServerConfig
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action import CmdRunAction, MCPAction
-from openhands.events.observation import CmdOutputObservation, MCPObservation
-
-# ============================================================================================================================
-# Bash-specific tests
-# ============================================================================================================================
-
-pytestmark = pytest.mark.skipif(
-    os.environ.get('TEST_RUNTIME') == 'cli',
-    reason='CLIRuntime does not support MCP actions',
-)
-
-
-@pytest.fixture
-def sse_mcp_docker_server():
-    """Manages the lifecycle of the SSE MCP Docker container for tests, using a random available port."""
-    image_name = 'supercorp/supergateway'
-
-    # Find a free port
-    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-        s.bind(('', 0))
-        host_port = s.getsockname()[1]
-
-    container_internal_port = (
-        8000  # The port the MCP server listens on *inside* the container
-    )
-
-    container_command_args = [
-        '--stdio',
-        'npx -y @modelcontextprotocol/server-filesystem /',
-        '--port',
-        str(container_internal_port),  # MCP server inside container listens on this
-        '--baseUrl',
-        f'http://localhost:{host_port}',  # The URL used to access the server from the host
-    ]
-    client = docker.from_env()
-    container = None
-    log_streamer = None
-
-    # Import LogStreamer here as it's specific to this fixture's needs
-    from openhands.runtime.utils.log_streamer import LogStreamer
-
-    try:
-        logger.info(
-            f'Starting Docker container {image_name} with command: {" ".join(container_command_args)} '
-            f'and mapping internal port {container_internal_port} to host port {host_port}',
-            extra={'msg_type': 'ACTION'},
-        )
-        container = client.containers.run(
-            image_name,
-            command=container_command_args,
-            ports={
-                f'{container_internal_port}/tcp': host_port
-            },  # Map container's internal port to the random host port
-            detach=True,
-            auto_remove=True,
-            stdin_open=True,
-        )
-        logger.info(
-            f'Container {container.short_id} started, listening on host port {host_port}.'
-        )
-
-        log_streamer = LogStreamer(
-            container,
-            lambda level, msg: getattr(logger, level.lower())(
-                f'[MCP server {container.short_id}] {msg}'
-            ),
-        )
-        # Wait for the server to initialize, as in the original tests
-        time.sleep(10)
-
-        yield {'url': f'http://localhost:{host_port}/sse'}
-
-    finally:
-        if container:
-            logger.info(f'Stopping container {container.short_id}...')
-            try:
-                container.stop(timeout=5)
-                logger.info(
-                    f'Container {container.short_id} stopped (and should be auto-removed).'
-                )
-            except docker.errors.NotFound:
-                logger.info(
-                    f'Container {container.short_id} not found, likely already stopped and removed.'
-                )
-            except Exception as e:
-                logger.error(f'Error stopping container {container.short_id}: {e}')
-        if log_streamer:
-            log_streamer.close()
-
-
-def test_default_activated_tools():
-    project_root = os.path.dirname(openhands.__file__)
-    mcp_config_path = os.path.join(project_root, 'runtime', 'mcp', 'config.json')
-    assert os.path.exists(mcp_config_path), (
-        f'MCP config file not found at {mcp_config_path}'
-    )
-    with open(mcp_config_path, 'r') as f:
-        mcp_config = json.load(f)
-    assert 'mcpServers' in mcp_config
-    assert 'default' in mcp_config['mcpServers']
-    assert 'tools' in mcp_config
-    # no tools are always activated yet
-    assert len(mcp_config['tools']) == 0
-
-
-@pytest.mark.asyncio
-async def test_fetch_mcp_via_stdio(temp_dir, runtime_cls, run_as_openhands):
-    mcp_stdio_server_config = MCPStdioServerConfig(
-        name='fetch', command='uvx', args=['mcp-server-fetch']
-    )
-    override_mcp_config = MCPConfig(stdio_servers=[mcp_stdio_server_config])
-    runtime, config = _load_runtime(
-        temp_dir, runtime_cls, run_as_openhands, override_mcp_config=override_mcp_config
-    )
-
-    # Test browser server
-    action_cmd = CmdRunAction(command='python3 -m http.server 8000 > server.log 2>&1 &')
-    logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_cmd)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-    assert isinstance(obs, CmdOutputObservation)
-    assert obs.exit_code == 0
-    assert '[1]' in obs.content
-
-    action_cmd = CmdRunAction(command='sleep 3 && cat server.log')
-    logger.info(action_cmd, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action_cmd)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-
-    mcp_action = MCPAction(name='fetch', arguments={'url': 'http://localhost:8000'})
-    obs = await runtime.call_tool_mcp(mcp_action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert isinstance(obs, MCPObservation), (
-        'The observation should be a MCPObservation.'
-    )
-
-    result_json = json.loads(obs.content)
-    assert not result_json['isError']
-    assert len(result_json['content']) == 1
-    assert result_json['content'][0]['type'] == 'text'
-    assert (
-        result_json['content'][0]['text']
-        == 'Contents of http://localhost:8000/:\n---\n\n* <server.log>\n\n---'
-    )
-
-    runtime.close()
-
-
-@pytest.mark.asyncio
-async def test_filesystem_mcp_via_sse(
-    temp_dir, runtime_cls, run_as_openhands, sse_mcp_docker_server
-):
-    sse_server_info = sse_mcp_docker_server
-    sse_url = sse_server_info['url']
-    runtime = None
-    try:
-        mcp_sse_server_config = MCPSSEServerConfig(url=sse_url)
-        override_mcp_config = MCPConfig(sse_servers=[mcp_sse_server_config])
-        runtime, config = _load_runtime(
-            temp_dir,
-            runtime_cls,
-            run_as_openhands,
-            override_mcp_config=override_mcp_config,
-        )
-
-        mcp_action = MCPAction(name='list_directory', arguments={'path': '.'})
-        obs = await runtime.call_tool_mcp(mcp_action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert isinstance(obs, MCPObservation), (
-            'The observation should be a MCPObservation.'
-        )
-        assert '[FILE] .dockerenv' in obs.content
-
-    finally:
-        if runtime:
-            runtime.close()
-        # Container and log_streamer cleanup is handled by the sse_mcp_docker_server fixture
-
-
-@pytest.mark.asyncio
-async def test_both_stdio_and_sse_mcp(
-    temp_dir, runtime_cls, run_as_openhands, sse_mcp_docker_server
-):
-    sse_server_info = sse_mcp_docker_server
-    sse_url = sse_server_info['url']
-    runtime = None
-    try:
-        mcp_sse_server_config = MCPSSEServerConfig(url=sse_url)
-
-        # Also add stdio server
-        mcp_stdio_server_config = MCPStdioServerConfig(
-            name='fetch', command='uvx', args=['mcp-server-fetch']
-        )
-
-        override_mcp_config = MCPConfig(
-            sse_servers=[mcp_sse_server_config], stdio_servers=[mcp_stdio_server_config]
-        )
-        runtime, config = _load_runtime(
-            temp_dir,
-            runtime_cls,
-            run_as_openhands,
-            override_mcp_config=override_mcp_config,
-        )
-
-        # ======= Test SSE server =======
-        mcp_action_sse = MCPAction(name='list_directory', arguments={'path': '.'})
-        obs_sse = await runtime.call_tool_mcp(mcp_action_sse)
-        logger.info(obs_sse, extra={'msg_type': 'OBSERVATION'})
-        assert isinstance(obs_sse, MCPObservation), (
-            'The observation should be a MCPObservation.'
-        )
-        assert '[FILE] .dockerenv' in obs_sse.content
-
-        # ======= Test stdio server =======
-        # Test browser server
-        action_cmd_http = CmdRunAction(
-            command='python3 -m http.server 8000 > server.log 2>&1 &'
-        )
-        logger.info(action_cmd_http, extra={'msg_type': 'ACTION'})
-        obs_http = runtime.run_action(action_cmd_http)
-        logger.info(obs_http, extra={'msg_type': 'OBSERVATION'})
-
-        assert isinstance(obs_http, CmdOutputObservation)
-        assert obs_http.exit_code == 0
-        assert '[1]' in obs_http.content
-
-        action_cmd_cat = CmdRunAction(command='sleep 3 && cat server.log')
-        logger.info(action_cmd_cat, extra={'msg_type': 'ACTION'})
-        obs_cat = runtime.run_action(action_cmd_cat)
-        logger.info(obs_cat, extra={'msg_type': 'OBSERVATION'})
-        assert obs_cat.exit_code == 0
-
-        mcp_action_fetch = MCPAction(
-            # NOTE: the tool name is `fetch_fetch` because the tool name is `fetch`
-            # And FastMCP Proxy will pre-pend the server name (in this case, `fetch`)
-            # to the tool name, so the full tool name becomes `fetch_fetch`
-            name='fetch',
-            arguments={'url': 'http://localhost:8000'},
-        )
-        obs_fetch = await runtime.call_tool_mcp(mcp_action_fetch)
-        logger.info(obs_fetch, extra={'msg_type': 'OBSERVATION'})
-        assert isinstance(obs_fetch, MCPObservation), (
-            'The observation should be a MCPObservation.'
-        )
-
-        result_json = json.loads(obs_fetch.content)
-        assert not result_json['isError']
-        assert len(result_json['content']) == 1
-        assert result_json['content'][0]['type'] == 'text'
-        assert (
-            result_json['content'][0]['text']
-            == 'Contents of http://localhost:8000/:\n---\n\n* <server.log>\n\n---'
-        )
-    finally:
-        if runtime:
-            runtime.close()
-        # SSE Docker container cleanup is handled by the sse_mcp_docker_server fixture
-
-
-@pytest.mark.asyncio
-async def test_microagent_and_one_stdio_mcp_in_config(
-    temp_dir, runtime_cls, run_as_openhands
-):
-    runtime = None
-    try:
-        filesystem_config = MCPStdioServerConfig(
-            name='filesystem',
-            command='npx',
-            args=[
-                '@modelcontextprotocol/server-filesystem',
-                '/',
-            ],
-        )
-        override_mcp_config = MCPConfig(stdio_servers=[filesystem_config])
-        runtime, config = _load_runtime(
-            temp_dir,
-            runtime_cls,
-            run_as_openhands,
-            override_mcp_config=override_mcp_config,
-        )
-
-        # NOTE: this simulate the case where the microagent adds a new stdio server to the runtime
-        # but that stdio server is not in the initial config
-        # Actual invocation of the microagent involves `add_mcp_tools_to_agent`
-        # which will call `get_mcp_config` with the stdio server from microagent's config
-        fetch_config = MCPStdioServerConfig(
-            name='fetch', command='uvx', args=['mcp-server-fetch']
-        )
-        updated_config = runtime.get_mcp_config([fetch_config])
-        logger.info(f'updated_config: {updated_config}')
-
-        # ======= Test the stdio server in the config =======
-        mcp_action_sse = MCPAction(
-            name='filesystem_list_directory', arguments={'path': '/'}
-        )
-        obs_sse = await runtime.call_tool_mcp(mcp_action_sse)
-        logger.info(obs_sse, extra={'msg_type': 'OBSERVATION'})
-        assert isinstance(obs_sse, MCPObservation), (
-            'The observation should be a MCPObservation.'
-        )
-        assert '[FILE] .dockerenv' in obs_sse.content
-
-        # ======= Test the stdio server added by the microagent =======
-        # Test browser server
-        action_cmd_http = CmdRunAction(
-            command='python3 -m http.server 8000 > server.log 2>&1 &'
-        )
-        logger.info(action_cmd_http, extra={'msg_type': 'ACTION'})
-        obs_http = runtime.run_action(action_cmd_http)
-        logger.info(obs_http, extra={'msg_type': 'OBSERVATION'})
-
-        assert isinstance(obs_http, CmdOutputObservation)
-        assert obs_http.exit_code == 0
-        assert '[1]' in obs_http.content
-
-        action_cmd_cat = CmdRunAction(command='sleep 3 && cat server.log')
-        logger.info(action_cmd_cat, extra={'msg_type': 'ACTION'})
-        obs_cat = runtime.run_action(action_cmd_cat)
-        logger.info(obs_cat, extra={'msg_type': 'OBSERVATION'})
-        assert obs_cat.exit_code == 0
-
-        mcp_action_fetch = MCPAction(
-            name='fetch_fetch', arguments={'url': 'http://localhost:8000'}
-        )
-        obs_fetch = await runtime.call_tool_mcp(mcp_action_fetch)
-        logger.info(obs_fetch, extra={'msg_type': 'OBSERVATION'})
-        assert isinstance(obs_fetch, MCPObservation), (
-            'The observation should be a MCPObservation.'
-        )
-
-        result_json = json.loads(obs_fetch.content)
-        assert not result_json['isError']
-        assert len(result_json['content']) == 1
-        assert result_json['content'][0]['type'] == 'text'
-        assert (
-            result_json['content'][0]['text']
-            == 'Contents of http://localhost:8000/:\n---\n\n* <server.log>\n\n---'
-        )
    finally:
-        if runtime:
-            runtime.close()
-        # SSE Docker container cleanup is handled by the sse_mcp_docker_server fixture
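
The sse_mcp_docker_server fixture above reserves a random host port before launching the container. That free-port idiom, shown standalone below, is the standard one: bind to port 0 and let the kernel pick. Note the small inherent race these tests accept: the port is released before the container binds it, so another process could grab it in between.

import socket

def find_free_port() -> int:
    """Ask the OS for an unused TCP port by binding to port 0."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(('', 0))  # port 0 -> kernel assigns a free ephemeral port
        return s.getsockname()[1]

print(find_free_port())  # e.g. 49731; the value varies per run
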
tests/runtime/test_microagent.py
DELETED
@@ -1,443 +0,0 @@
-"""Tests for microagent loading in runtime."""
-
-import os
-import tempfile
-from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-from conftest import (
-    _close_test_runtime,
-    _load_runtime,
-)
-
-from openhands.core.config import MCPConfig
-from openhands.core.config.mcp_config import MCPStdioServerConfig
-from openhands.mcp.utils import add_mcp_tools_to_agent
-from openhands.microagent.microagent import (
-    BaseMicroagent,
-    KnowledgeMicroagent,
-    RepoMicroagent,
-    TaskMicroagent,
-)
-from openhands.microagent.types import MicroagentType
-
-
-def _create_test_microagents(test_dir: str):
-    """Create test microagent files in the given directory."""
-    microagents_dir = Path(test_dir) / '.openhands' / 'microagents'
-    microagents_dir.mkdir(parents=True, exist_ok=True)
-
-    # Create test knowledge agent
-    knowledge_dir = microagents_dir / 'knowledge'
-    knowledge_dir.mkdir(exist_ok=True)
-    knowledge_agent = """---
-name: test_knowledge_agent
-type: knowledge
-version: 1.0.0
-agent: CodeActAgent
-triggers:
-- test
-- pytest
----
-
-# Test Guidelines
-
-Testing best practices and guidelines.
-"""
-    (knowledge_dir / 'knowledge.md').write_text(knowledge_agent)
-
-    # Create test repo agent
-    repo_agent = """---
-name: test_repo_agent
-type: repo
-version: 1.0.0
-agent: CodeActAgent
----
-
-# Test Repository Agent
-
-Repository-specific test instructions.
-"""
-    (microagents_dir / 'repo.md').write_text(repo_agent)
-
-    # Create legacy repo instructions
-    legacy_instructions = """# Legacy Instructions
-
-These are legacy repository instructions.
-"""
-    (Path(test_dir) / '.openhands_instructions').write_text(legacy_instructions)
-
-
-def test_load_microagents_with_trailing_slashes(
-    temp_dir, runtime_cls, run_as_openhands
-):
-    """Test loading microagents when directory paths have trailing slashes."""
-    # Create test files
-    _create_test_microagents(temp_dir)
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        # Load microagents
-        loaded_agents = runtime.get_microagents_from_selected_repo(None)
-
-        # Verify all agents are loaded
-        knowledge_agents = [
-            a for a in loaded_agents if isinstance(a, KnowledgeMicroagent)
-        ]
-        repo_agents = [a for a in loaded_agents if isinstance(a, RepoMicroagent)]
-
-        # Check knowledge agents
-        assert len(knowledge_agents) == 1
-        agent = knowledge_agents[0]
-        assert agent.name == 'knowledge/knowledge'
-        assert 'test' in agent.triggers
-        assert 'pytest' in agent.triggers
-
-        # Check repo agents (including legacy)
-        assert len(repo_agents) == 2  # repo.md + .openhands_instructions
-        repo_names = {a.name for a in repo_agents}
-        assert 'repo' in repo_names
-        assert 'repo_legacy' in repo_names
-
-    finally:
-        _close_test_runtime(runtime)
-
-
-def test_load_microagents_with_selected_repo(temp_dir, runtime_cls, run_as_openhands):
-    """Test loading microagents from a selected repository."""
-    # Create test files in a repository-like structure
-    repo_dir = Path(temp_dir) / 'OpenHands'
-    repo_dir.mkdir(parents=True)
-    _create_test_microagents(str(repo_dir))
-
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        # Load microagents with selected repository
-        loaded_agents = runtime.get_microagents_from_selected_repo(
-            'All-Hands-AI/OpenHands'
-        )
-
-        # Verify all agents are loaded
-        knowledge_agents = [
-            a for a in loaded_agents if isinstance(a, KnowledgeMicroagent)
-        ]
-        repo_agents = [a for a in loaded_agents if isinstance(a, RepoMicroagent)]
-
-        # Check knowledge agents
-        assert len(knowledge_agents) == 1
-        agent = knowledge_agents[0]
-        assert agent.name == 'knowledge/knowledge'
-        assert 'test' in agent.triggers
-        assert 'pytest' in agent.triggers
-
-        # Check repo agents (including legacy)
-        assert len(repo_agents) == 2  # repo.md + .openhands_instructions
-        repo_names = {a.name for a in repo_agents}
-        assert 'repo' in repo_names
-        assert 'repo_legacy' in repo_names
-
-    finally:
-        _close_test_runtime(runtime)
-
-
-def test_load_microagents_with_missing_files(temp_dir, runtime_cls, run_as_openhands):
-    """Test loading microagents when some files are missing."""
-    # Create only repo.md, no other files
-    microagents_dir = Path(temp_dir) / '.openhands' / 'microagents'
-    microagents_dir.mkdir(parents=True, exist_ok=True)
-
-    repo_agent = """---
-name: test_repo_agent
-type: repo
-version: 1.0.0
-agent: CodeActAgent
----
-
-# Test Repository Agent
-
-Repository-specific test instructions.
-"""
-    (microagents_dir / 'repo.md').write_text(repo_agent)
-
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    try:
-        # Load microagents
-        loaded_agents = runtime.get_microagents_from_selected_repo(None)
-
-        # Verify only repo agent is loaded
-        knowledge_agents = [
-            a for a in loaded_agents if isinstance(a, KnowledgeMicroagent)
-        ]
-        repo_agents = [a for a in loaded_agents if isinstance(a, RepoMicroagent)]
-
-        assert len(knowledge_agents) == 0
-        assert len(repo_agents) == 1
-
-        agent = repo_agents[0]
-        assert agent.name == 'repo'
-
-    finally:
-        _close_test_runtime(runtime)
-
-
-def test_task_microagent_creation():
-    """Test that a TaskMicroagent is created correctly."""
-    content = """---
-name: test_task
-version: 1.0.0
-author: openhands
-agent: CodeActAgent
-triggers:
-- /test_task
-inputs:
-  - name: TEST_VAR
-    description: "Test variable"
----
-
-This is a test task microagent with a variable: ${test_var}.
-"""
-
-    with tempfile.NamedTemporaryFile(suffix='.md') as f:
-        f.write(content.encode())
-        f.flush()
-
-        agent = BaseMicroagent.load(f.name)
-
-    assert isinstance(agent, TaskMicroagent)
-    assert agent.type == MicroagentType.TASK
-    assert agent.name == 'test_task'
-    assert '/test_task' in agent.triggers
-    assert "If the user didn't provide any of these variables" in agent.content
-
-
-def test_task_microagent_variable_extraction():
-    """Test that variables are correctly extracted from the content."""
-    content = """---
-name: test_task
-version: 1.0.0
-author: openhands
-agent: CodeActAgent
-triggers:
-- /test_task
-inputs:
-  - name: var1
-    description: "Variable 1"
----
-
-This is a test with variables: ${var1}, ${var2}, and ${var3}.
-"""
-
-    with tempfile.NamedTemporaryFile(suffix='.md') as f:
-        f.write(content.encode())
-        f.flush()
-
-        agent = BaseMicroagent.load(f.name)
-
-    assert isinstance(agent, TaskMicroagent)
-    variables = agent.extract_variables(agent.content)
-    assert set(variables) == {'var1', 'var2', 'var3'}
-    assert agent.requires_user_input()
-
-
-def test_knowledge_microagent_no_prompt():
-    """Test that a regular KnowledgeMicroagent doesn't get the prompt."""
-    content = """---
-name: test_knowledge
-version: 1.0.0
-author: openhands
-agent: CodeActAgent
-triggers:
-- test_knowledge
----
-
-This is a test knowledge microagent.
-"""
-
-    with tempfile.NamedTemporaryFile(suffix='.md') as f:
-        f.write(content.encode())
-        f.flush()
-
-        agent = BaseMicroagent.load(f.name)
-
-    assert isinstance(agent, KnowledgeMicroagent)
-    assert agent.type == MicroagentType.KNOWLEDGE
-    assert "If the user didn't provide any of these variables" not in agent.content
-
-
-def test_task_microagent_trigger_addition():
-    """Test that a trigger is added if not present."""
-    content = """---
-name: test_task
-version: 1.0.0
-author: openhands
-agent: CodeActAgent
-inputs:
-  - name: TEST_VAR
-    description: "Test variable"
----
-
-This is a test task microagent.
-"""
-
-    with tempfile.NamedTemporaryFile(suffix='.md') as f:
-        f.write(content.encode())
-        f.flush()
-
-        agent = BaseMicroagent.load(f.name)
-
-    assert isinstance(agent, TaskMicroagent)
-    assert '/test_task' in agent.triggers
-
-
-def test_task_microagent_no_duplicate_trigger():
-    """Test that a trigger is not duplicated if already present."""
-    content = """---
-name: test_task
-version: 1.0.0
-author: openhands
-agent: CodeActAgent
-triggers:
-- /test_task
-- another_trigger
-inputs:
-  - name: TEST_VAR
-    description: "Test variable"
----
-
-This is a test task microagent.
-"""
-
-    with tempfile.NamedTemporaryFile(suffix='.md') as f:
-        f.write(content.encode())
-        f.flush()
-
-        agent = BaseMicroagent.load(f.name)
-
-    assert isinstance(agent, TaskMicroagent)
-    assert agent.triggers.count('/test_task') == 1  # No duplicates
-    assert len(agent.triggers) == 2
-    assert 'another_trigger' in agent.triggers
-    assert '/test_task' in agent.triggers
-
-
-def test_task_microagent_match_trigger():
-    """Test that a task microagent matches its trigger correctly."""
-    content = """---
-name: test_task
-version: 1.0.0
-author: openhands
-agent: CodeActAgent
-triggers:
-- /test_task
-inputs:
-  - name: TEST_VAR
-    description: "Test variable"
----
-
-This is a test task microagent.
-"""
-
-    with tempfile.NamedTemporaryFile(suffix='.md') as f:
-        f.write(content.encode())
-        f.flush()
-
-        agent = BaseMicroagent.load(f.name)
-
-    assert isinstance(agent, TaskMicroagent)
-    assert agent.match_trigger('/test_task') == '/test_task'
-    assert agent.match_trigger(' /test_task ') == '/test_task'
-    assert agent.match_trigger('This contains /test_task') == '/test_task'
-    assert agent.match_trigger('/other_task') is None
-
-
-def test_default_tools_microagent_exists():
-    """Test that the default-tools microagent exists in the global microagents directory."""
-    # Get the path to the global microagents directory
-    import openhands
-
-    project_root = os.path.dirname(openhands.__file__)
-    parent_dir = os.path.dirname(project_root)
-    microagents_dir = os.path.join(parent_dir, 'microagents')
-
-    # Check that the default-tools.md file exists
-    default_tools_path = os.path.join(microagents_dir, 'default-tools.md')
-    assert os.path.exists(default_tools_path), (
-        f'default-tools.md not found at {default_tools_path}'
-    )
-
-    # Read the file and check its content
-    with open(default_tools_path, 'r') as f:
-        content = f.read()
-
-    # Verify it's a repo microagent (always activated)
-    assert 'type: repo' in content, 'default-tools.md should be a repo microagent'
-
-    # Verify it has the fetch tool configured
-    assert 'name: "fetch"' in content, 'default-tools.md should have a fetch tool'
-    assert 'command: "uvx"' in content, 'default-tools.md should use uvx command'
-    assert 'args: ["mcp-server-fetch"]' in content, (
-        'default-tools.md should use mcp-server-fetch'
-    )
-
-
-@pytest.mark.asyncio
-async def test_add_mcp_tools_from_microagents():
-    """Test that add_mcp_tools_to_agent adds tools from microagents."""
-    # Import ActionExecutionClient for mocking
-
-    from openhands.core.config.openhands_config import OpenHandsConfig
-    from openhands.runtime.impl.action_execution.action_execution_client import (
-        ActionExecutionClient,
-    )
-
-    # Create mock objects
-    mock_agent = MagicMock()
-    mock_runtime = MagicMock(spec=ActionExecutionClient)
-    mock_memory = MagicMock()
-    mock_mcp_config = MCPConfig()
-
-    # Create a mock OpenHandsConfig with the MCP config
-    mock_app_config = OpenHandsConfig(mcp=mock_mcp_config, search_api_key=None)
-
-    # Configure the mock memory to return a microagent MCP config
-    mock_stdio_server = MCPStdioServerConfig(
-        name='test-tool', command='test-command', args=['test-arg1', 'test-arg2']
-    )
-    mock_microagent_mcp_config = MCPConfig(stdio_servers=[mock_stdio_server])
-    mock_memory.get_microagent_mcp_tools.return_value = [mock_microagent_mcp_config]
-
-    # Configure the mock runtime
-    mock_runtime.runtime_initialized = True
-    mock_runtime.get_mcp_config.return_value = mock_microagent_mcp_config
-
-    # Mock the fetch_mcp_tools_from_config function to return a mock tool
-    mock_tool = {
-        'type': 'function',
-        'function': {
-            'name': 'test-tool',
-            'description': 'Test tool description',
-            'parameters': {},
-        },
-    }
-
-    with patch(
-        'openhands.mcp.utils.fetch_mcp_tools_from_config',
-        new=AsyncMock(return_value=[mock_tool]),
-    ):
-        # Call the function with the OpenHandsConfig instead of MCPConfig
-        await add_mcp_tools_to_agent(
-            mock_agent, mock_runtime, mock_memory, mock_app_config
-        )
-
-    # Verify that the memory's get_microagent_mcp_tools was called
-    mock_memory.get_microagent_mcp_tools.assert_called_once()
-
-    # Verify that the runtime's get_mcp_config was called with the extra stdio servers
-    mock_runtime.get_mcp_config.assert_called_once()
-    args, kwargs = mock_runtime.get_mcp_config.call_args
-    assert len(args) == 1
-    assert len(args[0]) == 1
-    assert args[0][0].name == 'test-tool'
-
-    # Verify that the agent's set_mcp_tools was called with the mock tool
-    mock_agent.set_mcp_tools.assert_called_once_with([mock_tool])
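
The microagent files written by _create_test_microagents above are markdown documents with a YAML frontmatter block. A minimal sketch of splitting such a file into metadata and body follows; it assumes PyYAML is available, and BaseMicroagent.load performs a richer version of this internally, so this is illustrative rather than its actual implementation.

import yaml  # PyYAML, assumed available

raw = """---
name: test_knowledge_agent
type: knowledge
triggers:
- test
- pytest
---

# Test Guidelines
"""

# split('---', 2) yields: empty prefix, the YAML frontmatter, the markdown body
_, frontmatter, body = raw.split('---', 2)
meta = yaml.safe_load(frontmatter)
assert meta['type'] == 'knowledge'
assert 'pytest' in meta['triggers']
print(body.strip())  # -> '# Test Guidelines'
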
tests/runtime/test_replay.py
DELETED
@@ -1,161 +0,0 @@
-"""Replay tests"""
-
-import asyncio
-from pathlib import Path
-
-from conftest import _close_test_runtime, _load_runtime
-
-from openhands.controller.state.state import State
-from openhands.core.config.config_utils import OH_DEFAULT_AGENT
-from openhands.core.config.openhands_config import OpenHandsConfig
-from openhands.core.main import run_controller
-from openhands.core.schema.agent import AgentState
-from openhands.events.action.empty import NullAction
-from openhands.events.action.message import MessageAction
-from openhands.events.event import EventSource
-from openhands.events.observation.commands import CmdOutputObservation
-
-
-def _get_config(trajectory_name: str, agent: str = OH_DEFAULT_AGENT):
-    return OpenHandsConfig(
-        default_agent=agent,
-        run_as_openhands=False,
-        # do not mount workspace
-        workspace_base=None,
-        workspace_mount_path=None,
-        replay_trajectory_path=str(
-            (Path(__file__).parent / 'trajs' / f'{trajectory_name}.json').resolve()
-        ),
-    )
-
-
-def test_simple_replay(temp_dir, runtime_cls, run_as_openhands):
-    """
-    A simple replay test that involves simple terminal operations and edits
-    (creating a simple 2048 game), using the default agent
-    """
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    config.replay_trajectory_path = str(
-        (Path(__file__).parent / 'trajs' / 'basic.json').resolve()
-    )
-    config.security.confirmation_mode = False
-
-    state: State | None = asyncio.run(
-        run_controller(
-            config=config,
-            initial_user_action=NullAction(),
-            runtime=runtime,
-        )
-    )
-
-    assert state.agent_state == AgentState.FINISHED
-
-    _close_test_runtime(runtime)
-
-
-def test_simple_gui_replay(temp_dir, runtime_cls, run_as_openhands):
-    """
-    A simple replay test that involves simple terminal operations and edits
-    (writing a Vue.js App), using the default agent
-
-    Note:
-    1. This trajectory is exported from GUI mode, meaning it has extra
-    environmental actions that don't appear in headless mode's trajectories
-    2. In GUI mode, agents typically don't finish; rather, they wait for the next
-    task from the user, so this exported trajectory ends with awaiting_user_input
-    """
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-
-    config = _get_config('basic_gui_mode')
-    config.security.confirmation_mode = False
-
-    state: State | None = asyncio.run(
-        run_controller(
-            config=config,
-            initial_user_action=NullAction(),
-            runtime=runtime,
-            # exit on message, otherwise this would be stuck on waiting for user input
-            exit_on_message=True,
-        )
-    )
-
-    assert state.agent_state == AgentState.FINISHED
-
-    _close_test_runtime(runtime)
-
-
-def test_replay_wrong_initial_state(temp_dir, runtime_cls, run_as_openhands):
-    """
-    Replay requires a consistent initial state to start with, otherwise it might
-    produce garbage. The trajectory used in this test assumes the existence of
-    a file named 'game_2048.py', which doesn't exist when we replay the trajectory
-    (a so-called inconsistent initial state). This test demonstrates what this
-    would look like: the following events would still be replayed even though
-    they are meaningless.
-    """
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    config.replay_trajectory_path = str(
-        (Path(__file__).parent / 'trajs' / 'wrong_initial_state.json').resolve()
-    )
-    config.security.confirmation_mode = False
-
-    state: State | None = asyncio.run(
-        run_controller(
-            config=config,
-            initial_user_action=NullAction(),
-            runtime=runtime,
-        )
-    )
-
-    assert state.agent_state == AgentState.FINISHED
-
-    has_error_in_action = False
-    for event in state.history:
-        if isinstance(event, CmdOutputObservation) and event.exit_code != 0:
-            has_error_in_action = True
-            break
-
-    assert has_error_in_action
-
-    _close_test_runtime(runtime)
-
-
-def test_replay_basic_interactions(temp_dir, runtime_cls, run_as_openhands):
-    """
-    Replay a trajectory that involves interactions, i.e. with user messages
-    in the middle. This tests two things:
-    1) The controller should be able to replay all actions without human
-    interference (no asking for user input).
-    2) The user messages in the trajectory should appear in the history.
-    """
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-
-    config = _get_config('basic_interactions')
-    config.security.confirmation_mode = False
-
-    state: State | None = asyncio.run(
-        run_controller(
-            config=config,
-            initial_user_action=NullAction(),
-            runtime=runtime,
-        )
-    )
-
-    assert state.agent_state == AgentState.FINISHED
-
-    # all user messages appear in the history, so that after a replay (assuming
-    # the trajectory doesn't end with `finish` action), LLM knows about all the
-    # context and can continue
-    user_messages = [
-        "what's 1+1?",
-        "No, I mean by Goldbach's conjecture!",
-        'Finish please',
-    ]
-    i = 0
-    for event in state.history:
-        if isinstance(event, MessageAction) and event._source == EventSource.USER:
-            assert event.message == user_messages[i]
-            i += 1
-    assert i == len(user_messages)
-
-    _close_test_runtime(runtime)
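
The replay mechanism these deleted tests exercised boils down to pointing `replay_trajectory_path` at an exported trajectory and running the controller headlessly; recorded events are then re-emitted instead of querying the LLM. A minimal standalone sketch, assuming the same OpenHands APIs imported above (the trajectory path is hypothetical):

```python
import asyncio
from pathlib import Path

from openhands.core.config.openhands_config import OpenHandsConfig
from openhands.core.main import create_runtime, run_controller
from openhands.events.action.empty import NullAction
from openhands.utils.async_utils import call_async_from_sync

# Hypothetical path to a trajectory exported from a previous session.
traj = Path('trajs/basic.json').resolve()

config = OpenHandsConfig(
    run_as_openhands=False,
    # do not mount a workspace, mirroring the deleted tests
    workspace_base=None,
    workspace_mount_path=None,
    replay_trajectory_path=str(traj),
)
config.security.confirmation_mode = False

runtime = create_runtime(config, headless_mode=True)
call_async_from_sync(runtime.connect)

# A NullAction kicks off the controller; the recorded actions drive the run.
state = asyncio.run(
    run_controller(config=config, initial_user_action=NullAction(), runtime=runtime)
)
print(state.agent_state)  # expected to reach FINISHED for a consistent trajectory
```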
tests/runtime/test_runtime_resource.py
DELETED
@@ -1,115 +0,0 @@
-"""Stress tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
-
-import pytest
-from conftest import _close_test_runtime, _load_runtime
-
-from openhands.core.logger import openhands_logger as logger
-from openhands.events.action import CmdRunAction
-
-
-def test_stress_docker_runtime(temp_dir, runtime_cls, repeat=1):
-    pytest.skip('This test is flaky')
-    runtime, config = _load_runtime(
-        temp_dir,
-        runtime_cls,
-        docker_runtime_kwargs={
-            'cpu_period': 100000,  # 100ms
-            'cpu_quota': 100000,  # Can use 100ms out of each 100ms period (1 CPU)
-            'mem_limit': '4G',  # 4 GB of memory
-        },
-    )
-
-    action = CmdRunAction(
-        command='sudo apt-get update && sudo apt-get install -y stress-ng'
-    )
-    logger.info(action, extra={'msg_type': 'ACTION'})
-    obs = runtime.run_action(action)
-    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-    assert obs.exit_code == 0
-
-    for _ in range(repeat):
-        # run stress-ng stress tests for 30 seconds
-        action = CmdRunAction(command='stress-ng --all 1 -t 30s')
-        action.set_hard_timeout(120)
-        logger.info(action, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-
-    _close_test_runtime(runtime)
-
-
-# def test_stress_docker_runtime_hit_memory_limits(temp_dir, runtime_cls):
-#     """Test runtime behavior under resource constraints."""
-#     runtime, config = _load_runtime(
-#         temp_dir,
-#         runtime_cls,
-#         docker_runtime_kwargs={
-#             'cpu_period': 100000,  # 100ms
-#             'cpu_quota': 100000,  # Can use 100ms out of each 100ms period (1 CPU)
-#             'mem_limit': '4G',  # 4 GB of memory
-#             'memswap_limit': '0',  # No swap
-#             'mem_swappiness': 0,  # Disable swapping
-#             'oom_kill_disable': False,  # Enable OOM killer
-#         },
-#         runtime_startup_env_vars={
-#             'RUNTIME_MAX_MEMORY_GB': '3',
-#         },
-#     )
-
-#     action = CmdRunAction(
-#         command='sudo apt-get update && sudo apt-get install -y stress-ng'
-#     )
-#     logger.info(action, extra={'msg_type': 'ACTION'})
-#     obs = runtime.run_action(action)
-#     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-#     assert obs.exit_code == 0
-
-#     action = CmdRunAction(
-#         command='stress-ng --vm 1 --vm-bytes 6G --timeout 30s --metrics'
-#     )
-#     action.set_hard_timeout(120)
-#     logger.info(action, extra={'msg_type': 'ACTION'})
-#     obs = runtime.run_action(action)
-#     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-#     assert 'aborted early, out of system resources' in obs.content
-#     assert obs.exit_code == 3  # OOM killed!
-
-#     _close_test_runtime(runtime)
-
-
-# def test_stress_docker_runtime_within_memory_limits(temp_dir, runtime_cls):
-#     """Test runtime behavior under resource constraints."""
-#     runtime, config = _load_runtime(
-#         temp_dir,
-#         runtime_cls,
-#         docker_runtime_kwargs={
-#             'cpu_period': 100000,  # 100ms
-#             'cpu_quota': 100000,  # Can use 100ms out of each 100ms period (1 CPU)
-#             'mem_limit': '4G',  # 4 GB of memory
-#             'memswap_limit': '0',  # No swap
-#             'mem_swappiness': 0,  # Disable swapping
-#             'oom_kill_disable': False,  # Enable OOM killer
-#         },
-#         runtime_startup_env_vars={
-#             'RUNTIME_MAX_MEMORY_GB': '7',
-#         },
-#     )
-
-#     action = CmdRunAction(
-#         command='sudo apt-get update && sudo apt-get install -y stress-ng'
-#     )
-#     logger.info(action, extra={'msg_type': 'ACTION'})
-#     obs = runtime.run_action(action)
-#     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-#     assert obs.exit_code == 0
-
-#     action = CmdRunAction(
-#         command='stress-ng --vm 1 --vm-bytes 6G --timeout 30s --metrics'
-#     )
-#     action.set_hard_timeout(120)
-#     logger.info(action, extra={'msg_type': 'ACTION'})
-#     obs = runtime.run_action(action)
-#     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-#     assert obs.exit_code == 0
-
-#     _close_test_runtime(runtime)
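
The `docker_runtime_kwargs` above are passed through to the Docker container that hosts the sandbox. As a rough illustration of what those limits mean at the Docker level, here is a sketch using the docker-py SDK directly, outside OpenHands; treat it as an analogy for the kwargs, not the runtime's actual wiring:

```python
import docker

client = docker.from_env()

# cpu_period/cpu_quota of 100000/100000 caps the container at 1 CPU;
# mem_limit bounds its memory so stress-ng hits the cgroup ceiling, not the host's.
container = client.containers.run(
    'python:3.11-bookworm',
    "sh -c 'apt-get update && apt-get install -y stress-ng && stress-ng --all 1 -t 30s'",
    cpu_period=100000,
    cpu_quota=100000,
    mem_limit='4g',
    detach=True,
)
result = container.wait()  # blocks until the command exits
print(result['StatusCode'])
container.remove()
```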
tests/runtime/test_setup.py
DELETED
@@ -1,84 +0,0 @@
-"""Tests for the setup script."""
-
-from unittest.mock import patch
-
-from conftest import (
-    _load_runtime,
-)
-
-from openhands.core.setup import initialize_repository_for_runtime
-from openhands.events.action import FileReadAction, FileWriteAction
-from openhands.events.observation import FileReadObservation, FileWriteObservation
-from openhands.integrations.service_types import ProviderType, Repository
-
-
-def test_initialize_repository_for_runtime(temp_dir, runtime_cls, run_as_openhands):
-    """Test that the initialize_repository_for_runtime function works."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-    mock_repo = Repository(
-        id=1232,
-        full_name='All-Hands-AI/OpenHands',
-        git_provider=ProviderType.GITHUB,
-        is_public=True,
-    )
-
-    with patch(
-        'openhands.runtime.base.ProviderHandler.verify_repo_provider',
-        return_value=mock_repo,
-    ):
-        repository_dir = initialize_repository_for_runtime(
-            runtime, selected_repository='All-Hands-AI/OpenHands'
-        )
-
-    assert repository_dir is not None
-    assert repository_dir == 'OpenHands'
-
-
-def test_maybe_run_setup_script(temp_dir, runtime_cls, run_as_openhands):
-    """Test that setup script is executed when it exists."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
-
-    setup_script = '.openhands/setup.sh'
-    write_obs = runtime.write(
-        FileWriteAction(
-            path=setup_script, content="#!/bin/bash\necho 'Hello World' >> README.md\n"
-        )
-    )
-    assert isinstance(write_obs, FileWriteObservation)
-
-    # Run setup script
-    runtime.maybe_run_setup_script()
-
-    # Verify script was executed by checking output
-    read_obs = runtime.read(FileReadAction(path='README.md'))
-    assert isinstance(read_obs, FileReadObservation)
-    assert read_obs.content == 'Hello World\n'
-
-
-def test_maybe_run_setup_script_with_long_timeout(
-    temp_dir, runtime_cls, run_as_openhands
-):
-    """Test that a long-running setup script still completes when the
-    no-change timeout is short."""
-    runtime, config = _load_runtime(
-        temp_dir,
-        runtime_cls,
-        run_as_openhands,
-        runtime_startup_env_vars={'NO_CHANGE_TIMEOUT_SECONDS': '1'},
-    )
-
-    setup_script = '.openhands/setup.sh'
-    write_obs = runtime.write(
-        FileWriteAction(
-            path=setup_script,
-            content="#!/bin/bash\nsleep 3 && echo 'Hello World' >> README.md\n",
-        )
-    )
-    assert isinstance(write_obs, FileWriteObservation)
-
-    # Run setup script
-    runtime.maybe_run_setup_script()
-
-    # Verify script was executed by checking output
-    read_obs = runtime.read(FileReadAction(path='README.md'))
-    assert isinstance(read_obs, FileReadObservation)
-    assert read_obs.content == 'Hello World\n'
tests/runtime/test_stress_remote_runtime.py
DELETED
@@ -1,483 +0,0 @@
-"""Stress tests for the RemoteRuntime, which connects to the ActionExecutor running in the sandbox.
-
-Example usage:
-
-```bash
-export ALLHANDS_API_KEY="YOUR_API_KEY"
-export RUNTIME=remote
-export SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.staging.all-hands.dev"
-poetry run pytest -vvxss tests/runtime/test_stress_remote_runtime.py
-```
-
-"""
-
-import asyncio
-import os
-import tempfile
-import time
-from datetime import datetime
-from unittest.mock import MagicMock
-
-import pandas as pd
-import pytest
-from conftest import TEST_IN_CI
-
-from evaluation.utils.shared import (
-    EvalException,
-    EvalMetadata,
-    EvalOutput,
-    assert_and_raise,
-    codeact_user_response,
-    make_metadata,
-    prepare_dataset,
-    reset_logger_for_multiprocessing,
-    run_evaluation,
-)
-from openhands.agenthub import Agent
-from openhands.controller.state.state import State
-from openhands.core.config import (
-    AgentConfig,
-    LLMConfig,
-    OpenHandsConfig,
-    SandboxConfig,
-)
-from openhands.core.logger import openhands_logger as logger
-from openhands.core.main import create_runtime, run_controller
-from openhands.events.action import (
-    CmdRunAction,
-    FileEditAction,
-    FileWriteAction,
-    MessageAction,
-)
-from openhands.events.observation import CmdOutputObservation
-from openhands.events.serialization.event import event_to_dict
-from openhands.llm import LLM
-from openhands.runtime.base import Runtime
-from openhands.utils.async_utils import call_async_from_sync
-
-AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
-    'CodeActAgent': codeact_user_response,
-}
-
-
-def get_config() -> OpenHandsConfig:
-    config = OpenHandsConfig(
-        run_as_openhands=False,
-        runtime=os.environ.get('RUNTIME', 'remote'),
-        sandbox=SandboxConfig(
-            base_container_image='python:3.11-bookworm',
-            enable_auto_lint=True,
-            use_host_network=False,
-            # large enough timeout, since some testcases take very long to run
-            timeout=300,
-            api_key=os.environ.get('ALLHANDS_API_KEY', None),
-            remote_runtime_api_url=os.environ.get(
-                'SANDBOX_REMOTE_RUNTIME_API_URL', None
-            ),
-            keep_runtime_alive=False,
-            remote_runtime_resource_factor=1,
-        ),
-        # do not mount workspace
-        workspace_base=None,
-        workspace_mount_path=None,
-    )
-    agent_config = AgentConfig(
-        enable_jupyter=False,
-        enable_browsing=False,
-        enable_llm_editor=False,
-    )
-    config.set_agent_config(agent_config)
-    return config
-
-
-@pytest.mark.skipif(
-    TEST_IN_CI,
-    reason='This test should only be run locally, not in CI.',
-)
-def test_stress_remote_runtime_eval(n_eval_workers: int = 64):
-    """Mimic evaluation setting to test remote runtime in a multi-processing setting."""
-
-    def _initialize_runtime(
-        runtime: Runtime,
-    ):
-        """Initialize the runtime for the agent.
-
-        This function is called before the runtime is used to run the agent.
-        """
-        logger.info('-' * 30)
-        logger.info('BEGIN Runtime Initialization Fn')
-        logger.info('-' * 30)
-        obs: CmdOutputObservation
-
-        action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
-        action.set_hard_timeout(600)
-        logger.info(action, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')
-
-        action = CmdRunAction(command='mkdir -p /dummy_dir')
-        action.set_hard_timeout(600)
-        logger.info(action, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert_and_raise(
-            obs.exit_code == 0,
-            f'Failed to create /dummy_dir: {str(obs)}',
-        )
-
-        with tempfile.TemporaryDirectory() as temp_dir:
-            # Construct the full path for the desired file name within the temporary directory
-            temp_file_path = os.path.join(temp_dir, 'dummy_file')
-            # Write to the file with the desired name within the temporary directory
-            with open(temp_file_path, 'w') as f:
-                f.write('dummy content')
-
-            # Copy the file to the desired location
-            runtime.copy_to(temp_file_path, '/dummy_dir/')
-
-        logger.info('-' * 30)
-        logger.info('END Runtime Initialization Fn')
-        logger.info('-' * 30)
-
-    def _process_instance(
-        instance: pd.Series,
-        metadata: EvalMetadata,
-        reset_logger: bool = True,
-    ) -> EvalOutput:
-        config = get_config()
-
-        # Setup the logger properly, so you can run multi-processing to parallelize the evaluation
-        if reset_logger:
-            log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
-            reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
-        else:
-            logger.info(f'Starting evaluation for instance {instance.instance_id}.')
-
-        runtime = create_runtime(config, headless_mode=True)
-        call_async_from_sync(runtime.connect)
-
-        try:
-            _initialize_runtime(runtime)
-
-            instruction = 'dummy instruction'
-            agent = Agent.get_cls(metadata.agent_class)(
-                llm=LLM(config=metadata.llm_config),
-                config=config.get_agent_config(metadata.agent_class),
-            )
-
-            def next_command(*args, **kwargs):
-                return CmdRunAction(command='ls -lah')
-
-            agent.step = MagicMock(side_effect=next_command)
-
-            # Here's how you can run the agent (similar to the `main` function) and get the final task state
-            state: State | None = asyncio.run(
-                run_controller(
-                    config=config,
-                    initial_user_action=MessageAction(content=instruction),
-                    runtime=runtime,
-                    fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
-                        metadata.agent_class
-                    ],
-                    agent=agent,
-                )
-            )
-
-            # if fatal error, throw EvalError to trigger re-run
-            if (
-                state.last_error
-                and 'fatal error during agent execution' in state.last_error
-                and 'stuck in a loop' not in state.last_error
-            ):
-                raise EvalException('Fatal error detected: ' + state.last_error)
-
-        finally:
-            runtime.close()
-
-        test_result = {}
-        if state is None:
-            raise ValueError('State should not be None.')
-        histories = [event_to_dict(event) for event in state.history]
-        metrics = state.metrics.get() if state.metrics else None
-
-        # Save the output
-        output = EvalOutput(
-            instance_id=instance.instance_id,
-            instruction=instruction,
-            instance=instance.to_dict(),  # SWE Bench specific
-            test_result=test_result,
-            metadata=metadata,
-            history=histories,
-            metrics=metrics,
-            error=state.last_error if state and state.last_error else None,
-        )
-        return output
-
-    llm_config = LLMConfig()
-    metadata = make_metadata(
-        llm_config,
-        'dummy_dataset_description',
-        'CodeActAgent',
-        max_iterations=10,
-        eval_note='dummy_eval_note',
-        eval_output_dir='./dummy_eval_output_dir',
-        details={},
-    )
-
-    # generate 300 random dummy instances
-    dummy_instance = pd.DataFrame(
-        {
-            'instance_id': [f'dummy_instance_{i}' for i in range(300)],
-        }
-    )
-
-    output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
-    instances = prepare_dataset(
-        dummy_instance, output_file, eval_n_limit=len(dummy_instance)
-    )
-
-    run_evaluation(instances, metadata, output_file, n_eval_workers, _process_instance)
-
-
-@pytest.mark.skipif(
-    TEST_IN_CI,
-    reason='This test should only be run locally, not in CI.',
-)
-def test_stress_remote_runtime_long_output_with_soft_and_hard_timeout():
-    """Stress test for the remote runtime."""
-    config = get_config()
-
-    try:
-        runtime = create_runtime(config, headless_mode=True)
-        call_async_from_sync(runtime.connect)
-        _time_for_test = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
-
-        # Run a command that generates long output multiple times
-        for i in range(10):
-            start_time = time.time()
-            iteration_stats = {
-                'iteration': i,
-                'timestamp': time.time(),
-            }
-
-            # Check overall system memory usage
-            mem_action = CmdRunAction(
-                'free -k | grep "Mem:" | awk \'{printf "Total: %8.1f MB, Used: %8.1f MB, Free: %8.1f MB, Available: %8.1f MB\\n", $2/1024, $3/1024, $4/1024, $7/1024}\''
-            )
-            mem_obs = runtime.run_action(mem_action)
-            assert mem_obs.exit_code == 0
-            logger.info(
-                f'System memory usage (iteration {i}): {mem_obs.content.strip()}'
-            )
-            # Parse memory values from output
-            mem_parts = mem_obs.content.strip().split(',')
-            for part in mem_parts:
-                key, value = part.strip().split(':')
-                iteration_stats[f'memory_{key.lower()}'] = float(
-                    value.replace('MB', '').strip()
-                )
-
-            # Check top memory-consuming processes
-            mem_action = CmdRunAction(
-                'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | head -n 5'
-            )
-            mem_obs = runtime.run_action(mem_action)
-            assert mem_obs.exit_code == 0
-            _top_processes = [i.strip() for i in mem_obs.content.strip().split('\n')]
-            logger.info(
-                f'Top 5 memory-consuming processes (iteration {i}):\n{"- " + "\n- ".join(_top_processes)}'
-            )
-            iteration_stats['top_processes'] = _top_processes
-
-            # Check tmux memory usage (in KB)
-            mem_action = CmdRunAction(
-                'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | grep "/usr/bin/tmux" | grep -v grep | awk \'{print $1}\''
-            )
-            mem_obs = runtime.run_action(mem_action)
-            assert mem_obs.exit_code == 0
-            logger.info(
-                f'Tmux memory usage (iteration {i}): {mem_obs.content.strip()} KB'
-            )
-            try:
-                iteration_stats['tmux_memory_mb'] = float(mem_obs.content.strip())
-            except (ValueError, AttributeError):
-                iteration_stats['tmux_memory_mb'] = None
-
-            # Check action_execution_server mem
-            mem_action = CmdRunAction(
-                'ps aux | awk \'{printf "%8.1f MB %s\\n", $6/1024, $0}\' | sort -nr | grep "action_execution_server" | grep "/openhands/poetry" | grep -v grep | awk \'{print $1}\''
-            )
-            mem_obs = runtime.run_action(mem_action)
-            assert mem_obs.exit_code == 0
-            logger.info(
-                f'Action execution server memory usage (iteration {i}): {mem_obs.content.strip()} MB'
-            )
-            try:
-                iteration_stats['action_server_memory_mb'] = float(
-                    mem_obs.content.strip()
-                )
-            except (ValueError, AttributeError):
-                iteration_stats['action_server_memory_mb'] = None
-
-            # Test soft timeout
-            action = CmdRunAction(
-                'read -p "Do you want to continue? [Y/n] " answer; if [[ $answer == "Y" ]]; then echo "Proceeding with operation..."; echo "Operation completed successfully!"; else echo "Operation cancelled."; exit 1; fi'
-            )
-            obs = runtime.run_action(action)
-            assert 'Do you want to continue?' in obs.content
-            assert obs.exit_code == -1  # Command is still running, waiting for input
-
-            # Send the confirmation
-            action = CmdRunAction('Y', is_input=True)
-            obs = runtime.run_action(action)
-            assert 'Proceeding with operation...' in obs.content
-            assert 'Operation completed successfully!' in obs.content
-            assert obs.exit_code == 0
-            assert '[The command completed with exit code 0.]' in obs.metadata.suffix
-
-            # Test hard timeout w/ long output
-            # Generate long output with 1000 asterisks per line
-            action = CmdRunAction(
-                f'export i={i}; for j in $(seq 1 100); do echo "Line $j - Iteration $i - $(printf \'%1000s\' | tr " " "*")"; sleep 1; done'
-            )
-            action.set_hard_timeout(2)
-            obs = runtime.run_action(action)
-
-            # Verify the output
-            assert obs.exit_code == -1
-            assert f'Line 1 - Iteration {i}' in obs.content
-
-            # Because hard-timeout is triggered, the terminal will be in a weird state
-            # where it will not accept any new commands.
-            obs = runtime.run_action(CmdRunAction('ls'))
-            assert obs.exit_code == -1
-            assert 'The previous command is still running' in obs.metadata.suffix
-
-            # We need to send a Ctrl+C to reset the terminal.
-            obs = runtime.run_action(CmdRunAction('C-c', is_input=True))
-            assert obs.exit_code == 130
-
-            # Now make sure the terminal is in a good state
-            obs = runtime.run_action(CmdRunAction('ls'))
-            assert obs.exit_code == 0
-
-            duration = time.time() - start_time
-            iteration_stats['duration'] = duration
-            logger.info(f'Completed iteration {i} in {duration:.2f} seconds')
-
-    finally:
-        runtime.close()
-
-
-@pytest.mark.skipif(
-    TEST_IN_CI,
-    reason='This test should only be run locally, not in CI.',
-)
-def test_stress_runtime_memory_limits():
-    """Test runtime behavior under resource constraints."""
-    config = get_config()
-
-    # For Docker runtime, add resource constraints
-    if config.runtime == 'docker':
-        config.sandbox.docker_runtime_kwargs = {
-            'cpu_period': 100000,  # 100ms
-            'cpu_quota': 100000,  # Can use 100ms out of each 100ms period (1 CPU)
-            'mem_limit': '4G',  # 4 GB of memory
-            'memswap_limit': '0',  # No swap
-            'mem_swappiness': 0,  # Disable swapping
-            'oom_kill_disable': False,  # Enable OOM killer
-        }
-        config.sandbox.runtime_startup_env_vars = {
-            'RUNTIME_MAX_MEMORY_GB': '3',
-            'RUNTIME_MEMORY_MONITOR': 'true',
-        }
-
-    try:
-        runtime = create_runtime(config, headless_mode=True)
-        call_async_from_sync(runtime.connect)
-
-        # Install stress-ng
-        action = CmdRunAction(
-            command='sudo apt-get update && sudo apt-get install -y stress-ng'
-        )
-        logger.info(action, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert obs.exit_code == 0
-
-        action = CmdRunAction(
-            command='stress-ng --vm 1 --vm-bytes 6G --timeout 1m --metrics'
-        )
-        action.set_hard_timeout(120)
-        logger.info(action, extra={'msg_type': 'ACTION'})
-        obs = runtime.run_action(action)
-        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
-        assert 'aborted early, out of system resources' in obs.content
-        assert obs.exit_code == 3  # OOM killed!
-
-    finally:
-        runtime.close()
-
-
-@pytest.mark.skipif(
-    TEST_IN_CI,
-    reason='This test should only be run locally, not in CI.',
-)
-def test_stress_runtime_memory_limits_with_repeated_file_edit():
-    """Test runtime behavior under resource constraints with repeated file edits."""
-    config = get_config()
-
-    # For Docker runtime, add resource constraints
-    if config.runtime == 'docker':
-        config.sandbox.docker_runtime_kwargs = {
-            'cpu_period': 100000,  # 100ms
-            'cpu_quota': 100000,  # Can use 100ms out of each 100ms period (1 CPU)
-            'mem_limit': '4G',  # 4 GB of memory
-            'memswap_limit': '0',  # No swap
-            'mem_swappiness': 0,  # Disable swapping
-            'oom_kill_disable': False,  # Enable OOM killer
-        }
-        config.sandbox.runtime_startup_env_vars = {
-            'RUNTIME_MAX_MEMORY_GB': '3',
-            'RUNTIME_MEMORY_MONITOR': 'true',
-        }
-
-    try:
-        runtime = create_runtime(config, headless_mode=True)
-        call_async_from_sync(runtime.connect)
-
-        # Create initial test file with base content
-        test_file = '/tmp/test_file.txt'
-        # base_content = 'content_1\n' * 1000  # Create a reasonably sized file
-        base_content = ''
-        for i in range(1000):
-            base_content += f'content_{i:03d}\n'
-
-        # Use FileWriteAction to create initial file
-        write_action = FileWriteAction(path=test_file, content=base_content)
-        obs = runtime.run_action(write_action)
-
-        # Perform repeated file edits
-        for i in range(1000):
-            # Use FileEditAction with str_replace instead of IPythonRunCellAction
-            edit_action = FileEditAction(
-                command='str_replace',
-                path=test_file,
-                old_str=f'content_{i:03d}',
-                new_str=f'-content_{i:03d}',
-            )
-            obs = runtime.run_action(edit_action)
-            assert f'The file {test_file} has been edited' in obs.content, (
-                f'Edit failed at iteration {i}'
-            )
-            logger.info(f'finished iteration {i}')
-
-        # Verify final file state using FileEditAction view command
-        action = FileEditAction(command='view', path=test_file)
-        obs = runtime.run_action(action)
-        assert '-content_999' in obs.content, 'Final content verification failed'
-        logger.info('Final file content verified successfully')
-
-    finally:
-        runtime.close()
tests/runtime/trajs/basic.json
DELETED
@@ -1,202 +0,0 @@
-[
-  {
-    "id": 0,
-    "timestamp": "2025-01-27T22:44:56.900854",
-    "source": "user",
-    "message": "Please create a simple 2048 game. No need to run it. Please finish the interaction after your finish coding.",
-    "action": "message",
-    "args": {
-      "content": "Please create a simple 2048 game. No need to run it. Please finish the interaction after your finish coding.",
-      "image_urls": null,
-      "wait_for_response": false
-    },
-    "timeout": 120
-  },
-  {
-    "id": 3,
-    "timestamp": "2025-01-27T22:45:13.764487",
-    "source": "agent",
-    "message": "",
-    "action": "edit",
-    "tool_call_metadata": {
-      "function_name": "str_replace_editor",
-      "tool_call_id": "toolu_017nBp7UfiGqyrA7j77HgtFP",
-      "model_response": {
-        "id": "chatcmpl-6814daca-5c62-431d-8c88-9dafead244f1",
-        "created": 1738046713,
-        "model": "claude-3-5-sonnet-20241022",
-        "object": "chat.completion",
-        "system_fingerprint": null,
-        "choices": [
-          {
-            "finish_reason": "tool_calls",
-            "index": 0,
-            "message": {
-              "content": "I'll help you create a simple 2048 game in Python. I'll use the str_replace_editor to create the file.",
-              "role": "assistant",
-              "tool_calls": [
-                {
-                  "index": 1,
-                  "function": {
-                    "arguments": "{\"command\": \"create\", \"path\": \"/workspace/game_2048.py\", \"file_text\": \"import random\\nimport keyboard\\n\\nclass Game2048:\\n def __init__(self):\\n self.board = [[0] * 4 for _ in range(4)]\\n self.score = 0\\n self.spawn_number()\\n self.spawn_number()\\n\\n def spawn_number(self):\\n empty_cells = [(i, j) for i in range(4) for j in range(4) if self.board[i][j] == 0]\\n if empty_cells:\\n i, j = random.choice(empty_cells)\\n self.board[i][j] = 2 if random.random() < 0.9 else 4\\n\\n def move(self, direction):\\n merged = [[False] * 4 for _ in range(4)]\\n moved = False\\n\\n if direction in ['left', 'right']:\\n for i in range(4):\\n row = self.board[i][:]\\n new_row = self.merge_line(row, direction == 'right')\\n if new_row != row:\\n moved = True\\n self.board[i] = new_row\\n\\n elif direction in ['up', 'down']:\\n for j in range(4):\\n col = [self.board[i][j] for i in range(4)]\\n new_col = self.merge_line(col, direction == 'down')\\n if new_col != col:\\n moved = True\\n for i in range(4):\\n self.board[i][j] = new_col[i]\\n\\n if moved:\\n self.spawn_number()\\n\\n def merge_line(self, line, reverse=False):\\n if reverse:\\n line = line[::-1]\\n\\n new_line = [0] * 4\\n pos = 0\\n \\n # Remove zeros and merge equal numbers\\n numbers = [x for x in line if x != 0]\\n i = 0\\n while i < len(numbers):\\n if i + 1 < len(numbers) and numbers[i] == numbers[i + 1]:\\n new_line[pos] = numbers[i] * 2\\n self.score += numbers[i] * 2\\n i += 2\\n else:\\n new_line[pos] = numbers[i]\\n i += 1\\n pos += 1\\n\\n if reverse:\\n new_line = new_line[::-1]\\n \\n return new_line\\n\\n def is_game_over(self):\\n # Check if there are any empty cells\\n if any(0 in row for row in self.board):\\n return False\\n\\n # Check if there are any possible merges\\n for i in range(4):\\n for j in range(4):\\n current = self.board[i][j]\\n # Check right neighbor\\n if j < 3 and current == self.board[i][j + 1]:\\n return False\\n # Check bottom neighbor\\n if i < 3 and current == self.board[i + 1][j]:\\n return False\\n return True\\n\\n def display(self):\\n print(f\\\"\\\\nScore: {self.score}\\\\n\\\")\\n for row in self.board:\\n print(\\\" \\\".join(f\\\"{num:4}\\\" for num in row))\\n print(\\\"\\\\n\\\")\\n\\ndef main():\\n game = Game2048()\\n \\n print(\\\"Use arrow keys to move. Press 'q' to quit.\\\")\\n \\n while True:\\n game.display()\\n \\n if game.is_game_over():\\n print(\\\"Game Over!\\\")\\n break\\n \\n event = keyboard.read_event(suppress=True)\\n if event.event_type != 'down':\\n continue\\n \\n if event.name == 'q':\\n break\\n elif event.name == 'left':\\n game.move('left')\\n elif event.name == 'right':\\n game.move('right')\\n elif event.name == 'up':\\n game.move('up')\\n elif event.name == 'down':\\n game.move('down')\\n\\nif __name__ == \\\"__main__\\\":\\n main()\"}",
-                    "name": "str_replace_editor"
-                  },
-                  "id": "toolu_017nBp7UfiGqyrA7j77HgtFP",
-                  "type": "function"
-                }
-              ],
-              "function_call": null
-            }
-          }
-        ],
-        "usage": {
-          "completion_tokens": 1135,
-          "prompt_tokens": 3486,
-          "total_tokens": 4621,
-          "completion_tokens_details": null,
-          "prompt_tokens_details": {
-            "audio_tokens": null,
-            "cached_tokens": 0
-          },
-          "cache_creation_input_tokens": 3482,
-          "cache_read_input_tokens": 0
-        },
-        "service_tier": null
-      },
-      "total_calls_in_response": 1
-    },
-    "args": {
-      "path": "/workspace/game_2048.py",
-      "content": "",
-      "start": 1,
-      "end": -1,
-      "thought": "I'll help you create a simple 2048 game in Python. I'll use the str_replace_editor to create the file.",
-      "impl_source": "oh_aci",
-      "translated_ipython_code": "print(file_editor(**{'command': 'create', 'path': '/workspace/game_2048.py', 'file_text': 'import random\\nimport keyboard\\n\\nclass Game2048:\\n def __init__(self):\\n self.board = [[0] * 4 for _ in range(4)]\\n self.score = 0\\n self.spawn_number()\\n self.spawn_number()\\n\\n def spawn_number(self):\\n empty_cells = [(i, j) for i in range(4) for j in range(4) if self.board[i][j] == 0]\\n if empty_cells:\\n i, j = random.choice(empty_cells)\\n self.board[i][j] = 2 if random.random() < 0.9 else 4\\n\\n def move(self, direction):\\n merged = [[False] * 4 for _ in range(4)]\\n moved = False\\n\\n if direction in [\\'left\\', \\'right\\']:\\n for i in range(4):\\n row = self.board[i][:]\\n new_row = self.merge_line(row, direction == \\'right\\')\\n if new_row != row:\\n moved = True\\n self.board[i] = new_row\\n\\n elif direction in [\\'up\\', \\'down\\']:\\n for j in range(4):\\n col = [self.board[i][j] for i in range(4)]\\n new_col = self.merge_line(col, direction == \\'down\\')\\n if new_col != col:\\n moved = True\\n for i in range(4):\\n self.board[i][j] = new_col[i]\\n\\n if moved:\\n self.spawn_number()\\n\\n def merge_line(self, line, reverse=False):\\n if reverse:\\n line = line[::-1]\\n\\n new_line = [0] * 4\\n pos = 0\\n \\n # Remove zeros and merge equal numbers\\n numbers = [x for x in line if x != 0]\\n i = 0\\n while i < len(numbers):\\n if i + 1 < len(numbers) and numbers[i] == numbers[i + 1]:\\n new_line[pos] = numbers[i] * 2\\n self.score += numbers[i] * 2\\n i += 2\\n else:\\n new_line[pos] = numbers[i]\\n i += 1\\n pos += 1\\n\\n if reverse:\\n new_line = new_line[::-1]\\n \\n return new_line\\n\\n def is_game_over(self):\\n # Check if there are any empty cells\\n if any(0 in row for row in self.board):\\n return False\\n\\n # Check if there are any possible merges\\n for i in range(4):\\n for j in range(4):\\n current = self.board[i][j]\\n # Check right neighbor\\n if j < 3 and current == self.board[i][j + 1]:\\n return False\\n # Check bottom neighbor\\n if i < 3 and current == self.board[i + 1][j]:\\n return False\\n return True\\n\\n def display(self):\\n print(f\"\\\\nScore: {self.score}\\\\n\")\\n for row in self.board:\\n print(\" \".join(f\"{num:4}\" for num in row))\\n print(\"\\\\n\")\\n\\ndef main():\\n game = Game2048()\\n \\n print(\"Use arrow keys to move. Press \\'q\\' to quit.\")\\n \\n while True:\\n game.display()\\n \\n if game.is_game_over():\\n print(\"Game Over!\")\\n break\\n \\n event = keyboard.read_event(suppress=True)\\n if event.event_type != \\'down\\':\\n continue\\n \\n if event.name == \\'q\\':\\n break\\n elif event.name == \\'left\\':\\n game.move(\\'left\\')\\n elif event.name == \\'right\\':\\n game.move(\\'right\\')\\n elif event.name == \\'up\\':\\n game.move(\\'up\\')\\n elif event.name == \\'down\\':\\n game.move(\\'down\\')\\n\\nif __name__ == \"__main__\":\\n main()'}))"
-    },
-    "timeout": 120
-  },
-  {
-    "id": 4,
-    "timestamp": "2025-01-27T22:45:13.801384",
-    "source": "agent",
-    "message": "I edited the file /workspace/game_2048.py.",
-    "cause": 3,
-    "observation": "edit",
-    "tool_call_metadata": {
-      "function_name": "str_replace_editor",
-      "tool_call_id": "toolu_017nBp7UfiGqyrA7j77HgtFP",
-      "model_response": {
-        "id": "chatcmpl-6814daca-5c62-431d-8c88-9dafead244f1",
-        "created": 1738046713,
-        "model": "claude-3-5-sonnet-20241022",
-        "object": "chat.completion",
-        "system_fingerprint": null,
-        "choices": [
-          {
-            "finish_reason": "tool_calls",
-            "index": 0,
-            "message": {
-              "content": "I'll help you create a simple 2048 game in Python. I'll use the str_replace_editor to create the file.",
-              "role": "assistant",
-              "tool_calls": [
-                {
-                  "index": 1,
-                  "function": {
-                    "arguments": "{\"command\": \"create\", \"path\": \"/workspace/game_2048.py\", \"file_text\": \"import random\\nimport keyboard\\n\\nclass Game2048:\\n def __init__(self):\\n self.board = [[0] * 4 for _ in range(4)]\\n self.score = 0\\n self.spawn_number()\\n self.spawn_number()\\n\\n def spawn_number(self):\\n empty_cells = [(i, j) for i in range(4) for j in range(4) if self.board[i][j] == 0]\\n if empty_cells:\\n i, j = random.choice(empty_cells)\\n self.board[i][j] = 2 if random.random() < 0.9 else 4\\n\\n def move(self, direction):\\n merged = [[False] * 4 for _ in range(4)]\\n moved = False\\n\\n if direction in ['left', 'right']:\\n for i in range(4):\\n row = self.board[i][:]\\n new_row = self.merge_line(row, direction == 'right')\\n if new_row != row:\\n moved = True\\n self.board[i] = new_row\\n\\n elif direction in ['up', 'down']:\\n for j in range(4):\\n col = [self.board[i][j] for i in range(4)]\\n new_col = self.merge_line(col, direction == 'down')\\n if new_col != col:\\n moved = True\\n for i in range(4):\\n self.board[i][j] = new_col[i]\\n\\n if moved:\\n self.spawn_number()\\n\\n def merge_line(self, line, reverse=False):\\n if reverse:\\n line = line[::-1]\\n\\n new_line = [0] * 4\\n pos = 0\\n \\n # Remove zeros and merge equal numbers\\n numbers = [x for x in line if x != 0]\\n i = 0\\n while i < len(numbers):\\n if i + 1 < len(numbers) and numbers[i] == numbers[i + 1]:\\n new_line[pos] = numbers[i] * 2\\n self.score += numbers[i] * 2\\n i += 2\\n else:\\n new_line[pos] = numbers[i]\\n i += 1\\n pos += 1\\n\\n if reverse:\\n new_line = new_line[::-1]\\n \\n return new_line\\n\\n def is_game_over(self):\\n # Check if there are any empty cells\\n if any(0 in row for row in self.board):\\n return False\\n\\n # Check if there are any possible merges\\n for i in range(4):\\n for j in range(4):\\n current = self.board[i][j]\\n # Check right neighbor\\n if j < 3 and current == self.board[i][j + 1]:\\n return False\\n # Check bottom neighbor\\n if i < 3 and current == self.board[i + 1][j]:\\n return False\\n return True\\n\\n def display(self):\\n print(f\\\"\\\\nScore: {self.score}\\\\n\\\")\\n for row in self.board:\\n print(\\\" \\\".join(f\\\"{num:4}\\\" for num in row))\\n print(\\\"\\\\n\\\")\\n\\ndef main():\\n game = Game2048()\\n \\n print(\\\"Use arrow keys to move. Press 'q' to quit.\\\")\\n \\n while True:\\n game.display()\\n \\n if game.is_game_over():\\n print(\\\"Game Over!\\\")\\n break\\n \\n event = keyboard.read_event(suppress=True)\\n if event.event_type != 'down':\\n continue\\n \\n if event.name == 'q':\\n break\\n elif event.name == 'left':\\n game.move('left')\\n elif event.name == 'right':\\n game.move('right')\\n elif event.name == 'up':\\n game.move('up')\\n elif event.name == 'down':\\n game.move('down')\\n\\nif __name__ == \\\"__main__\\\":\\n main()\"}",
-                    "name": "str_replace_editor"
-                  },
-                  "id": "toolu_017nBp7UfiGqyrA7j77HgtFP",
-                  "type": "function"
-                }
-              ],
-              "function_call": null
-            }
-          }
-        ],
-        "usage": {
-          "completion_tokens": 1135,
-          "prompt_tokens": 3486,
-          "total_tokens": 4621,
-          "completion_tokens_details": null,
-          "prompt_tokens_details": {
-            "audio_tokens": null,
-            "cached_tokens": 0
-          },
-          "cache_creation_input_tokens": 3482,
-          "cache_read_input_tokens": 0
-        },
-        "service_tier": null
-      },
-      "total_calls_in_response": 1
-    },
-    "content": "--- /workspace/game_2048.py\n+++ /workspace/game_2048.py\n@@ -0,0 +1,2 @@\n+import random\n+import keyboard\n@@ -1,0 +4,114 @@\n+class Game2048:\n+ def __init__(self):\n+ self.board = [[0] * 4 for _ in range(4)]\n+ self.score = 0\n+ self.spawn_number()\n+ self.spawn_number()\n+\n+ def spawn_number(self):\n+ empty_cells = [(i, j) for i in range(4) for j in range(4) if self.board[i][j] == 0]\n+ if empty_cells:\n+ i, j = random.choice(empty_cells)\n+ self.board[i][j] = 2 if random.random() < 0.9 else 4\n+\n+ def move(self, direction):\n+ merged = [[False] * 4 for _ in range(4)]\n+ moved = False\n+\n+ if direction in ['left', 'right']:\n+ for i in range(4):\n+ row = self.board[i][:]\n+ new_row = self.merge_line(row, direction == 'right')\n+ if new_row != row:\n+ moved = True\n+ self.board[i] = new_row\n+\n+ elif direction in ['up', 'down']:\n+ for j in range(4):\n+ col = [self.board[i][j] for i in range(4)]\n+ new_col = self.merge_line(col, direction == 'down')\n+ if new_col != col:\n+ moved = True\n+ for i in range(4):\n+ self.board[i][j] = new_col[i]\n+\n+ if moved:\n+ self.spawn_number()\n+\n+ def merge_line(self, line, reverse=False):\n+ if reverse:\n+ line = line[::-1]\n+\n+ new_line = [0] * 4\n+ pos = 0\n+\n+ # Remove zeros and merge equal numbers\n+ numbers = [x for x in line if x != 0]\n+ i = 0\n+ while i < len(numbers):\n+ if i + 1 < len(numbers) and numbers[i] == numbers[i + 1]:\n+ new_line[pos] = numbers[i] * 2\n+ self.score += numbers[i] * 2\n+ i += 2\n+ else:\n+ new_line[pos] = numbers[i]\n+ i += 1\n+ pos += 1\n+\n+ if reverse:\n+ new_line = new_line[::-1]\n+\n+ return new_line\n+\n+ def is_game_over(self):\n+ # Check if there are any empty cells\n+ if any(0 in row for row in self.board):\n+ return False\n+\n+ # Check if there are any possible merges\n+ for i in range(4):\n+ for j in range(4):\n+ current = self.board[i][j]\n+ # Check right neighbor\n+ if j < 3 and current == self.board[i][j + 1]:\n+ return False\n+ # Check bottom neighbor\n+ if i < 3 and current == self.board[i + 1][j]:\n+ return False\n+ return True\n+\n+ def display(self):\n+ print(f\"\\nScore: {self.score}\\n\")\n+ for row in self.board:\n+ print(\" \".join(f\"{num:4}\" for num in row))\n+ print(\"\\n\")\n+\n+def main():\n+ game = Game2048()\n+\n+ print(\"Use arrow keys to move. Press 'q' to quit.\")\n+\n+ while True:\n+ game.display()\n+\n+ if game.is_game_over():\n+ print(\"Game Over!\")\n+ break\n+\n+ event = keyboard.read_event(suppress=True)\n+ if event.event_type != 'down':\n+ continue\n+\n+ if event.name == 'q':\n+ break\n+ elif event.name == 'left':\n+ game.move('left')\n+ elif event.name == 'right':\n+ game.move('right')\n+ elif event.name == 'up':\n+ game.move('up')\n+ elif event.name == 'down':\n+ game.move('down')\n+\n+if __name__ == \"__main__\":\n+ main()",
-    "extras": {
-      "path": "/workspace/game_2048.py",
-      "prev_exist": false,
-      "old_content": null,
-      "new_content": "import random\nimport keyboard\n\nclass Game2048:\n def __init__(self):\n self.board = [[0] * 4 for _ in range(4)]\n self.score = 0\n self.spawn_number()\n self.spawn_number()\n\n def spawn_number(self):\n empty_cells = [(i, j) for i in range(4) for j in range(4) if self.board[i][j] == 0]\n if empty_cells:\n i, j = random.choice(empty_cells)\n self.board[i][j] = 2 if random.random() < 0.9 else 4\n\n def move(self, direction):\n merged = [[False] * 4 for _ in range(4)]\n moved = False\n\n if direction in ['left', 'right']:\n for i in range(4):\n row = self.board[i][:]\n new_row = self.merge_line(row, direction == 'right')\n if new_row != row:\n moved = True\n self.board[i] = new_row\n\n elif direction in ['up', 'down']:\n for j in range(4):\n col = [self.board[i][j] for i in range(4)]\n new_col = self.merge_line(col, direction == 'down')\n if new_col != col:\n moved = True\n for i in range(4):\n self.board[i][j] = new_col[i]\n\n if moved:\n self.spawn_number()\n\n def merge_line(self, line, reverse=False):\n if reverse:\n line = line[::-1]\n\n new_line = [0] * 4\n pos = 0\n \n # Remove zeros and merge equal numbers\n numbers = [x for x in line if x != 0]\n i = 0\n while i < len(numbers):\n if i + 1 < len(numbers) and numbers[i] == numbers[i + 1]:\n new_line[pos] = numbers[i] * 2\n self.score += numbers[i] * 2\n i += 2\n else:\n new_line[pos] = numbers[i]\n i += 1\n pos += 1\n\n if reverse:\n new_line = new_line[::-1]\n \n return new_line\n\n def is_game_over(self):\n # Check if there are any empty cells\n if any(0 in row for row in self.board):\n return False\n\n # Check if there are any possible merges\n for i in range(4):\n for j in range(4):\n current = self.board[i][j]\n # Check right neighbor\n if j < 3 and current == self.board[i][j + 1]:\n return False\n # Check bottom neighbor\n if i < 3 and current == self.board[i + 1][j]:\n return False\n return True\n\n def display(self):\n print(f\"\\nScore: {self.score}\\n\")\n for row in self.board:\n print(\" \".join(f\"{num:4}\" for num in row))\n print(\"\\n\")\n\ndef main():\n game = Game2048()\n \n print(\"Use arrow keys to move. Press 'q' to quit.\")\n \n while True:\n game.display()\n \n if game.is_game_over():\n print(\"Game Over!\")\n break\n \n event = keyboard.read_event(suppress=True)\n if event.event_type != 'down':\n continue\n \n if event.name == 'q':\n break\n elif event.name == 'left':\n game.move('left')\n elif event.name == 'right':\n game.move('right')\n elif event.name == 'up':\n game.move('up')\n elif event.name == 'down':\n game.move('down')\n\nif __name__ == \"__main__\":\n main()",
-      "impl_source": "oh_aci",
-      "formatted_output_and_error": "File created successfully at: /workspace/game_2048.py"
-    }
-  },
-  {
-    "id": 5,
-    "timestamp": "2025-01-27T22:45:18.405685",
-    "source": "agent",
-    "message": "I've created a simple implementation of the 2048 game with the following features:\n\n1. A 4x4 game board\n2. Arrow key controls for moving tiles\n3. Score tracking\n4. Game over detection\n5. Random spawning of new numbers (2 or 4)\n6. Merging of equal numbers when moving\n\nTo run the game, you would need to:\n1. Install the keyboard package (`pip install keyboard`)\n2. Run the script with sudo privileges (required for keyboard input)\n\nThe game uses the arrow keys for movement and 'q' to quit. The game ends when no more moves are possible.",
-    "action": "finish",
-    "tool_call_metadata": {
-      "function_name": "finish",
-      "tool_call_id": "toolu_01BUKu3B7nY574atNECkBiFk",
-      "model_response": {
-        "id": "chatcmpl-27cc0ad9-6b86-4f1f-9f72-d9c7ab2282f7",
-        "created": 1738046718,
-        "model": "claude-3-5-sonnet-20241022",
-        "object": "chat.completion",
-        "system_fingerprint": null,
-        "choices": [
-          {
-            "finish_reason": "tool_calls",
-            "index": 0,
-            "message": {
-              "content": "I've created a simple implementation of the 2048 game with the following features:\n\n1. A 4x4 game board\n2. Arrow key controls for moving tiles\n3. Score tracking\n4. Game over detection\n5. Random spawning of new numbers (2 or 4)\n6. Merging of equal numbers when moving\n\nTo run the game, you would need to:\n1. Install the keyboard package (`pip install keyboard`)\n2. Run the script with sudo privileges (required for keyboard input)\n\nThe game uses the arrow keys for movement and 'q' to quit. The game ends when no more moves are possible.",
-              "role": "assistant",
-              "tool_calls": [
-                {
-                  "index": 1,
-                  "function": {
-                    "arguments": "{}",
-                    "name": "finish"
-                  },
-                  "id": "toolu_01BUKu3B7nY574atNECkBiFk",
-                  "type": "function"
-                }
-              ],
-              "function_call": null
-            }
-          }
-        ],
-        "usage": {
-          "completion_tokens": 177,
-          "prompt_tokens": 4646,
-          "total_tokens": 4823,
-          "completion_tokens_details": null,
-          "prompt_tokens_details": {
-            "audio_tokens": null,
-            "cached_tokens": 3482
-          },
-          "cache_creation_input_tokens": 1158,
-          "cache_read_input_tokens": 3482
-        },
-        "service_tier": null
-      },
-      "total_calls_in_response": 1
-    },
-    "args": {
-      "outputs": {},
-      "thought": "I've created a simple implementation of the 2048 game with the following features:\n\n1. A 4x4 game board\n2. Arrow key controls for moving tiles\n3. Score tracking\n4. Game over detection\n5. Random spawning of new numbers (2 or 4)\n6. Merging of equal numbers when moving\n\nTo run the game, you would need to:\n1. Install the keyboard package (`pip install keyboard`)\n2. Run the script with sudo privileges (required for keyboard input)\n\nThe game uses the arrow keys for movement and 'q' to quit. The game ends when no more moves are possible."
-    },
-    "timeout": 120
-  }
-]
tests/runtime/trajs/basic_gui_mode.json
DELETED
@@ -1,631 +0,0 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"id": 0,
|
4 |
-
"timestamp": "2025-01-20T20:29:13.519638",
|
5 |
-
"source": "environment",
|
6 |
-
"message": "",
|
7 |
-
"observation": "agent_state_changed",
|
8 |
-
"content": "",
|
9 |
-
"extras": {
|
10 |
-
"agent_state": "loading"
|
11 |
-
}
|
12 |
-
},
|
13 |
-
{
|
14 |
-
"id": 1,
|
15 |
-
"timestamp": "2025-01-20T20:29:32.163218",
|
16 |
-
"source": "environment",
|
17 |
-
"message": "Agent state changed to init",
|
18 |
-
"action": "change_agent_state",
|
19 |
-
"args": {
|
20 |
-
"agent_state": "init",
|
21 |
-
"thought": ""
|
22 |
-
}
|
23 |
-
},
|
24 |
-
{
|
25 |
-
"id": 2,
|
26 |
-
"timestamp": "2025-01-20T20:29:32.165837",
|
27 |
-
"source": "environment",
|
28 |
-
"message": "No observation",
|
29 |
-
"cause": 1,
|
30 |
-
"observation": "null",
|
31 |
-
"content": "",
|
32 |
-
"extras": {}
|
33 |
-
},
|
34 |
-
{
|
35 |
-
"id": 3,
|
36 |
-
"timestamp": "2025-01-20T20:29:32.176175",
|
37 |
-
"source": "environment",
|
38 |
-
"message": "",
|
39 |
-
"observation": "agent_state_changed",
|
40 |
-
"content": "",
|
41 |
-
"extras": {
|
42 |
-
"agent_state": "init"
|
43 |
-
}
|
44 |
-
},
|
45 |
-
{
|
46 |
-
"id": 4,
|
47 |
-
"timestamp": "2025-01-20T20:29:32.262843",
|
48 |
-
"source": "user",
|
49 |
-
"message": "I want to create a VueJS app that allows me to:\n* See all the items on my todo list\n* add a new item to the list\n* mark an item as done\n* totally remove an item from the list\n* change the text of an item\n* set a due date on the item\n\nThis should be a client-only app with no backend. The list should persist in localStorage.",
|
50 |
-
"action": "message",
|
51 |
-
"args": {
|
52 |
-
"content": "I want to create a VueJS app that allows me to:\n* See all the items on my todo list\n* add a new item to the list\n* mark an item as done\n* totally remove an item from the list\n* change the text of an item\n* set a due date on the item\n\nThis should be a client-only app with no backend. The list should persist in localStorage.",
|
53 |
-
"image_urls": [],
|
54 |
-
"wait_for_response": false
|
55 |
-
}
|
56 |
-
},
|
57 |
-
{
|
58 |
-
"id": 5,
|
59 |
-
"timestamp": "2025-01-20T20:29:32.286160",
|
60 |
-
"source": "environment",
|
61 |
-
"message": "",
|
62 |
-
"observation": "agent_state_changed",
|
63 |
-
"content": "",
|
64 |
-
"extras": {
|
65 |
-
"agent_state": "running"
|
66 |
-
}
|
67 |
-
},
|
68 |
-
{
|
69 |
-
"id": 6,
|
70 |
-
"timestamp": "2025-01-20T20:29:32.289136",
|
71 |
-
"source": "user",
|
72 |
-
"message": "No observation",
|
73 |
-
"cause": 4,
|
74 |
-
"observation": "null",
|
75 |
-
"content": "",
|
76 |
-
"extras": {}
|
77 |
-
},
|
78 |
-
{
|
79 |
-
"id": 7,
|
80 |
-
"timestamp": "2025-01-20T20:29:35.040676",
|
81 |
-
"source": "agent",
|
82 |
-
"message": "Running command: mkdir -p /workspace/todo-app",
|
83 |
-
"action": "run",
|
84 |
-
"tool_call_metadata": {
|
85 |
-
"function_name": "execute_bash",
|
86 |
-
"tool_call_id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
|
87 |
-
"model_response": {
|
88 |
-
"id": "chatcmpl-6bb9f175-c587-44b4-8c8d-f91fc4827590",
|
89 |
-
"created": 1737433775,
|
90 |
-
"model": "claude-3-5-sonnet-20241022",
|
91 |
-
"object": "chat.completion",
|
92 |
-
"system_fingerprint": null,
|
93 |
-
"choices": [
|
94 |
-
{
|
95 |
-
"finish_reason": "tool_calls",
|
96 |
-
"index": 0,
|
97 |
-
"message": {
|
98 |
-
"content": "I'll help you create a Vue.js application with all the requested todo list functionality. Let's create this step by step.\n\nFirst, let's create a new directory and set up the basic files:",
|
99 |
-
"role": "assistant",
|
100 |
-
"tool_calls": [
|
101 |
-
{
|
102 |
-
"index": 1,
|
103 |
-
"function": {
|
104 |
-
"arguments": "{\"command\": \"mkdir -p /workspace/todo-app\"}",
|
105 |
-
"name": "execute_bash"
|
106 |
-
},
|
107 |
-
"id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
|
108 |
-
"type": "function"
|
109 |
-
}
|
110 |
-
],
|
111 |
-
"function_call": null
|
112 |
-
}
|
113 |
-
}
|
114 |
-
],
|
115 |
-
"usage": {
|
116 |
-
"completion_tokens": 104,
|
117 |
-
"prompt_tokens": 3439,
|
118 |
-
"total_tokens": 3543,
|
119 |
-
"completion_tokens_details": null,
|
120 |
-
"prompt_tokens_details": {
|
121 |
-
"audio_tokens": null,
|
122 |
-
"cached_tokens": 0
|
123 |
-
},
|
124 |
-
"cache_creation_input_tokens": 3435,
|
125 |
-
"cache_read_input_tokens": 0
|
126 |
-
},
|
127 |
-
"service_tier": null
|
128 |
-
},
|
129 |
-
"total_calls_in_response": 1
|
130 |
-
},
|
131 |
-
"args": {
|
132 |
-
"command": "mkdir -p /workspace/todo-app",
|
133 |
-
"is_input": false,
|
134 |
-
"thought": "I'll help you create a Vue.js application with all the requested todo list functionality. Let's create this step by step.\n\nFirst, let's create a new directory and set up the basic files:",
|
135 |
-
"blocking": false,
|
136 |
-
"hidden": false,
|
137 |
-
"confirmation_state": "confirmed"
|
138 |
-
}
|
139 |
-
},
|
140 |
-
{
|
141 |
-
"id": 8,
|
142 |
-
"timestamp": "2025-01-20T20:29:35.699033",
|
143 |
-
"source": "agent",
|
144 |
-
"message": "Command `mkdir -p /workspace/todo-app` executed with exit code 0.",
|
145 |
-
"cause": 7,
|
146 |
-
"observation": "run",
|
147 |
-
"tool_call_metadata": {
|
148 |
-
"function_name": "execute_bash",
|
149 |
-
"tool_call_id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
|
150 |
-
"model_response": {
|
151 |
-
"id": "chatcmpl-6bb9f175-c587-44b4-8c8d-f91fc4827590",
|
152 |
-
"created": 1737433775,
|
153 |
-
"model": "claude-3-5-sonnet-20241022",
|
154 |
-
"object": "chat.completion",
|
155 |
-
"system_fingerprint": null,
|
156 |
-
"choices": [
|
157 |
-
{
|
158 |
-
"finish_reason": "tool_calls",
|
159 |
-
"index": 0,
|
160 |
-
"message": {
|
161 |
-
"content": "I'll help you create a Vue.js application with all the requested todo list functionality. Let's create this step by step.\n\nFirst, let's create a new directory and set up the basic files:",
|
162 |
-
"role": "assistant",
|
163 |
-
"tool_calls": [
|
164 |
-
{
|
165 |
-
"index": 1,
|
166 |
-
"function": {
|
167 |
-
"arguments": "{\"command\": \"mkdir -p /workspace/todo-app\"}",
|
168 |
-
"name": "execute_bash"
|
169 |
-
},
|
170 |
-
"id": "toolu_01W9Z8jBctr8X2frZV9p1RYs",
|
171 |
-
"type": "function"
|
172 |
-
}
|
173 |
-
],
|
174 |
-
"function_call": null
|
175 |
-
}
|
176 |
-
}
|
177 |
-
],
|
178 |
-
"usage": {
|
179 |
-
"completion_tokens": 104,
|
180 |
-
"prompt_tokens": 3439,
|
181 |
-
"total_tokens": 3543,
|
182 |
-
"completion_tokens_details": null,
|
183 |
-
"prompt_tokens_details": {
|
184 |
-
"audio_tokens": null,
|
185 |
-
"cached_tokens": 0
|
186 |
-
},
|
187 |
-
"cache_creation_input_tokens": 3435,
|
188 |
-
"cache_read_input_tokens": 0
|
189 |
-
},
|
190 |
-
"service_tier": null
|
191 |
-
},
|
192 |
-
"total_calls_in_response": 1
|
193 |
-
},
|
194 |
-
"content": "",
|
195 |
-
"extras": {
|
196 |
-
"command": "mkdir -p /workspace/todo-app",
|
197 |
-
"metadata": {
|
198 |
-
"exit_code": 0,
|
199 |
-
"pid": -1,
|
200 |
-
"username": "openhands",
|
201 |
-
"hostname": "18f4db943d2f",
|
202 |
-
"working_dir": "/workspace",
|
203 |
-
"py_interpreter_path": "/openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python",
|
204 |
-
"prefix": "",
|
205 |
-
"suffix": "\n[The command completed with exit code 0.]"
|
206 |
-
},
|
207 |
-
"hidden": false
|
208 |
-
},
|
209 |
-
"success": true
|
210 |
-
},
|
211 |
-
{
|
212 |
-
"id": 9,
|
213 |
-
"timestamp": "2025-01-20T20:29:48.925379",
|
214 |
-
"source": "agent",
|
215 |
-
"message": "",
|
216 |
-
"action": "edit",
|
217 |
-
"tool_call_metadata": {
|
218 |
-
"function_name": "str_replace_editor",
|
219 |
-
"tool_call_id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
|
220 |
-
"model_response": {
|
221 |
-
"id": "chatcmpl-55936480-cbf9-4d24-bd4a-329bd2f1f65c",
|
222 |
-
"created": 1737433788,
|
223 |
-
"model": "claude-3-5-sonnet-20241022",
|
224 |
-
"object": "chat.completion",
|
225 |
-
"system_fingerprint": null,
|
226 |
-
"choices": [
|
227 |
-
{
|
228 |
-
"finish_reason": "tool_calls",
|
229 |
-
"index": 0,
|
230 |
-
"message": {
|
231 |
-
"content": "Let's create the main HTML file:",
|
232 |
-
"role": "assistant",
|
233 |
-
"tool_calls": [
|
234 |
-
{
|
235 |
-
"index": 1,
|
236 |
-
"function": {
|
237 |
-
"arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/index.html\", \"file_text\": \"<!DOCTYPE html>\\n<html lang=\\\"en\\\">\\n<head>\\n <meta charset=\\\"UTF-8\\\">\\n <meta name=\\\"viewport\\\" content=\\\"width=device-width, initial-scale=1.0\\\">\\n <title>Vue Todo App</title>\\n <script src=\\\"https://unpkg.com/vue@3/dist/vue.global.js\\\"></script>\\n <link href=\\\"https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css\\\" rel=\\\"stylesheet\\\">\\n <style>\\n .completed {\\n text-decoration: line-through;\\n color: #6c757d;\\n }\\n .todo-item {\\n display: flex;\\n align-items: center;\\n gap: 1rem;\\n padding: 0.5rem;\\n border-bottom: 1px solid #dee2e6;\\n }\\n .todo-item:last-child {\\n border-bottom: none;\\n }\\n </style>\\n</head>\\n<body>\\n <div id=\\\"app\\\" class=\\\"container mt-5\\\">\\n <h1 class=\\\"mb-4\\\">Todo List</h1>\\n \\n <!-- Add new todo form -->\\n <div class=\\\"mb-4\\\">\\n <form @submit.prevent=\\\"addTodo\\\" class=\\\"row g-3\\\">\\n <div class=\\\"col-md-6\\\">\\n <input type=\\\"text\\\" v-model=\\\"newTodoText\\\" class=\\\"form-control\\\" placeholder=\\\"Enter new todo\\\" required>\\n </div>\\n <div class=\\\"col-md-4\\\">\\n <input type=\\\"date\\\" v-model=\\\"newTodoDueDate\\\" class=\\\"form-control\\\">\\n </div>\\n <div class=\\\"col-md-2\\\">\\n <button type=\\\"submit\\\" class=\\\"btn btn-primary w-100\\\">Add Todo</button>\\n </div>\\n </form>\\n </div>\\n\\n <!-- Todo list -->\\n <div class=\\\"card\\\">\\n <div class=\\\"card-body\\\">\\n <div v-if=\\\"todos.length === 0\\\" class=\\\"text-center text-muted\\\">\\n No todos yet. Add one above!\\n </div>\\n <div v-for=\\\"todo in todos\\\" :key=\\\"todo.id\\\" class=\\\"todo-item\\\">\\n <input type=\\\"checkbox\\\" \\n :checked=\\\"todo.completed\\\" \\n @change=\\\"toggleTodo(todo)\\\"\\n class=\\\"form-check-input\\\">\\n \\n <div v-if=\\\"editingTodo !== todo.id\\\" \\n :class=\\\"{ completed: todo.completed }\\\"\\n class=\\\"flex-grow-1\\\">\\n {{ todo.text }}\\n <small v-if=\\\"todo.dueDate\\\" class=\\\"text-muted ms-2\\\">\\n Due: {{ formatDate(todo.dueDate) }}\\n </small>\\n </div>\\n \\n <div v-else class=\\\"flex-grow-1 d-flex gap-2\\\">\\n <input type=\\\"text\\\" \\n v-model=\\\"editingText\\\" \\n class=\\\"form-control\\\"\\n @keyup.enter=\\\"saveTodoEdit(todo)\\\"\\n @keyup.esc=\\\"cancelEdit\\\">\\n <input type=\\\"date\\\" \\n v-model=\\\"editingDueDate\\\" \\n class=\\\"form-control\\\">\\n </div>\\n\\n <div class=\\\"btn-group\\\">\\n <button v-if=\\\"editingTodo !== todo.id\\\"\\n @click=\\\"startEdit(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-secondary\\\">\\n Edit\\n </button>\\n <button v-else\\n @click=\\\"saveTodoEdit(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-primary\\\">\\n Save\\n </button>\\n <button @click=\\\"removeTodo(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-danger\\\">\\n Delete\\n </button>\\n </div>\\n </div>\\n </div>\\n </div>\\n </div>\\n\\n <script src=\\\"app.js\\\"></script>\\n</body>\\n</html>\"}",
|
238 |
-
"name": "str_replace_editor"
|
239 |
-
},
|
240 |
-
"id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
|
241 |
-
"type": "function"
|
242 |
-
}
|
243 |
-
],
|
244 |
-
"function_call": null
|
245 |
-
}
|
246 |
-
}
|
247 |
-
],
|
248 |
-
"usage": {
|
249 |
-
"completion_tokens": 1142,
|
250 |
-
"prompt_tokens": 3601,
|
251 |
-
"total_tokens": 4743,
|
252 |
-
"completion_tokens_details": null,
|
253 |
-
"prompt_tokens_details": {
|
254 |
-
"audio_tokens": null,
|
255 |
-
"cached_tokens": 3435
|
256 |
-
},
|
257 |
-
"cache_creation_input_tokens": 160,
|
258 |
-
"cache_read_input_tokens": 3435
|
259 |
-
},
|
260 |
-
"service_tier": null
|
261 |
-
},
|
262 |
-
"total_calls_in_response": 1
|
263 |
-
},
|
264 |
-
"args": {
|
265 |
-
"path": "/workspace/todo-app/index.html",
|
266 |
-
"content": "",
|
267 |
-
"start": 1,
|
268 |
-
"end": -1,
|
269 |
-
"thought": "Let's create the main HTML file:",
|
270 |
-
"impl_source": "oh_aci",
|
271 |
-
"translated_ipython_code": "print(file_editor(**{'command': 'create', 'path': '/workspace/todo-app/index.html', 'file_text': '<!DOCTYPE html>\\n<html lang=\"en\">\\n<head>\\n <meta charset=\"UTF-8\">\\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\\n <title>Vue Todo App</title>\\n <script src=\"https://unpkg.com/vue@3/dist/vue.global.js\"></script>\\n <link href=\"https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css\" rel=\"stylesheet\">\\n <style>\\n .completed {\\n text-decoration: line-through;\\n color: #6c757d;\\n }\\n .todo-item {\\n display: flex;\\n align-items: center;\\n gap: 1rem;\\n padding: 0.5rem;\\n border-bottom: 1px solid #dee2e6;\\n }\\n .todo-item:last-child {\\n border-bottom: none;\\n }\\n </style>\\n</head>\\n<body>\\n <div id=\"app\" class=\"container mt-5\">\\n <h1 class=\"mb-4\">Todo List</h1>\\n \\n <!-- Add new todo form -->\\n <div class=\"mb-4\">\\n <form @submit.prevent=\"addTodo\" class=\"row g-3\">\\n <div class=\"col-md-6\">\\n <input type=\"text\" v-model=\"newTodoText\" class=\"form-control\" placeholder=\"Enter new todo\" required>\\n </div>\\n <div class=\"col-md-4\">\\n <input type=\"date\" v-model=\"newTodoDueDate\" class=\"form-control\">\\n </div>\\n <div class=\"col-md-2\">\\n <button type=\"submit\" class=\"btn btn-primary w-100\">Add Todo</button>\\n </div>\\n </form>\\n </div>\\n\\n <!-- Todo list -->\\n <div class=\"card\">\\n <div class=\"card-body\">\\n <div v-if=\"todos.length === 0\" class=\"text-center text-muted\">\\n No todos yet. Add one above!\\n </div>\\n <div v-for=\"todo in todos\" :key=\"todo.id\" class=\"todo-item\">\\n <input type=\"checkbox\" \\n :checked=\"todo.completed\" \\n @change=\"toggleTodo(todo)\"\\n class=\"form-check-input\">\\n \\n <div v-if=\"editingTodo !== todo.id\" \\n :class=\"{ completed: todo.completed }\"\\n class=\"flex-grow-1\">\\n {{ todo.text }}\\n <small v-if=\"todo.dueDate\" class=\"text-muted ms-2\">\\n Due: {{ formatDate(todo.dueDate) }}\\n </small>\\n </div>\\n \\n <div v-else class=\"flex-grow-1 d-flex gap-2\">\\n <input type=\"text\" \\n v-model=\"editingText\" \\n class=\"form-control\"\\n @keyup.enter=\"saveTodoEdit(todo)\"\\n @keyup.esc=\"cancelEdit\">\\n <input type=\"date\" \\n v-model=\"editingDueDate\" \\n class=\"form-control\">\\n </div>\\n\\n <div class=\"btn-group\">\\n <button v-if=\"editingTodo !== todo.id\"\\n @click=\"startEdit(todo)\" \\n class=\"btn btn-sm btn-outline-secondary\">\\n Edit\\n </button>\\n <button v-else\\n @click=\"saveTodoEdit(todo)\" \\n class=\"btn btn-sm btn-outline-primary\">\\n Save\\n </button>\\n <button @click=\"removeTodo(todo)\" \\n class=\"btn btn-sm btn-outline-danger\">\\n Delete\\n </button>\\n </div>\\n </div>\\n </div>\\n </div>\\n </div>\\n\\n <script src=\"app.js\"></script>\\n</body>\\n</html>'}))"
|
272 |
-
}
|
273 |
-
},
|
274 |
-
{
|
275 |
-
"id": 10,
|
276 |
-
"timestamp": "2025-01-20T20:29:48.967655",
|
277 |
-
"source": "agent",
|
278 |
-
"message": "I edited the file /workspace/todo-app/index.html.",
|
279 |
-
"cause": 9,
|
280 |
-
"observation": "edit",
|
281 |
-
"tool_call_metadata": {
|
282 |
-
"function_name": "str_replace_editor",
|
283 |
-
"tool_call_id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
|
284 |
-
"model_response": {
|
285 |
-
"id": "chatcmpl-55936480-cbf9-4d24-bd4a-329bd2f1f65c",
|
286 |
-
"created": 1737433788,
|
287 |
-
"model": "claude-3-5-sonnet-20241022",
|
288 |
-
"object": "chat.completion",
|
289 |
-
"system_fingerprint": null,
|
290 |
-
"choices": [
|
291 |
-
{
|
292 |
-
"finish_reason": "tool_calls",
|
293 |
-
"index": 0,
|
294 |
-
"message": {
|
295 |
-
"content": "Let's create the main HTML file:",
|
296 |
-
"role": "assistant",
|
297 |
-
"tool_calls": [
|
298 |
-
{
|
299 |
-
"index": 1,
|
300 |
-
"function": {
|
301 |
-
"arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/index.html\", \"file_text\": \"<!DOCTYPE html>\\n<html lang=\\\"en\\\">\\n<head>\\n <meta charset=\\\"UTF-8\\\">\\n <meta name=\\\"viewport\\\" content=\\\"width=device-width, initial-scale=1.0\\\">\\n <title>Vue Todo App</title>\\n <script src=\\\"https://unpkg.com/vue@3/dist/vue.global.js\\\"></script>\\n <link href=\\\"https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css\\\" rel=\\\"stylesheet\\\">\\n <style>\\n .completed {\\n text-decoration: line-through;\\n color: #6c757d;\\n }\\n .todo-item {\\n display: flex;\\n align-items: center;\\n gap: 1rem;\\n padding: 0.5rem;\\n border-bottom: 1px solid #dee2e6;\\n }\\n .todo-item:last-child {\\n border-bottom: none;\\n }\\n </style>\\n</head>\\n<body>\\n <div id=\\\"app\\\" class=\\\"container mt-5\\\">\\n <h1 class=\\\"mb-4\\\">Todo List</h1>\\n \\n <!-- Add new todo form -->\\n <div class=\\\"mb-4\\\">\\n <form @submit.prevent=\\\"addTodo\\\" class=\\\"row g-3\\\">\\n <div class=\\\"col-md-6\\\">\\n <input type=\\\"text\\\" v-model=\\\"newTodoText\\\" class=\\\"form-control\\\" placeholder=\\\"Enter new todo\\\" required>\\n </div>\\n <div class=\\\"col-md-4\\\">\\n <input type=\\\"date\\\" v-model=\\\"newTodoDueDate\\\" class=\\\"form-control\\\">\\n </div>\\n <div class=\\\"col-md-2\\\">\\n <button type=\\\"submit\\\" class=\\\"btn btn-primary w-100\\\">Add Todo</button>\\n </div>\\n </form>\\n </div>\\n\\n <!-- Todo list -->\\n <div class=\\\"card\\\">\\n <div class=\\\"card-body\\\">\\n <div v-if=\\\"todos.length === 0\\\" class=\\\"text-center text-muted\\\">\\n No todos yet. Add one above!\\n </div>\\n <div v-for=\\\"todo in todos\\\" :key=\\\"todo.id\\\" class=\\\"todo-item\\\">\\n <input type=\\\"checkbox\\\" \\n :checked=\\\"todo.completed\\\" \\n @change=\\\"toggleTodo(todo)\\\"\\n class=\\\"form-check-input\\\">\\n \\n <div v-if=\\\"editingTodo !== todo.id\\\" \\n :class=\\\"{ completed: todo.completed }\\\"\\n class=\\\"flex-grow-1\\\">\\n {{ todo.text }}\\n <small v-if=\\\"todo.dueDate\\\" class=\\\"text-muted ms-2\\\">\\n Due: {{ formatDate(todo.dueDate) }}\\n </small>\\n </div>\\n \\n <div v-else class=\\\"flex-grow-1 d-flex gap-2\\\">\\n <input type=\\\"text\\\" \\n v-model=\\\"editingText\\\" \\n class=\\\"form-control\\\"\\n @keyup.enter=\\\"saveTodoEdit(todo)\\\"\\n @keyup.esc=\\\"cancelEdit\\\">\\n <input type=\\\"date\\\" \\n v-model=\\\"editingDueDate\\\" \\n class=\\\"form-control\\\">\\n </div>\\n\\n <div class=\\\"btn-group\\\">\\n <button v-if=\\\"editingTodo !== todo.id\\\"\\n @click=\\\"startEdit(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-secondary\\\">\\n Edit\\n </button>\\n <button v-else\\n @click=\\\"saveTodoEdit(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-primary\\\">\\n Save\\n </button>\\n <button @click=\\\"removeTodo(todo)\\\" \\n class=\\\"btn btn-sm btn-outline-danger\\\">\\n Delete\\n </button>\\n </div>\\n </div>\\n </div>\\n </div>\\n </div>\\n\\n <script src=\\\"app.js\\\"></script>\\n</body>\\n</html>\"}",
|
302 |
-
"name": "str_replace_editor"
|
303 |
-
},
|
304 |
-
"id": "toolu_01YYLXwwdBLwtMmjr5Sfsieg",
|
305 |
-
"type": "function"
|
306 |
-
}
|
307 |
-
],
|
308 |
-
"function_call": null
|
309 |
-
}
|
310 |
-
}
|
311 |
-
],
|
312 |
-
"usage": {
|
313 |
-
"completion_tokens": 1142,
|
314 |
-
"prompt_tokens": 3601,
|
315 |
-
"total_tokens": 4743,
|
316 |
-
"completion_tokens_details": null,
|
317 |
-
"prompt_tokens_details": {
|
318 |
-
"audio_tokens": null,
|
319 |
-
"cached_tokens": 3435
|
320 |
-
},
|
321 |
-
"cache_creation_input_tokens": 160,
|
322 |
-
"cache_read_input_tokens": 3435
|
323 |
-
},
|
324 |
-
"service_tier": null
|
325 |
-
},
|
326 |
-
"total_calls_in_response": 1
|
327 |
-
},
|
328 |
-
"content": "--- /workspace/todo-app/index.html\n+++ /workspace/todo-app/index.html\n@@ -0,0 +1,43 @@\n+<!DOCTYPE html>\n+<html lang=\"en\">\n+<head>\n+ <meta charset=\"UTF-8\">\n+ <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n+ <title>Vue Todo App</title>\n+ <script src=\"https://unpkg.com/vue@3/dist/vue.global.js\"></script>\n+ <link href=\"https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css\" rel=\"stylesheet\">\n+ <style>\n+ .completed {\n+ text-decoration: line-through;\n+ color: #6c757d;\n+ }\n+ .todo-item {\n+ display: flex;\n+ align-items: center;\n+ gap: 1rem;\n+ padding: 0.5rem;\n+ border-bottom: 1px solid #dee2e6;\n+ }\n+ .todo-item:last-child {\n+ border-bottom: none;\n+ }\n+ </style>\n+</head>\n+<body>\n+ <div id=\"app\" class=\"container mt-5\">\n+ <h1 class=\"mb-4\">Todo List</h1>\n+\n+ <!-- Add new todo form -->\n+ <div class=\"mb-4\">\n+ <form @submit.prevent=\"addTodo\" class=\"row g-3\">\n+ <div class=\"col-md-6\">\n+ <input type=\"text\" v-model=\"newTodoText\" class=\"form-control\" placeholder=\"Enter new todo\" required>\n+ </div>\n+ <div class=\"col-md-4\">\n+ <input type=\"date\" v-model=\"newTodoDueDate\" class=\"form-control\">\n+ </div>\n+ <div class=\"col-md-2\">\n+ <button type=\"submit\" class=\"btn btn-primary w-100\">Add Todo</button>\n+ </div>\n+ </form>\n+ </div>\n@@ -1,0 +45,56 @@\n+ <!-- Todo list -->\n+ <div class=\"card\">\n+ <div class=\"card-body\">\n+ <div v-if=\"todos.length === 0\" class=\"text-center text-muted\">\n+ No todos yet. Add one above!\n+ </div>\n+ <div v-for=\"todo in todos\" :key=\"todo.id\" class=\"todo-item\">\n+ <input type=\"checkbox\"\n+ :checked=\"todo.completed\"\n+ @change=\"toggleTodo(todo)\"\n+ class=\"form-check-input\">\n+\n+ <div v-if=\"editingTodo !== todo.id\"\n+ :class=\"{ completed: todo.completed }\"\n+ class=\"flex-grow-1\">\n+ {{ todo.text }}\n+ <small v-if=\"todo.dueDate\" class=\"text-muted ms-2\">\n+ Due: {{ formatDate(todo.dueDate) }}\n+ </small>\n+ </div>\n+\n+ <div v-else class=\"flex-grow-1 d-flex gap-2\">\n+ <input type=\"text\"\n+ v-model=\"editingText\"\n+ class=\"form-control\"\n+ @keyup.enter=\"saveTodoEdit(todo)\"\n+ @keyup.esc=\"cancelEdit\">\n+ <input type=\"date\"\n+ v-model=\"editingDueDate\"\n+ class=\"form-control\">\n+ </div>\n+\n+ <div class=\"btn-group\">\n+ <button v-if=\"editingTodo !== todo.id\"\n+ @click=\"startEdit(todo)\"\n+ class=\"btn btn-sm btn-outline-secondary\">\n+ Edit\n+ </button>\n+ <button v-else\n+ @click=\"saveTodoEdit(todo)\"\n+ class=\"btn btn-sm btn-outline-primary\">\n+ Save\n+ </button>\n+ <button @click=\"removeTodo(todo)\"\n+ class=\"btn btn-sm btn-outline-danger\">\n+ Delete\n+ </button>\n+ </div>\n+ </div>\n+ </div>\n+ </div>\n+ </div>\n+\n+ <script src=\"app.js\"></script>\n+</body>\n+</html>",
|
329 |
-
"extras": {
|
330 |
-
"path": "/workspace/todo-app/index.html",
|
331 |
-
"prev_exist": false,
|
332 |
-
"old_content": null,
|
333 |
-
"new_content": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n <meta charset=\"UTF-8\">\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n <title>Vue Todo App</title>\n <script src=\"https://unpkg.com/vue@3/dist/vue.global.js\"></script>\n <link href=\"https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css\" rel=\"stylesheet\">\n <style>\n .completed {\n text-decoration: line-through;\n color: #6c757d;\n }\n .todo-item {\n display: flex;\n align-items: center;\n gap: 1rem;\n padding: 0.5rem;\n border-bottom: 1px solid #dee2e6;\n }\n .todo-item:last-child {\n border-bottom: none;\n }\n </style>\n</head>\n<body>\n <div id=\"app\" class=\"container mt-5\">\n <h1 class=\"mb-4\">Todo List</h1>\n \n <!-- Add new todo form -->\n <div class=\"mb-4\">\n <form @submit.prevent=\"addTodo\" class=\"row g-3\">\n <div class=\"col-md-6\">\n <input type=\"text\" v-model=\"newTodoText\" class=\"form-control\" placeholder=\"Enter new todo\" required>\n </div>\n <div class=\"col-md-4\">\n <input type=\"date\" v-model=\"newTodoDueDate\" class=\"form-control\">\n </div>\n <div class=\"col-md-2\">\n <button type=\"submit\" class=\"btn btn-primary w-100\">Add Todo</button>\n </div>\n </form>\n </div>\n\n <!-- Todo list -->\n <div class=\"card\">\n <div class=\"card-body\">\n <div v-if=\"todos.length === 0\" class=\"text-center text-muted\">\n No todos yet. Add one above!\n </div>\n <div v-for=\"todo in todos\" :key=\"todo.id\" class=\"todo-item\">\n <input type=\"checkbox\" \n :checked=\"todo.completed\" \n @change=\"toggleTodo(todo)\"\n class=\"form-check-input\">\n \n <div v-if=\"editingTodo !== todo.id\" \n :class=\"{ completed: todo.completed }\"\n class=\"flex-grow-1\">\n {{ todo.text }}\n <small v-if=\"todo.dueDate\" class=\"text-muted ms-2\">\n Due: {{ formatDate(todo.dueDate) }}\n </small>\n </div>\n \n <div v-else class=\"flex-grow-1 d-flex gap-2\">\n <input type=\"text\" \n v-model=\"editingText\" \n class=\"form-control\"\n @keyup.enter=\"saveTodoEdit(todo)\"\n @keyup.esc=\"cancelEdit\">\n <input type=\"date\" \n v-model=\"editingDueDate\" \n class=\"form-control\">\n </div>\n\n <div class=\"btn-group\">\n <button v-if=\"editingTodo !== todo.id\"\n @click=\"startEdit(todo)\" \n class=\"btn btn-sm btn-outline-secondary\">\n Edit\n </button>\n <button v-else\n @click=\"saveTodoEdit(todo)\" \n class=\"btn btn-sm btn-outline-primary\">\n Save\n </button>\n <button @click=\"removeTodo(todo)\" \n class=\"btn btn-sm btn-outline-danger\">\n Delete\n </button>\n </div>\n </div>\n </div>\n </div>\n </div>\n\n <script src=\"app.js\"></script>\n</body>\n</html>",
|
334 |
-
"impl_source": "oh_aci",
|
335 |
-
"formatted_output_and_error": "File created successfully at: /workspace/todo-app/index.html"
|
336 |
-
}
|
337 |
-
},
|
338 |
-
{
|
339 |
-
"id": 11,
|
340 |
-
"timestamp": "2025-01-20T20:29:56.899226",
|
341 |
-
"source": "agent",
|
342 |
-
"message": "",
|
343 |
-
"action": "edit",
|
344 |
-
"tool_call_metadata": {
|
345 |
-
"function_name": "str_replace_editor",
|
346 |
-
"tool_call_id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
|
347 |
-
"model_response": {
|
348 |
-
"id": "chatcmpl-34fef176-276b-4a18-892f-688b08f51225",
|
349 |
-
"created": 1737433796,
|
350 |
-
"model": "claude-3-5-sonnet-20241022",
|
351 |
-
"object": "chat.completion",
|
352 |
-
"system_fingerprint": null,
|
353 |
-
"choices": [
|
354 |
-
{
|
355 |
-
"finish_reason": "tool_calls",
|
356 |
-
"index": 0,
|
357 |
-
"message": {
|
358 |
-
"content": "Now, let's create the JavaScript file with the Vue application logic:",
|
359 |
-
"role": "assistant",
|
360 |
-
"tool_calls": [
|
361 |
-
{
|
362 |
-
"index": 1,
|
363 |
-
"function": {
|
364 |
-
"arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/app.js\", \"file_text\": \"const { createApp } = Vue\\n\\nconst STORAGE_KEY = 'vue-todo-list'\\n\\ncreateApp({\\n data() {\\n return {\\n todos: [],\\n newTodoText: '',\\n newTodoDueDate: '',\\n editingTodo: null,\\n editingText: '',\\n editingDueDate: ''\\n }\\n },\\n methods: {\\n loadTodos() {\\n const savedTodos = localStorage.getItem(STORAGE_KEY)\\n if (savedTodos) {\\n this.todos = JSON.parse(savedTodos)\\n }\\n },\\n saveTodos() {\\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\\n },\\n addTodo() {\\n if (!this.newTodoText.trim()) return\\n\\n const todo = {\\n id: Date.now(),\\n text: this.newTodoText.trim(),\\n completed: false,\\n dueDate: this.newTodoDueDate || null\\n }\\n\\n this.todos.push(todo)\\n this.newTodoText = ''\\n this.newTodoDueDate = ''\\n this.saveTodos()\\n },\\n toggleTodo(todo) {\\n todo.completed = !todo.completed\\n this.saveTodos()\\n },\\n removeTodo(todo) {\\n const index = this.todos.indexOf(todo)\\n if (index > -1) {\\n this.todos.splice(index, 1)\\n this.saveTodos()\\n }\\n },\\n startEdit(todo) {\\n this.editingTodo = todo.id\\n this.editingText = todo.text\\n this.editingDueDate = todo.dueDate || ''\\n },\\n saveTodoEdit(todo) {\\n if (!this.editingText.trim()) return\\n\\n todo.text = this.editingText.trim()\\n todo.dueDate = this.editingDueDate || null\\n this.editingTodo = null\\n this.saveTodos()\\n },\\n cancelEdit() {\\n this.editingTodo = null\\n },\\n formatDate(dateString) {\\n return new Date(dateString).toLocaleDateString()\\n }\\n },\\n mounted() {\\n this.loadTodos()\\n }\\n}).mount('#app')\"}",
|
365 |
-
"name": "str_replace_editor"
|
366 |
-
},
|
367 |
-
"id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
|
368 |
-
"type": "function"
|
369 |
-
}
|
370 |
-
],
|
371 |
-
"function_call": null
|
372 |
-
}
|
373 |
-
}
|
374 |
-
],
|
375 |
-
"usage": {
|
376 |
-
"completion_tokens": 693,
|
377 |
-
"prompt_tokens": 4769,
|
378 |
-
"total_tokens": 5462,
|
379 |
-
"completion_tokens_details": null,
|
380 |
-
"prompt_tokens_details": {
|
381 |
-
"audio_tokens": null,
|
382 |
-
"cached_tokens": 3595
|
383 |
-
},
|
384 |
-
"cache_creation_input_tokens": 1168,
|
385 |
-
"cache_read_input_tokens": 3595
|
386 |
-
},
|
387 |
-
"service_tier": null
|
388 |
-
},
|
389 |
-
"total_calls_in_response": 1
|
390 |
-
},
|
391 |
-
"args": {
|
392 |
-
"path": "/workspace/todo-app/app.js",
|
393 |
-
"content": "",
|
394 |
-
"start": 1,
|
395 |
-
"end": -1,
|
396 |
-
"thought": "Now, let's create the JavaScript file with the Vue application logic:",
|
397 |
-
"impl_source": "oh_aci",
|
398 |
-
"translated_ipython_code": "print(file_editor(**{'command': 'create', 'path': '/workspace/todo-app/app.js', 'file_text': \"const { createApp } = Vue\\n\\nconst STORAGE_KEY = 'vue-todo-list'\\n\\ncreateApp({\\n data() {\\n return {\\n todos: [],\\n newTodoText: '',\\n newTodoDueDate: '',\\n editingTodo: null,\\n editingText: '',\\n editingDueDate: ''\\n }\\n },\\n methods: {\\n loadTodos() {\\n const savedTodos = localStorage.getItem(STORAGE_KEY)\\n if (savedTodos) {\\n this.todos = JSON.parse(savedTodos)\\n }\\n },\\n saveTodos() {\\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\\n },\\n addTodo() {\\n if (!this.newTodoText.trim()) return\\n\\n const todo = {\\n id: Date.now(),\\n text: this.newTodoText.trim(),\\n completed: false,\\n dueDate: this.newTodoDueDate || null\\n }\\n\\n this.todos.push(todo)\\n this.newTodoText = ''\\n this.newTodoDueDate = ''\\n this.saveTodos()\\n },\\n toggleTodo(todo) {\\n todo.completed = !todo.completed\\n this.saveTodos()\\n },\\n removeTodo(todo) {\\n const index = this.todos.indexOf(todo)\\n if (index > -1) {\\n this.todos.splice(index, 1)\\n this.saveTodos()\\n }\\n },\\n startEdit(todo) {\\n this.editingTodo = todo.id\\n this.editingText = todo.text\\n this.editingDueDate = todo.dueDate || ''\\n },\\n saveTodoEdit(todo) {\\n if (!this.editingText.trim()) return\\n\\n todo.text = this.editingText.trim()\\n todo.dueDate = this.editingDueDate || null\\n this.editingTodo = null\\n this.saveTodos()\\n },\\n cancelEdit() {\\n this.editingTodo = null\\n },\\n formatDate(dateString) {\\n return new Date(dateString).toLocaleDateString()\\n }\\n },\\n mounted() {\\n this.loadTodos()\\n }\\n}).mount('#app')\"}))"
|
399 |
-
}
|
400 |
-
},
|
401 |
-
{
|
402 |
-
"id": 12,
|
403 |
-
"timestamp": "2025-01-20T20:29:56.933329",
|
404 |
-
"source": "agent",
|
405 |
-
"message": "I edited the file /workspace/todo-app/app.js.",
|
406 |
-
"cause": 11,
|
407 |
-
"observation": "edit",
|
408 |
-
"tool_call_metadata": {
|
409 |
-
"function_name": "str_replace_editor",
|
410 |
-
"tool_call_id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
|
411 |
-
"model_response": {
|
412 |
-
"id": "chatcmpl-34fef176-276b-4a18-892f-688b08f51225",
|
413 |
-
"created": 1737433796,
|
414 |
-
"model": "claude-3-5-sonnet-20241022",
|
415 |
-
"object": "chat.completion",
|
416 |
-
"system_fingerprint": null,
|
417 |
-
"choices": [
|
418 |
-
{
|
419 |
-
"finish_reason": "tool_calls",
|
420 |
-
"index": 0,
|
421 |
-
"message": {
|
422 |
-
"content": "Now, let's create the JavaScript file with the Vue application logic:",
|
423 |
-
"role": "assistant",
|
424 |
-
"tool_calls": [
|
425 |
-
{
|
426 |
-
"index": 1,
|
427 |
-
"function": {
|
428 |
-
"arguments": "{\"command\": \"create\", \"path\": \"/workspace/todo-app/app.js\", \"file_text\": \"const { createApp } = Vue\\n\\nconst STORAGE_KEY = 'vue-todo-list'\\n\\ncreateApp({\\n data() {\\n return {\\n todos: [],\\n newTodoText: '',\\n newTodoDueDate: '',\\n editingTodo: null,\\n editingText: '',\\n editingDueDate: ''\\n }\\n },\\n methods: {\\n loadTodos() {\\n const savedTodos = localStorage.getItem(STORAGE_KEY)\\n if (savedTodos) {\\n this.todos = JSON.parse(savedTodos)\\n }\\n },\\n saveTodos() {\\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\\n },\\n addTodo() {\\n if (!this.newTodoText.trim()) return\\n\\n const todo = {\\n id: Date.now(),\\n text: this.newTodoText.trim(),\\n completed: false,\\n dueDate: this.newTodoDueDate || null\\n }\\n\\n this.todos.push(todo)\\n this.newTodoText = ''\\n this.newTodoDueDate = ''\\n this.saveTodos()\\n },\\n toggleTodo(todo) {\\n todo.completed = !todo.completed\\n this.saveTodos()\\n },\\n removeTodo(todo) {\\n const index = this.todos.indexOf(todo)\\n if (index > -1) {\\n this.todos.splice(index, 1)\\n this.saveTodos()\\n }\\n },\\n startEdit(todo) {\\n this.editingTodo = todo.id\\n this.editingText = todo.text\\n this.editingDueDate = todo.dueDate || ''\\n },\\n saveTodoEdit(todo) {\\n if (!this.editingText.trim()) return\\n\\n todo.text = this.editingText.trim()\\n todo.dueDate = this.editingDueDate || null\\n this.editingTodo = null\\n this.saveTodos()\\n },\\n cancelEdit() {\\n this.editingTodo = null\\n },\\n formatDate(dateString) {\\n return new Date(dateString).toLocaleDateString()\\n }\\n },\\n mounted() {\\n this.loadTodos()\\n }\\n}).mount('#app')\"}",
|
429 |
-
"name": "str_replace_editor"
|
430 |
-
},
|
431 |
-
"id": "toolu_01GpJj2ShdFjb9ZTY4CVxi9C",
|
432 |
-
"type": "function"
|
433 |
-
}
|
434 |
-
],
|
435 |
-
"function_call": null
|
436 |
-
}
|
437 |
-
}
|
438 |
-
],
|
439 |
-
"usage": {
|
440 |
-
"completion_tokens": 693,
|
441 |
-
"prompt_tokens": 4769,
|
442 |
-
"total_tokens": 5462,
|
443 |
-
"completion_tokens_details": null,
|
444 |
-
"prompt_tokens_details": {
|
445 |
-
"audio_tokens": null,
|
446 |
-
"cached_tokens": 3595
|
447 |
-
},
|
448 |
-
"cache_creation_input_tokens": 1168,
|
449 |
-
"cache_read_input_tokens": 3595
|
450 |
-
},
|
451 |
-
"service_tier": null
|
452 |
-
},
|
453 |
-
"total_calls_in_response": 1
|
454 |
-
},
|
455 |
-
"content": "--- /workspace/todo-app/app.js\n+++ /workspace/todo-app/app.js\n@@ -0,0 +1 @@\n+const { createApp } = Vue\n@@ -1,0 +3,73 @@\n+const STORAGE_KEY = 'vue-todo-list'\n+\n+createApp({\n+ data() {\n+ return {\n+ todos: [],\n+ newTodoText: '',\n+ newTodoDueDate: '',\n+ editingTodo: null,\n+ editingText: '',\n+ editingDueDate: ''\n+ }\n+ },\n+ methods: {\n+ loadTodos() {\n+ const savedTodos = localStorage.getItem(STORAGE_KEY)\n+ if (savedTodos) {\n+ this.todos = JSON.parse(savedTodos)\n+ }\n+ },\n+ saveTodos() {\n+ localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\n+ },\n+ addTodo() {\n+ if (!this.newTodoText.trim()) return\n+\n+ const todo = {\n+ id: Date.now(),\n+ text: this.newTodoText.trim(),\n+ completed: false,\n+ dueDate: this.newTodoDueDate || null\n+ }\n+\n+ this.todos.push(todo)\n+ this.newTodoText = ''\n+ this.newTodoDueDate = ''\n+ this.saveTodos()\n+ },\n+ toggleTodo(todo) {\n+ todo.completed = !todo.completed\n+ this.saveTodos()\n+ },\n+ removeTodo(todo) {\n+ const index = this.todos.indexOf(todo)\n+ if (index > -1) {\n+ this.todos.splice(index, 1)\n+ this.saveTodos()\n+ }\n+ },\n+ startEdit(todo) {\n+ this.editingTodo = todo.id\n+ this.editingText = todo.text\n+ this.editingDueDate = todo.dueDate || ''\n+ },\n+ saveTodoEdit(todo) {\n+ if (!this.editingText.trim()) return\n+\n+ todo.text = this.editingText.trim()\n+ todo.dueDate = this.editingDueDate || null\n+ this.editingTodo = null\n+ this.saveTodos()\n+ },\n+ cancelEdit() {\n+ this.editingTodo = null\n+ },\n+ formatDate(dateString) {\n+ return new Date(dateString).toLocaleDateString()\n+ }\n+ },\n+ mounted() {\n+ this.loadTodos()\n+ }\n+}).mount('#app')",
|
456 |
-
"extras": {
|
457 |
-
"path": "/workspace/todo-app/app.js",
|
458 |
-
"prev_exist": false,
|
459 |
-
"old_content": null,
|
460 |
-
"new_content": "const { createApp } = Vue\n\nconst STORAGE_KEY = 'vue-todo-list'\n\ncreateApp({\n data() {\n return {\n todos: [],\n newTodoText: '',\n newTodoDueDate: '',\n editingTodo: null,\n editingText: '',\n editingDueDate: ''\n }\n },\n methods: {\n loadTodos() {\n const savedTodos = localStorage.getItem(STORAGE_KEY)\n if (savedTodos) {\n this.todos = JSON.parse(savedTodos)\n }\n },\n saveTodos() {\n localStorage.setItem(STORAGE_KEY, JSON.stringify(this.todos))\n },\n addTodo() {\n if (!this.newTodoText.trim()) return\n\n const todo = {\n id: Date.now(),\n text: this.newTodoText.trim(),\n completed: false,\n dueDate: this.newTodoDueDate || null\n }\n\n this.todos.push(todo)\n this.newTodoText = ''\n this.newTodoDueDate = ''\n this.saveTodos()\n },\n toggleTodo(todo) {\n todo.completed = !todo.completed\n this.saveTodos()\n },\n removeTodo(todo) {\n const index = this.todos.indexOf(todo)\n if (index > -1) {\n this.todos.splice(index, 1)\n this.saveTodos()\n }\n },\n startEdit(todo) {\n this.editingTodo = todo.id\n this.editingText = todo.text\n this.editingDueDate = todo.dueDate || ''\n },\n saveTodoEdit(todo) {\n if (!this.editingText.trim()) return\n\n todo.text = this.editingText.trim()\n todo.dueDate = this.editingDueDate || null\n this.editingTodo = null\n this.saveTodos()\n },\n cancelEdit() {\n this.editingTodo = null\n },\n formatDate(dateString) {\n return new Date(dateString).toLocaleDateString()\n }\n },\n mounted() {\n this.loadTodos()\n }\n}).mount('#app')",
|
461 |
-
"impl_source": "oh_aci",
|
462 |
-
"formatted_output_and_error": "File created successfully at: /workspace/todo-app/app.js"
|
463 |
-
}
|
464 |
-
},
|
465 |
-
{
|
466 |
-
"id": 13,
|
467 |
-
"timestamp": "2025-01-20T20:29:59.282034",
|
468 |
-
"source": "agent",
|
469 |
-
"message": "Running command: cd /workspace/todo-app && python3 -m http.server 8000",
|
470 |
-
"action": "run",
|
471 |
-
"tool_call_metadata": {
|
472 |
-
"function_name": "execute_bash",
|
473 |
-
"tool_call_id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
|
474 |
-
"model_response": {
|
475 |
-
"id": "chatcmpl-51a85fbb-dafc-4519-a7d2-df369e38d76f",
|
476 |
-
"created": 1737433799,
|
477 |
-
"model": "claude-3-5-sonnet-20241022",
|
478 |
-
"object": "chat.completion",
|
479 |
-
"system_fingerprint": null,
|
480 |
-
"choices": [
|
481 |
-
{
|
482 |
-
"finish_reason": "tool_calls",
|
483 |
-
"index": 0,
|
484 |
-
"message": {
|
485 |
-
"content": "Let's serve this application using a simple Python HTTP server:",
|
486 |
-
"role": "assistant",
|
487 |
-
"tool_calls": [
|
488 |
-
{
|
489 |
-
"index": 1,
|
490 |
-
"function": {
|
491 |
-
"arguments": "{\"command\": \"cd /workspace/todo-app && python3 -m http.server 8000\"}",
|
492 |
-
"name": "execute_bash"
|
493 |
-
},
|
494 |
-
"id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
|
495 |
-
"type": "function"
|
496 |
-
}
|
497 |
-
],
|
498 |
-
"function_call": null
|
499 |
-
}
|
500 |
-
}
|
501 |
-
],
|
502 |
-
"usage": {
|
503 |
-
"completion_tokens": 85,
|
504 |
-
"prompt_tokens": 5488,
|
505 |
-
"total_tokens": 5573,
|
506 |
-
"completion_tokens_details": null,
|
507 |
-
"prompt_tokens_details": {
|
508 |
-
"audio_tokens": null,
|
509 |
-
"cached_tokens": 4763
|
510 |
-
},
|
511 |
-
"cache_creation_input_tokens": 719,
|
512 |
-
"cache_read_input_tokens": 4763
|
513 |
-
},
|
514 |
-
"service_tier": null
|
515 |
-
},
|
516 |
-
"total_calls_in_response": 1
|
517 |
-
},
|
518 |
-
"args": {
|
519 |
-
"command": "cd /workspace/todo-app && python3 -m http.server 8000",
|
520 |
-
"is_input": false,
|
521 |
-
"thought": "Let's serve this application using a simple Python HTTP server:",
|
522 |
-
"blocking": false,
|
523 |
-
"hidden": false,
|
524 |
-
"confirmation_state": "confirmed"
|
525 |
-
}
|
526 |
-
},
|
527 |
-
{
|
528 |
-
"id": 14,
|
529 |
-
"timestamp": "2025-01-20T20:30:29.862264",
|
530 |
-
"source": "agent",
|
531 |
-
"message": "Command `cd /workspace/todo-app && python3 -m http.server 8000` executed with exit code -1.",
|
532 |
-
"cause": 13,
|
533 |
-
"observation": "run",
|
534 |
-
"tool_call_metadata": {
|
535 |
-
"function_name": "execute_bash",
|
536 |
-
"tool_call_id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
|
537 |
-
"model_response": {
|
538 |
-
"id": "chatcmpl-51a85fbb-dafc-4519-a7d2-df369e38d76f",
|
539 |
-
"created": 1737433799,
|
540 |
-
"model": "claude-3-5-sonnet-20241022",
|
541 |
-
"object": "chat.completion",
|
542 |
-
"system_fingerprint": null,
|
543 |
-
"choices": [
|
544 |
-
{
|
545 |
-
"finish_reason": "tool_calls",
|
546 |
-
"index": 0,
|
547 |
-
"message": {
|
548 |
-
"content": "Let's serve this application using a simple Python HTTP server:",
|
549 |
-
"role": "assistant",
|
550 |
-
"tool_calls": [
|
551 |
-
{
|
552 |
-
"index": 1,
|
553 |
-
"function": {
|
554 |
-
"arguments": "{\"command\": \"cd /workspace/todo-app && python3 -m http.server 8000\"}",
|
555 |
-
"name": "execute_bash"
|
556 |
-
},
|
557 |
-
"id": "toolu_01DwihWr4ooPZS1CGTLAWrFP",
|
558 |
-
"type": "function"
|
559 |
-
}
|
560 |
-
],
|
561 |
-
"function_call": null
|
562 |
-
}
|
563 |
-
}
|
564 |
-
],
|
565 |
-
"usage": {
|
566 |
-
"completion_tokens": 85,
|
567 |
-
"prompt_tokens": 5488,
|
568 |
-
"total_tokens": 5573,
|
569 |
-
"completion_tokens_details": null,
|
570 |
-
"prompt_tokens_details": {
|
571 |
-
"audio_tokens": null,
|
572 |
-
"cached_tokens": 4763
|
573 |
-
},
|
574 |
-
"cache_creation_input_tokens": 719,
|
575 |
-
"cache_read_input_tokens": 4763
|
576 |
-
},
|
577 |
-
"service_tier": null
|
578 |
-
},
|
579 |
-
"total_calls_in_response": 1
|
580 |
-
},
|
581 |
-
"content": "Serving HTTP on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ...",
|
582 |
-
"extras": {
|
583 |
-
"command": "cd /workspace/todo-app && python3 -m http.server 8000",
|
584 |
-
"metadata": {
|
585 |
-
"exit_code": -1,
|
586 |
-
"pid": -1,
|
587 |
-
"username": null,
|
588 |
-
"hostname": null,
|
589 |
-
"working_dir": null,
|
590 |
-
"py_interpreter_path": null,
|
591 |
-
"prefix": "",
|
592 |
-
"suffix": "\n[The command has no new output after 30 seconds. You may wait longer to see additional output by sending empty command '', send other commands to interact with the current process, send keys to interrupt/kill the command, or use the timeout parameter in execute_bash for future commands.]"
|
593 |
-
},
|
594 |
-
"hidden": false
|
595 |
-
},
|
596 |
-
"success": false
|
597 |
-
},
|
598 |
-
{
|
599 |
-
"id": 15,
|
600 |
-
"timestamp": "2025-01-20T20:30:35.414886",
|
601 |
-
"source": "agent",
|
602 |
-
"message": "I've created a complete Vue.js todo application with all the requested features. Here's what the application includes:\n\n1. Todo List Features:\n - View all todos\n - Add new todos with optional due dates\n - Mark todos as complete/incomplete\n - Delete todos\n - Edit todo text and due date\n - Persistent storage using localStorage\n\n2. UI Features:\n - Clean, responsive design using Bootstrap\n - Completed todos are shown with strikethrough\n - Due dates are displayed in a readable format\n - Edit mode with save/cancel functionality\n - Form validation to prevent empty todos\n\nThe application is now running on port 8000. You can access it by opening a web browser and navigating to http://localhost:8000\n\nTo use the application:\n1. Add a new todo by typing in the text field and optionally selecting a due date\n2. Click the checkbox to mark a todo as complete\n3. Click the \"Edit\" button to modify a todo's text or due date\n4. Click the \"Delete\" button to remove a todo\n5. All changes are automatically saved to localStorage\n\nThe data will persist even if you close and reopen your browser. Would you like me to explain any specific part of the implementation or make any adjustments to the functionality?",
|
603 |
-
"action": "message",
|
604 |
-
"args": {
|
605 |
-
"content": "I've created a complete Vue.js todo application with all the requested features. Here's what the application includes:\n\n1. Todo List Features:\n - View all todos\n - Add new todos with optional due dates\n - Mark todos as complete/incomplete\n - Delete todos\n - Edit todo text and due date\n - Persistent storage using localStorage\n\n2. UI Features:\n - Clean, responsive design using Bootstrap\n - Completed todos are shown with strikethrough\n - Due dates are displayed in a readable format\n - Edit mode with save/cancel functionality\n - Form validation to prevent empty todos\n\nThe application is now running on port 8000. You can access it by opening a web browser and navigating to http://localhost:8000\n\nTo use the application:\n1. Add a new todo by typing in the text field and optionally selecting a due date\n2. Click the checkbox to mark a todo as complete\n3. Click the \"Edit\" button to modify a todo's text or due date\n4. Click the \"Delete\" button to remove a todo\n5. All changes are automatically saved to localStorage\n\nThe data will persist even if you close and reopen your browser. Would you like me to explain any specific part of the implementation or make any adjustments to the functionality?",
|
606 |
-
"image_urls": null,
|
607 |
-
"wait_for_response": true
|
608 |
-
}
|
609 |
-
},
|
610 |
-
{
|
611 |
-
"id": 16,
|
612 |
-
"timestamp": "2025-01-20T20:30:35.421080",
|
613 |
-
"source": "agent",
|
614 |
-
"message": "No observation",
|
615 |
-
"cause": 15,
|
616 |
-
"observation": "null",
|
617 |
-
"content": "",
|
618 |
-
"extras": {}
|
619 |
-
},
|
620 |
-
{
|
621 |
-
"id": 17,
|
622 |
-
"timestamp": "2025-01-20T20:30:35.429675",
|
623 |
-
"source": "environment",
|
624 |
-
"message": "",
|
625 |
-
"observation": "agent_state_changed",
|
626 |
-
"content": "",
|
627 |
-
"extras": {
|
628 |
-
"agent_state": "awaiting_user_input"
|
629 |
-
}
|
630 |
-
}
|
631 |
-
]
|
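In the GUI-mode fixture deleted above, every observation carries a `"cause"` field pointing back at the id of the action that produced it (e.g. observation 8 is caused by action 7). As a sketch under that assumption — `check_cause_links` is a hypothetical helper, not part of the deleted test suite — the invariant can be verified with only the standard library:

```python
import json

def check_cause_links(path: str) -> None:
    """Every event's `cause` should reference an earlier event id
    in the same trajectory (a hypothetical invariant check)."""
    with open(path) as f:
        events = json.load(f)
    seen_ids = set()
    for event in events:
        cause = event.get("cause")
        if cause is not None:
            assert cause in seen_ids, (
                f"event {event['id']} has dangling cause {cause}"
            )
        seen_ids.add(event["id"])

check_cause_links("tests/runtime/trajs/basic_gui_mode.json")
```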
tests/runtime/trajs/basic_interactions.json
DELETED
@@ -1,128 +0,0 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"id": 0,
|
4 |
-
"timestamp": "2025-02-01T00:14:10.787622",
|
5 |
-
"source": "user",
|
6 |
-
"message": "what's 1+1?",
|
7 |
-
"action": "message",
|
8 |
-
"args": {
|
9 |
-
"content": "what's 1+1?",
|
10 |
-
"image_urls": null,
|
11 |
-
"wait_for_response": false
|
12 |
-
},
|
13 |
-
"timeout": 120
|
14 |
-
},
|
15 |
-
{
|
16 |
-
"id": 3,
|
17 |
-
"timestamp": "2025-02-01T00:14:12.686046",
|
18 |
-
"source": "agent",
|
19 |
-
"message": "1 + 1 equals 2.",
|
20 |
-
"action": "message",
|
21 |
-
"args": {
|
22 |
-
"content": "1 + 1 equals 2.",
|
23 |
-
"image_urls": null,
|
24 |
-
"wait_for_response": true
|
25 |
-
},
|
26 |
-
"timeout": 120
|
27 |
-
},
|
28 |
-
{
|
29 |
-
"id": 6,
|
30 |
-
"timestamp": "2025-02-01T00:17:23.228442",
|
31 |
-
"source": "user",
|
32 |
-
"message": "No, I mean by Goldbach's conjecture!",
|
33 |
-
"action": "message",
|
34 |
-
"args": {
|
35 |
-
"content": "No, I mean by Goldbach's conjecture!",
|
36 |
-
"image_urls": null,
|
37 |
-
"wait_for_response": false
|
38 |
-
},
|
39 |
-
"timeout": 120
|
40 |
-
},
|
41 |
-
{
|
42 |
-
"id": 9,
|
43 |
-
"timestamp": "2025-02-01T00:17:25.586532",
|
44 |
-
"source": "agent",
|
45 |
-
"message": "Goldbach's conjecture is an unsolved problem in number theory that suggests every even integer greater than 2 can be expressed as the sum of two prime numbers. However, it doesn't directly apply to the sum of 1 + 1, as 1 is not considered a prime number. The conjecture is more relevant to larger even numbers.",
|
46 |
-
"action": "message",
|
47 |
-
"args": {
|
48 |
-
"content": "Goldbach's conjecture is an unsolved problem in number theory that suggests every even integer greater than 2 can be expressed as the sum of two prime numbers. However, it doesn't directly apply to the sum of 1 + 1, as 1 is not considered a prime number. The conjecture is more relevant to larger even numbers.",
|
49 |
-
"image_urls": null,
|
50 |
-
"wait_for_response": true
|
51 |
-
},
|
52 |
-
"timeout": 120
|
53 |
-
},
|
54 |
-
{
|
55 |
-
"id": 12,
|
56 |
-
"timestamp": "2025-02-01T00:18:59.847433",
|
57 |
-
"source": "user",
|
58 |
-
"message": "Finish please",
|
59 |
-
"action": "message",
|
60 |
-
"args": {
|
61 |
-
"content": "Finish please",
|
62 |
-
"image_urls": null,
|
63 |
-
"wait_for_response": false
|
64 |
-
},
|
65 |
-
"timeout": 120
|
66 |
-
},
|
67 |
-
{
|
68 |
-
"id": 15,
|
69 |
-
"timestamp": "2025-02-01T00:19:01.168854",
|
70 |
-
"source": "agent",
|
71 |
-
"message": "All done! What's next on the agenda?",
|
72 |
-
"action": "finish",
|
73 |
-
"tool_call_metadata": {
|
74 |
-
"function_name": "finish",
|
75 |
-
"tool_call_id": "call_IpyczPgEAwtpRWFZ2RNokTdH",
|
76 |
-
"model_response": {
|
77 |
-
"id": "chatcmpl-Aw30O6Ka8n1X3F6F7YUh91NxXBE0y",
|
78 |
-
"created": 1738397940,
|
79 |
-
"model": "gpt-4o-2024-08-06",
|
80 |
-
"object": "chat.completion",
|
81 |
-
"system_fingerprint": "fp_4691090a87",
|
82 |
-
"choices": [
|
83 |
-
{
|
84 |
-
"finish_reason": "tool_calls",
|
85 |
-
"index": 0,
|
86 |
-
"message": {
|
87 |
-
"content": null,
|
88 |
-
"role": "assistant",
|
89 |
-
"tool_calls": [
|
90 |
-
{
|
91 |
-
"function": {
|
92 |
-
"arguments": "{}",
|
93 |
-
"name": "finish"
|
94 |
-
},
|
95 |
-
"id": "call_IpyczPgEAwtpRWFZ2RNokTdH",
|
96 |
-
"type": "function"
|
97 |
-
}
|
98 |
-
],
|
99 |
-
"function_call": null
|
100 |
-
}
|
101 |
-
}
|
102 |
-
],
|
103 |
-
"usage": {
|
104 |
-
"completion_tokens": 10,
|
105 |
-
"prompt_tokens": 2473,
|
106 |
-
"total_tokens": 2483,
|
107 |
-
"completion_tokens_details": {
|
108 |
-
"accepted_prediction_tokens": 0,
|
109 |
-
"audio_tokens": 0,
|
110 |
-
"reasoning_tokens": 0,
|
111 |
-
"rejected_prediction_tokens": 0
|
112 |
-
},
|
113 |
-
"prompt_tokens_details": {
|
114 |
-
"audio_tokens": 0,
|
115 |
-
"cached_tokens": 2304
|
116 |
-
}
|
117 |
-
},
|
118 |
-
"service_tier": "default"
|
119 |
-
},
|
120 |
-
"total_calls_in_response": 1
|
121 |
-
},
|
122 |
-
"args": {
|
123 |
-
"outputs": {},
|
124 |
-
"thought": ""
|
125 |
-
},
|
126 |
-
"timeout": 120
|
127 |
-
}
|
128 |
-
]
|
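The `basic_interactions.json` fixture deleted above is a minimal back-and-forth conversation (user question, agent answer, repeated, then `finish`), so the turn sequence can be reconstructed directly from the `source` and `message` fields of each event. A minimal sketch, assuming a local copy of the file:

```python
import json

with open("tests/runtime/trajs/basic_interactions.json") as f:
    events = json.load(f)

# Reconstruct the conversation turns from the recorded events.
turns = [(e["source"], e["message"]) for e in events if e.get("message")]
for source, message in turns:
    print(f"{source:>5}: {message[:60]}")
```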
tests/runtime/trajs/wrong_initial_state.json
DELETED
@@ -1,454 +0,0 @@
-[
-  {
-    "id": 0,
-    "timestamp": "2025-01-27T23:16:50.239610",
-    "source": "user",
-    "message": "Please rename game_2048.py to 2048.py",
-    "action": "message",
-    "args": {
-      "content": "Please rename game_2048.py to 2048.py",
-      "image_urls": null,
-      "wait_for_response": false
-    },
-    "timeout": 120
-  },
-  {
-    "id": 3,
-    "timestamp": "2025-01-27T23:16:54.105108",
-    "source": "agent",
-    "message": "Reading file: /workspace",
-    "action": "read",
-    "tool_call_metadata": {
-      "function_name": "str_replace_editor",
-      "tool_call_id": "toolu_01USom24cuiyvAembLhEryrn",
-      "model_response": {
-        "id": "chatcmpl-90c35dea-eb25-484c-aaa7-d9d00048f64a",
-        "created": 1738048613,
-        "model": "claude-3-5-sonnet-20241022",
-        "object": "chat.completion",
-        "system_fingerprint": null,
-        "choices": [
-          {
-            "finish_reason": "tool_calls",
-            "index": 0,
-            "message": {
-              "content": "I'll help you rename the file. First, let me check if the file exists and its location.",
-              "role": "assistant",
-              "tool_calls": [
-                {
-                  "index": 1,
-                  "function": {
-                    "arguments": "{\"command\": \"view\", \"path\": \"/workspace\"}",
-                    "name": "str_replace_editor"
-                  },
-                  "id": "toolu_01USom24cuiyvAembLhEryrn",
-                  "type": "function"
-                }
-              ],
-              "function_call": null
-            }
-          }
-        ],
-        "usage": {
-          "completion_tokens": 95,
-          "prompt_tokens": 3475,
-          "total_tokens": 3570,
-          "completion_tokens_details": null,
-          "prompt_tokens_details": {
-            "audio_tokens": null,
-            "cached_tokens": 0
-          },
-          "cache_creation_input_tokens": 3471,
-          "cache_read_input_tokens": 0
-        },
-        "service_tier": null
-      },
-      "total_calls_in_response": 1
-    },
-    "args": {
-      "path": "/workspace",
-      "start": 0,
-      "end": -1,
-      "thought": "I'll help you rename the file. First, let me check if the file exists and its location.",
-      "impl_source": "oh_aci",
-      "translated_ipython_code": "print(file_editor(**{'command': 'view', 'path': '/workspace'}))"
-    },
-    "timeout": 120
-  },
-  {
-    "id": 4,
-    "timestamp": "2025-01-27T23:16:54.140581",
-    "source": "agent",
-    "message": "I read the file /workspace.",
-    "cause": 3,
-    "observation": "read",
-    "tool_call_metadata": {
-      "function_name": "str_replace_editor",
-      "tool_call_id": "toolu_01USom24cuiyvAembLhEryrn",
-      "model_response": {
-        "id": "chatcmpl-90c35dea-eb25-484c-aaa7-d9d00048f64a",
-        "created": 1738048613,
-        "model": "claude-3-5-sonnet-20241022",
-        "object": "chat.completion",
-        "system_fingerprint": null,
-        "choices": [
-          {
-            "finish_reason": "tool_calls",
-            "index": 0,
-            "message": {
-              "content": "I'll help you rename the file. First, let me check if the file exists and its location.",
-              "role": "assistant",
-              "tool_calls": [
-                {
-                  "index": 1,
-                  "function": {
-                    "arguments": "{\"command\": \"view\", \"path\": \"/workspace\"}",
-                    "name": "str_replace_editor"
-                  },
-                  "id": "toolu_01USom24cuiyvAembLhEryrn",
-                  "type": "function"
-                }
-              ],
-              "function_call": null
-            }
-          }
-        ],
-        "usage": {
-          "completion_tokens": 95,
-          "prompt_tokens": 3475,
-          "total_tokens": 3570,
-          "completion_tokens_details": null,
-          "prompt_tokens_details": {
-            "audio_tokens": null,
-            "cached_tokens": 0
-          },
-          "cache_creation_input_tokens": 3471,
-          "cache_read_input_tokens": 0
-        },
-        "service_tier": null
-      },
-      "total_calls_in_response": 1
-    },
-    "content": "Here's the files and directories up to 2 levels deep in /workspace, excluding hidden items:\n/workspace/\n/workspace/game_2048.py",
-    "extras": {
-      "path": "/workspace",
-      "impl_source": "oh_aci"
-    }
-  },
-  {
-    "id": 5,
-    "timestamp": "2025-01-27T23:16:56.951967",
-    "source": "agent",
-    "message": "Running command: mv /workspace/game_2048.py /workspace/2048.py",
-    "action": "run",
-    "tool_call_metadata": {
-      "function_name": "execute_bash",
-      "tool_call_id": "toolu_01PuY3mXrE6fm3j52GscR4rj",
-      "model_response": {
-        "id": "chatcmpl-1f2bc7b4-848a-45ec-a2d8-aff83e5adbd6",
-        "created": 1738048616,
-        "model": "claude-3-5-sonnet-20241022",
-        "object": "chat.completion",
-        "system_fingerprint": null,
-        "choices": [
-          {
-            "finish_reason": "tool_calls",
-            "index": 0,
-            "message": {
-              "content": "Great, I found the file. Now I'll use the bash command `mv` to rename it.",
-              "role": "assistant",
-              "tool_calls": [
-                {
-                  "index": 1,
-                  "function": {
-                    "arguments": "{\"command\": \"mv /workspace/game_2048.py /workspace/2048.py\", \"is_input\": \"false\"}",
-                    "name": "execute_bash"
-                  },
-                  "id": "toolu_01PuY3mXrE6fm3j52GscR4rj",
-                  "type": "function"
-                }
-              ],
-              "function_call": null
-            }
-          }
-        ],
-        "usage": {
-          "completion_tokens": 111,
-          "prompt_tokens": 3616,
-          "total_tokens": 3727,
-          "completion_tokens_details": null,
-          "prompt_tokens_details": {
-            "audio_tokens": null,
-            "cached_tokens": 3471
-          },
-          "cache_creation_input_tokens": 139,
-          "cache_read_input_tokens": 3471
-        },
-        "service_tier": null
-      },
-      "total_calls_in_response": 1
-    },
-    "args": {
-      "command": "mv /workspace/game_2048.py /workspace/2048.py",
-      "is_input": false,
-      "thought": "Great, I found the file. Now I'll use the bash command `mv` to rename it.",
-      "blocking": false,
-      "hidden": false,
-      "confirmation_state": "confirmed"
-    },
-    "timeout": 120
-  },
-  {
-    "id": 6,
-    "timestamp": "2025-01-27T23:16:57.600730",
-    "source": "agent",
-    "message": "Command `mv /workspace/game_2048.py /workspace/2048.py` executed with exit code 0.",
-    "cause": 5,
-    "observation": "run",
-    "tool_call_metadata": {
-      "function_name": "execute_bash",
-      "tool_call_id": "toolu_01PuY3mXrE6fm3j52GscR4rj",
-      "model_response": {
-        "id": "chatcmpl-1f2bc7b4-848a-45ec-a2d8-aff83e5adbd6",
-        "created": 1738048616,
-        "model": "claude-3-5-sonnet-20241022",
-        "object": "chat.completion",
-        "system_fingerprint": null,
-        "choices": [
-          {
-            "finish_reason": "tool_calls",
-            "index": 0,
-            "message": {
-              "content": "Great, I found the file. Now I'll use the bash command `mv` to rename it.",
-              "role": "assistant",
-              "tool_calls": [
-                {
-                  "index": 1,
-                  "function": {
-                    "arguments": "{\"command\": \"mv /workspace/game_2048.py /workspace/2048.py\", \"is_input\": \"false\"}",
-                    "name": "execute_bash"
-                  },
-                  "id": "toolu_01PuY3mXrE6fm3j52GscR4rj",
-                  "type": "function"
-                }
-              ],
-              "function_call": null
-            }
-          }
-        ],
-        "usage": {
-          "completion_tokens": 111,
-          "prompt_tokens": 3616,
-          "total_tokens": 3727,
-          "completion_tokens_details": null,
-          "prompt_tokens_details": {
-            "audio_tokens": null,
-            "cached_tokens": 3471
-          },
-          "cache_creation_input_tokens": 139,
-          "cache_read_input_tokens": 3471
-        },
-        "service_tier": null
-      },
-      "total_calls_in_response": 1
-    },
-    "content": "",
-    "extras": {
-      "command": "mv /workspace/game_2048.py /workspace/2048.py",
-      "metadata": {
-        "exit_code": 0,
-        "pid": -1,
-        "username": "openhands",
-        "hostname": "0301ebbced1e",
-        "working_dir": "/workspace",
-        "py_interpreter_path": "/openhands/poetry/openhands-ai-5O4_aCHf-py3.12/bin/python",
-        "prefix": "",
-        "suffix": "\n[The command completed with exit code 0.]"
-      },
-      "hidden": false
-    },
-    "success": true
-  },
-  {
-    "id": 7,
-    "timestamp": "2025-01-27T23:16:59.803261",
-    "source": "agent",
-    "message": "Reading file: /workspace",
-    "action": "read",
-    "tool_call_metadata": {
-      "function_name": "str_replace_editor",
-      "tool_call_id": "toolu_01DodVjoeVmEY3gU5ALbqmGj",
-      "model_response": {
-        "id": "chatcmpl-ea298ce5-bc1c-435d-b5f9-6eb6e6e864df",
-        "created": 1738048619,
-        "model": "claude-3-5-sonnet-20241022",
-        "object": "chat.completion",
-        "system_fingerprint": null,
-        "choices": [
-          {
-            "finish_reason": "tool_calls",
-            "index": 0,
-            "message": {
-              "content": "Let me verify that the file was renamed successfully:",
-              "role": "assistant",
-              "tool_calls": [
-                {
-                  "index": 1,
-                  "function": {
-                    "arguments": "{\"command\": \"view\", \"path\": \"/workspace\"}",
-                    "name": "str_replace_editor"
-                  },
-                  "id": "toolu_01DodVjoeVmEY3gU5ALbqmGj",
-                  "type": "function"
-                }
-              ],
-              "function_call": null
-            }
-          }
-        ],
-        "usage": {
-          "completion_tokens": 85,
-          "prompt_tokens": 3804,
-          "total_tokens": 3889,
-          "completion_tokens_details": null,
-          "prompt_tokens_details": {
-            "audio_tokens": null,
-            "cached_tokens": 3610
-          },
-          "cache_creation_input_tokens": 188,
-          "cache_read_input_tokens": 3610
-        },
-        "service_tier": null
-      },
-      "total_calls_in_response": 1
-    },
-    "args": {
-      "path": "/workspace",
-      "start": 0,
-      "end": -1,
-      "thought": "Let me verify that the file was renamed successfully:",
-      "impl_source": "oh_aci",
-      "translated_ipython_code": "print(file_editor(**{'command': 'view', 'path': '/workspace'}))"
-    },
-    "timeout": 120
-  },
-  {
-    "id": 8,
-    "timestamp": "2025-01-27T23:16:59.824084",
-    "source": "agent",
-    "message": "I read the file /workspace.",
-    "cause": 7,
-    "observation": "read",
-    "tool_call_metadata": {
-      "function_name": "str_replace_editor",
-      "tool_call_id": "toolu_01DodVjoeVmEY3gU5ALbqmGj",
-      "model_response": {
-        "id": "chatcmpl-ea298ce5-bc1c-435d-b5f9-6eb6e6e864df",
-        "created": 1738048619,
-        "model": "claude-3-5-sonnet-20241022",
-        "object": "chat.completion",
-        "system_fingerprint": null,
-        "choices": [
-          {
-            "finish_reason": "tool_calls",
-            "index": 0,
-            "message": {
-              "content": "Let me verify that the file was renamed successfully:",
-              "role": "assistant",
-              "tool_calls": [
-                {
-                  "index": 1,
-                  "function": {
-                    "arguments": "{\"command\": \"view\", \"path\": \"/workspace\"}",
-                    "name": "str_replace_editor"
-                  },
-                  "id": "toolu_01DodVjoeVmEY3gU5ALbqmGj",
-                  "type": "function"
-                }
-              ],
-              "function_call": null
-            }
-          }
-        ],
-        "usage": {
-          "completion_tokens": 85,
-          "prompt_tokens": 3804,
-          "total_tokens": 3889,
-          "completion_tokens_details": null,
-          "prompt_tokens_details": {
-            "audio_tokens": null,
-            "cached_tokens": 3610
-          },
-          "cache_creation_input_tokens": 188,
-          "cache_read_input_tokens": 3610
-        },
-        "service_tier": null
-      },
-      "total_calls_in_response": 1
-    },
-    "content": "Here's the files and directories up to 2 levels deep in /workspace, excluding hidden items:\n/workspace/\n/workspace/2048.py",
-    "extras": {
-      "path": "/workspace",
-      "impl_source": "oh_aci"
-    }
-  },
-  {
-    "id": 9,
-    "timestamp": "2025-01-27T23:17:02.182015",
-    "source": "agent",
-    "message": "The file has been successfully renamed from `game_2048.py` to `2048.py`. Is there anything else you need help with?",
-    "action": "finish",
-    "tool_call_metadata": {
-      "function_name": "finish",
-      "tool_call_id": "toolu_01G98EmHEC8HgktRjJBYXqEa",
-      "model_response": {
-        "id": "chatcmpl-fcf8f59b-beb3-4412-94d5-fbe4c02d64a1",
-        "created": 1738048621,
-        "model": "claude-3-5-sonnet-20241022",
-        "object": "chat.completion",
-        "system_fingerprint": null,
-        "choices": [
-          {
-            "finish_reason": "tool_calls",
-            "index": 0,
-            "message": {
-              "content": "The file has been successfully renamed from `game_2048.py` to `2048.py`. Is there anything else you need help with?",
-              "role": "assistant",
-              "tool_calls": [
-                {
-                  "index": 1,
-                  "function": {
-                    "arguments": "{}",
-                    "name": "finish"
-                  },
-                  "id": "toolu_01G98EmHEC8HgktRjJBYXqEa",
-                  "type": "function"
-                }
-              ],
-              "function_call": null
-            }
-          }
-        ],
-        "usage": {
-          "completion_tokens": 67,
-          "prompt_tokens": 3933,
-          "total_tokens": 4000,
-          "completion_tokens_details": null,
-          "prompt_tokens_details": {
-            "audio_tokens": null,
-            "cached_tokens": 3798
-          },
-          "cache_creation_input_tokens": 129,
-          "cache_read_input_tokens": 3798
-        },
-        "service_tier": null
-      },
-      "total_calls_in_response": 1
-    },
-    "args": {
-      "outputs": {},
-      "thought": "The file has been successfully renamed from `game_2048.py` to `2048.py`. Is there anything else you need help with?"
-    },
-    "timeout": 120
-  }
-]
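
This trajectory records a simple rename task: a user message, two `view` reads of `/workspace`, one `mv` command, and a `finish` action, with each observation linked to its action by a `cause` field. A minimal sketch of how a replay-style test might validate such a fixture follows; the test name and loader here are illustrative, not the deleted test suite's API, and the fixture path is assumed from this diff.

```python
# A minimal sketch of consuming a trajectory fixture like the one above.
# Assumptions: the file lives at tests/runtime/trajs/wrong_initial_state.json;
# the test name and checks are illustrative, not the deleted tests' API.
import json
from pathlib import Path

TRAJ_PATH = Path('tests/runtime/trajs/wrong_initial_state.json')  # assumed location


def test_trajectory_is_well_formed():
    events = json.loads(TRAJ_PATH.read_text())

    # The fixture starts from a user message action.
    assert events[0]['source'] == 'user'
    assert events[0]['action'] == 'message'

    # Every observation must point back at the action that caused it.
    for obs in (e for e in events if 'observation' in e):
        assert any(e['id'] == obs['cause'] for e in events)

    # The trajectory ends with a "finish" action.
    assert events[-1]['action'] == 'finish'
```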
tests/runtime/utils/test_system_stats.py
DELETED
@@ -1,60 +0,0 @@
-"""Tests for system stats utilities."""
-
-import psutil
-
-from openhands.runtime.utils.system_stats import get_system_stats
-
-
-def test_get_system_stats():
-    """Test that get_system_stats returns valid system statistics."""
-    stats = get_system_stats()
-
-    # Test structure
-    assert isinstance(stats, dict)
-    assert set(stats.keys()) == {'cpu_percent', 'memory', 'disk', 'io'}
-
-    # Test CPU stats
-    assert isinstance(stats['cpu_percent'], float)
-    assert 0 <= stats['cpu_percent'] <= 100 * psutil.cpu_count()
-
-    # Test memory stats
-    assert isinstance(stats['memory'], dict)
-    assert set(stats['memory'].keys()) == {'rss', 'vms', 'percent'}
-    assert isinstance(stats['memory']['rss'], int)
-    assert isinstance(stats['memory']['vms'], int)
-    assert isinstance(stats['memory']['percent'], float)
-    assert stats['memory']['rss'] > 0
-    assert stats['memory']['vms'] > 0
-    assert 0 <= stats['memory']['percent'] <= 100
-
-    # Test disk stats
-    assert isinstance(stats['disk'], dict)
-    assert set(stats['disk'].keys()) == {'total', 'used', 'free', 'percent'}
-    assert isinstance(stats['disk']['total'], int)
-    assert isinstance(stats['disk']['used'], int)
-    assert isinstance(stats['disk']['free'], int)
-    assert isinstance(stats['disk']['percent'], float)
-    assert stats['disk']['total'] > 0
-    assert stats['disk']['used'] >= 0
-    assert stats['disk']['free'] >= 0
-    assert 0 <= stats['disk']['percent'] <= 100
-    # Verify that used + free is less than or equal to total
-    # (might not be exactly equal due to filesystem overhead)
-    assert stats['disk']['used'] + stats['disk']['free'] <= stats['disk']['total']
-
-    # Test I/O stats
-    assert isinstance(stats['io'], dict)
-    assert set(stats['io'].keys()) == {'read_bytes', 'write_bytes'}
-    assert isinstance(stats['io']['read_bytes'], int)
-    assert isinstance(stats['io']['write_bytes'], int)
-    assert stats['io']['read_bytes'] >= 0
-    assert stats['io']['write_bytes'] >= 0
-
-
-def test_get_system_stats_stability():
-    """Test that get_system_stats can be called multiple times without errors."""
-    # Call multiple times to ensure stability
-    for _ in range(3):
-        stats = get_system_stats()
-        assert isinstance(stats, dict)
-        assert stats['cpu_percent'] >= 0
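
The assertions above fully pin down the shape of the stats dictionary. Below is a minimal sketch of a `get_system_stats()` that would satisfy them, built directly on psutil; it is illustrative, not necessarily the implementation that lived in `openhands.runtime.utils.system_stats`.

```python
# A minimal get_system_stats() satisfying the deleted test's assertions.
# Sketch only -- not necessarily the deleted module's implementation.
import psutil


def get_system_stats() -> dict:
    process = psutil.Process()
    mem = process.memory_info()
    disk = psutil.disk_usage('/')
    io = process.io_counters()  # note: io_counters() is not available on macOS
    return {
        'cpu_percent': psutil.cpu_percent(),      # system-wide CPU usage as a float
        'memory': {
            'rss': mem.rss,                       # resident set size, bytes
            'vms': mem.vms,                       # virtual memory size, bytes
            'percent': process.memory_percent(),  # float in [0, 100]
        },
        'disk': {
            'total': disk.total,
            'used': disk.used,
            'free': disk.free,
            'percent': disk.percent,
        },
        'io': {
            'read_bytes': io.read_bytes,
            'write_bytes': io.write_bytes,
        },
    }
```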
tests/test_fileops.py
DELETED
@@ -1,66 +0,0 @@
-from pathlib import Path
-
-import pytest
-
-from openhands.runtime.utils import files
-
-SANDBOX_PATH_PREFIX = '/workspace'
-CONTAINER_PATH = '/workspace'
-HOST_PATH = 'workspace'
-
-
-def test_resolve_path():
-    assert (
-        files.resolve_path('test.txt', '/workspace', HOST_PATH, CONTAINER_PATH)
-        == Path(HOST_PATH) / 'test.txt'
-    )
-    assert (
-        files.resolve_path('subdir/test.txt', '/workspace', HOST_PATH, CONTAINER_PATH)
-        == Path(HOST_PATH) / 'subdir' / 'test.txt'
-    )
-    assert (
-        files.resolve_path(
-            Path(SANDBOX_PATH_PREFIX) / 'test.txt',
-            '/workspace',
-            HOST_PATH,
-            CONTAINER_PATH,
-        )
-        == Path(HOST_PATH) / 'test.txt'
-    )
-    assert (
-        files.resolve_path(
-            Path(SANDBOX_PATH_PREFIX) / 'subdir' / 'test.txt',
-            '/workspace',
-            HOST_PATH,
-            CONTAINER_PATH,
-        )
-        == Path(HOST_PATH) / 'subdir' / 'test.txt'
-    )
-    assert (
-        files.resolve_path(
-            Path(SANDBOX_PATH_PREFIX) / 'subdir' / '..' / 'test.txt',
-            '/workspace',
-            HOST_PATH,
-            CONTAINER_PATH,
-        )
-        == Path(HOST_PATH) / 'test.txt'
-    )
-    with pytest.raises(PermissionError):
-        files.resolve_path(
-            Path(SANDBOX_PATH_PREFIX) / '..' / 'test.txt',
-            '/workspace',
-            HOST_PATH,
-            CONTAINER_PATH,
-        )
-    with pytest.raises(PermissionError):
-        files.resolve_path(
-            Path('..') / 'test.txt', '/workspace', HOST_PATH, CONTAINER_PATH
-        )
-    with pytest.raises(PermissionError):
-        files.resolve_path(
-            Path('/') / 'test.txt', '/workspace', HOST_PATH, CONTAINER_PATH
-        )
-    assert (
-        files.resolve_path('test.txt', '/workspace/test', HOST_PATH, CONTAINER_PATH)
-        == Path(HOST_PATH) / 'test' / 'test.txt'
-    )
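
The assertions encode a clear contract: relative paths are interpreted against the working directory inside the sandbox, the result is rebased onto the host-side workspace directory, and any path that escapes the sandbox prefix (via `..` or an absolute path outside it) raises `PermissionError`. A sketch of that mapping logic follows; it is illustrative only, not the deleted `files.resolve_path`.

```python
# A sketch of the path-mapping contract the deleted test pins down.
# Illustrative only -- not the deleted files.resolve_path implementation.
import os.path
from pathlib import Path, PurePosixPath


def resolve_path_sketch(file_path, working_directory, workspace_base, workspace_mount_path_in_sandbox):
    path_in_sandbox = PurePosixPath(file_path)
    if not path_in_sandbox.is_absolute():
        path_in_sandbox = PurePosixPath(working_directory) / path_in_sandbox

    # Collapse '..' components lexically, without touching the filesystem.
    abs_path = PurePosixPath(os.path.normpath(path_in_sandbox))

    sandbox_root = PurePosixPath(workspace_mount_path_in_sandbox)
    if abs_path != sandbox_root and sandbox_root not in abs_path.parents:
        raise PermissionError(f'{file_path} is outside the sandbox workspace')

    # Rebase the sandbox path onto the host-side workspace directory.
    return Path(workspace_base) / abs_path.relative_to(sandbox_root)
```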
tests/unit/README.md
DELETED
@@ -1,29 +0,0 @@
-## Introduction
-
-This folder contains unit tests that can be run locally.
-
-Run all tests:
-
-```bash
-poetry run pytest ./tests/unit
-```
-
-Run a specific test file:
-
-```bash
-poetry run pytest ./tests/unit/test_llm_fncall_converter.py
-```
-
-Run a specific unit test:
-
-```bash
-poetry run pytest ./tests/unit/test_llm_fncall_converter.py::test_convert_tool_call_to_string
-```
-
-For more verbose output, add the `-v` flag to the above calls (even more verbose: `-vv` and `-vvv`):
-
-```bash
-poetry run pytest -v ./tests/unit/test_llm_fncall_converter.py
-```
-
-For more details, see the [pytest docs](https://docs.pytest.org/en/latest/contents.html)
tests/unit/core/config/test_config_utils.py
DELETED
@@ -1,168 +0,0 @@
-import pytest
-
-from openhands.core.config.agent_config import AgentConfig
-from openhands.core.config.openhands_config import OpenHandsConfig
-from openhands.core.config.utils import finalize_config
-
-# Define a dummy agent name often used in tests or as a default
-DEFAULT_AGENT_NAME = 'CodeActAgent'
-
-
-def test_finalize_config_cli_disables_jupyter_and_browsing_when_true():
-    """
-    Test that finalize_config sets enable_jupyter and enable_browsing to False
-    when runtime is 'cli' and they were initially True.
-    """
-    app_config = OpenHandsConfig()
-    app_config.runtime = 'cli'
-
-    agent_config = AgentConfig(enable_jupyter=True, enable_browsing=True)
-    app_config.agents[DEFAULT_AGENT_NAME] = agent_config
-
-    finalize_config(app_config)
-
-    assert not app_config.agents[DEFAULT_AGENT_NAME].enable_jupyter, (
-        "enable_jupyter should be False when runtime is 'cli'"
-    )
-    assert not app_config.agents[DEFAULT_AGENT_NAME].enable_browsing, (
-        "enable_browsing should be False when runtime is 'cli'"
-    )
-
-
-def test_finalize_config_cli_keeps_jupyter_and_browsing_false_when_false():
-    """
-    Test that finalize_config keeps enable_jupyter and enable_browsing as False
-    when runtime is 'cli' and they were initially False.
-    """
-    app_config = OpenHandsConfig()
-    app_config.runtime = 'cli'
-
-    agent_config = AgentConfig(enable_jupyter=False, enable_browsing=False)
-    app_config.agents[DEFAULT_AGENT_NAME] = agent_config
-
-    finalize_config(app_config)
-
-    assert not app_config.agents[DEFAULT_AGENT_NAME].enable_jupyter, (
-        "enable_jupyter should remain False when runtime is 'cli' and initially False"
-    )
-    assert not app_config.agents[DEFAULT_AGENT_NAME].enable_browsing, (
-        "enable_browsing should remain False when runtime is 'cli' and initially False"
-    )
-
-
-def test_finalize_config_other_runtime_keeps_jupyter_and_browsing_true_by_default():
-    """
-    Test that finalize_config keeps enable_jupyter and enable_browsing as True (default)
-    when runtime is not 'cli'.
-    """
-    app_config = OpenHandsConfig()
-    app_config.runtime = 'docker'  # A non-cli runtime
-
-    # AgentConfig defaults enable_jupyter and enable_browsing to True
-    agent_config = AgentConfig()
-    app_config.agents[DEFAULT_AGENT_NAME] = agent_config
-
-    finalize_config(app_config)
-
-    assert app_config.agents[DEFAULT_AGENT_NAME].enable_jupyter, (
-        'enable_jupyter should remain True by default for non-cli runtimes'
-    )
-    assert app_config.agents[DEFAULT_AGENT_NAME].enable_browsing, (
-        'enable_browsing should remain True by default for non-cli runtimes'
-    )
-
-
-def test_finalize_config_other_runtime_keeps_jupyter_and_browsing_false_if_set():
-    """
-    Test that finalize_config keeps enable_jupyter and enable_browsing as False
-    when runtime is not 'cli' but they were explicitly set to False.
-    """
-    app_config = OpenHandsConfig()
-    app_config.runtime = 'docker'  # A non-cli runtime
-
-    agent_config = AgentConfig(enable_jupyter=False, enable_browsing=False)
-    app_config.agents[DEFAULT_AGENT_NAME] = agent_config
-
-    finalize_config(app_config)
-
-    assert not app_config.agents[DEFAULT_AGENT_NAME].enable_jupyter, (
-        'enable_jupyter should remain False for non-cli runtimes if explicitly set to False'
-    )
-    assert not app_config.agents[DEFAULT_AGENT_NAME].enable_browsing, (
-        'enable_browsing should remain False for non-cli runtimes if explicitly set to False'
-    )
-
-
-def test_finalize_config_no_agents_defined():
-    """
-    Test that finalize_config runs without error if no agents are defined in the config,
-    even when runtime is 'cli'.
-    """
-    app_config = OpenHandsConfig()
-    app_config.runtime = 'cli'
-    # No agents are added to app_config.agents
-
-    try:
-        finalize_config(app_config)
-    except Exception as e:
-        pytest.fail(f'finalize_config raised an exception with no agents defined: {e}')
-
-
-def test_finalize_config_multiple_agents_cli_runtime():
-    """
-    Test that finalize_config correctly disables jupyter and browsing for multiple agents
-    when runtime is 'cli'.
-    """
-    app_config = OpenHandsConfig()
-    app_config.runtime = 'cli'
-
-    agent_config1 = AgentConfig(enable_jupyter=True, enable_browsing=True)
-    agent_config2 = AgentConfig(enable_jupyter=True, enable_browsing=True)
-    app_config.agents['Agent1'] = agent_config1
-    app_config.agents['Agent2'] = agent_config2
-
-    finalize_config(app_config)
-
-    assert not app_config.agents['Agent1'].enable_jupyter, (
-        'Jupyter should be disabled for Agent1'
-    )
-    assert not app_config.agents['Agent1'].enable_browsing, (
-        'Browsing should be disabled for Agent1'
-    )
-    assert not app_config.agents['Agent2'].enable_jupyter, (
-        'Jupyter should be disabled for Agent2'
-    )
-    assert not app_config.agents['Agent2'].enable_browsing, (
-        'Browsing should be disabled for Agent2'
-    )
-
-
-def test_finalize_config_multiple_agents_other_runtime():
-    """
-    Test that finalize_config correctly keeps jupyter and browsing enabled (or as set)
-    for multiple agents when runtime is not 'cli'.
-    """
-    app_config = OpenHandsConfig()
-    app_config.runtime = 'docker'
-
-    agent_config1 = AgentConfig(enable_jupyter=True, enable_browsing=True)  # Defaults
-    agent_config2 = AgentConfig(
-        enable_jupyter=False, enable_browsing=False
-    )  # Explicitly false
-    app_config.agents['Agent1'] = agent_config1
-    app_config.agents['Agent2'] = agent_config2
-
-    finalize_config(app_config)
-
-    assert app_config.agents['Agent1'].enable_jupyter, (
-        'Jupyter should be True for Agent1'
-    )
-    assert app_config.agents['Agent1'].enable_browsing, (
-        'Browsing should be True for Agent1'
-    )
-    assert not app_config.agents['Agent2'].enable_jupyter, (
-        'Jupyter should be False for Agent2'
-    )
-    assert not app_config.agents['Agent2'].enable_browsing, (
-        'Browsing should be False for Agent2'
-    )
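
Taken together, these tests encode one rule: the `cli` runtime cannot host Jupyter or a browser, so both capabilities are forced off for every configured agent, while other runtimes leave the agent settings alone. A sketch of that rule as `finalize_config` would apply it; the real function in `openhands.core.config.utils` does more than this.

```python
# The single behaviour the tests above encode, as a standalone sketch.
# Illustrative only -- the real finalize_config() does more than this.
def _disable_cli_only_features(app_config) -> None:
    if app_config.runtime == 'cli':
        for agent_config in app_config.agents.values():
            agent_config.enable_jupyter = False
            agent_config.enable_browsing = False
```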
tests/unit/frontend/test_translation_completeness.py
DELETED
@@ -1,33 +0,0 @@
-"""Test that the translation completeness check works correctly."""
-
-import os
-import subprocess
-import unittest
-
-
-class TestTranslationCompleteness(unittest.TestCase):
-    """Test that the translation completeness check works correctly."""
-
-    def test_translation_completeness_check_runs(self):
-        """Test that the translation completeness check script can be executed."""
-        frontend_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), "frontend")
-        script_path = os.path.join(frontend_dir, "scripts", "check-translation-completeness.cjs")
-
-        # Verify the script exists
-        self.assertTrue(os.path.exists(script_path), f"Script not found at {script_path}")
-
-        # Verify the script is executable
-        self.assertTrue(os.access(script_path, os.X_OK), f"Script at {script_path} is not executable")
-
-        # Run the script (it may fail due to missing translations, but we just want to verify it runs)
-        try:
-            subprocess.run(
-                ["node", script_path],
-                cwd=frontend_dir,
-                check=False,
-                capture_output=True,
-                text=True
-            )
-            # We don't assert on the return code because it might fail due to missing translations
-        except Exception as e:
-            self.fail(f"Failed to run translation completeness check: {e}")
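
The test only verifies that the Node script launches; the check itself conceptually boils down to comparing translation keys across locale files. The Node script is not shown in this diff, so here is a Python sketch of the same idea, assuming a hypothetical `frontend/src/i18n/<locale>.json` layout with `en` as the reference locale.

```python
# A Python sketch of a translation-completeness check. The real check is the
# Node script above; the i18n directory layout here is a labeled assumption.
import json
from pathlib import Path


def find_missing_keys(i18n_dir: Path, reference_locale: str = 'en') -> dict[str, set[str]]:
    """Return, per locale, the reference keys that locale does not translate."""
    reference_keys = set(json.loads((i18n_dir / f'{reference_locale}.json').read_text()))
    missing = {}
    for locale_file in i18n_dir.glob('*.json'):
        if locale_file.stem == reference_locale:
            continue
        locale_keys = set(json.loads(locale_file.read_text()))
        if reference_keys - locale_keys:
            missing[locale_file.stem] = reference_keys - locale_keys
    return missing
```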
tests/unit/resolver/github/test_guess_success.py
DELETED
@@ -1,202 +0,0 @@
-import json
-from unittest.mock import MagicMock, patch
-
-from openhands.core.config import LLMConfig
-from openhands.events.action.message import MessageAction
-from openhands.llm import LLM
-from openhands.resolver.interfaces.github import GithubIssueHandler, GithubPRHandler
-from openhands.resolver.interfaces.issue import Issue
-from openhands.resolver.interfaces.issue_definitions import (
-    ServiceContextIssue,
-    ServiceContextPR,
-)
-
-
-def test_guess_success_multiline_explanation():
-    # Mock data
-    issue = Issue(
-        owner='test',
-        repo='test',
-        number=1,
-        title='Test Issue',
-        body='Test body',
-        thread_comments=None,
-        review_comments=None,
-    )
-    history = [MessageAction(content='Test message')]
-    llm_config = LLMConfig(model='test', api_key='test')
-
-    # Create a mock response with multi-line explanation
-    mock_response = MagicMock()
-    mock_response.choices = [
-        MagicMock(
-            message=MagicMock(
-                content="""--- success
-true
-
---- explanation
-The PR successfully addressed the issue by:
-- Fixed bug A
-- Added test B
-- Updated documentation C
-
-Automatic fix generated by OpenHands 🙌"""
-            )
-        )
-    ]
-
-    # Use patch to mock the LLM completion call
-    with patch.object(LLM, 'completion', return_value=mock_response) as mock_completion:
-        # Create a handler instance
-        handler = ServiceContextIssue(
-            GithubIssueHandler('test', 'test', 'test'), llm_config
-        )
-
-        # Call guess_success
-        success, _, explanation = handler.guess_success(issue, history)
-
-        # Verify the results
-        assert success is True
-        assert 'The PR successfully addressed the issue by:' in explanation
-        assert 'Fixed bug A' in explanation
-        assert 'Added test B' in explanation
-        assert 'Updated documentation C' in explanation
-        assert 'Automatic fix generated by OpenHands' in explanation
-
-        # Verify that LLM completion was called exactly once
-        mock_completion.assert_called_once()
-
-
-def test_pr_handler_guess_success_with_thread_comments():
-    # Create a PR handler instance
-    llm_config = LLMConfig(model='test', api_key='test')
-    handler = ServiceContextPR(GithubPRHandler('test', 'test', 'test'), llm_config)
-
-    # Create a mock issue with thread comments but no review comments
-    issue = Issue(
-        owner='test-owner',
-        repo='test-repo',
-        number=1,
-        title='Test PR',
-        body='Test Body',
-        thread_comments=['First comment', 'Second comment'],
-        closing_issues=['Issue description'],
-        review_comments=None,
-        thread_ids=None,
-        head_branch='test-branch',
-    )
-
-    # Create mock history
-    history = [MessageAction(content='Fixed the issue by implementing X and Y')]
-
-    # Create mock LLM config
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
-
-    # Mock the LLM response
-    mock_response = MagicMock()
-    mock_response.choices = [
-        MagicMock(
-            message=MagicMock(
-                content="""--- success
-true
-
---- explanation
-The changes successfully address the feedback."""
-            )
-        )
-    ]
-
-    # Test the guess_success method
-    with patch.object(LLM, 'completion', return_value=mock_response):
-        success, success_list, explanation = handler.guess_success(issue, history)
-
-        # Verify the results
-        assert success is True
-        assert success_list == [True]
-        assert 'successfully address' in explanation
-        assert len(json.loads(explanation)) == 1
-
-
-def test_pr_handler_guess_success_only_review_comments():
-    # Create a PR handler instance
-    llm_config = LLMConfig(model='test', api_key='test')
-    handler = ServiceContextPR(
-        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
-    )
-
-    # Create a mock issue with only review comments
-    issue = Issue(
-        owner='test-owner',
-        repo='test-repo',
-        number=1,
-        title='Test PR',
-        body='Test Body',
-        thread_comments=None,
-        closing_issues=['Issue description'],
-        review_comments=['Please fix the formatting', 'Add more tests'],
-        thread_ids=None,
-        head_branch='test-branch',
-    )
-
-    # Create mock history
-    history = [MessageAction(content='Fixed the formatting and added more tests')]
-
-    # Create mock LLM config
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
-
-    # Mock the LLM response
-    mock_response = MagicMock()
-    mock_response.choices = [
-        MagicMock(
-            message=MagicMock(
-                content="""--- success
-true
-
---- explanation
-The changes successfully address the review comments."""
-            )
-        )
-    ]
-
-    # Test the guess_success method
-    with patch.object(LLM, 'completion', return_value=mock_response):
-        success, success_list, explanation = handler.guess_success(issue, history)
-
-        # Verify the results
-        assert success is True
-        assert success_list == [True]
-        assert (
-            '["The changes successfully address the review comments."]' in explanation
-        )
-
-
-def test_pr_handler_guess_success_no_comments():
-    # Create a PR handler instance
-    llm_config = LLMConfig(model='test', api_key='test')
-    handler = ServiceContextPR(GithubPRHandler('test', 'test', 'test'), llm_config)
-
-    # Create a mock issue with no comments
-    issue = Issue(
-        owner='test-owner',
-        repo='test-repo',
-        number=1,
-        title='Test PR',
-        body='Test Body',
-        thread_comments=None,
-        closing_issues=['Issue description'],
-        review_comments=None,
-        thread_ids=None,
-        head_branch='test-branch',
-    )
-
-    # Create mock history
-    history = [MessageAction(content='Fixed the issue')]
-
-    # Create mock LLM config
-    llm_config = LLMConfig(model='test-model', api_key='test-key')
-
-    # Test that it returns appropriate message when no comments are present
-    success, success_list, explanation = handler.guess_success(issue, history)
-    assert success is False
-    assert success_list is None
-    assert explanation == 'No feedback was found to process'
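
All of the mocked completions above share a `--- success` / `--- explanation` layout. A sketch of a parser for that format follows; the real parsing lives inside `guess_success()` and may differ in detail.

```python
# Parsing sketch for the '--- success' / '--- explanation' response format
# used by the mocks above. Illustrative; not the resolver's actual parser.
def parse_success_response(content: str) -> tuple[bool, str]:
    success, explanation = False, ''
    for block in content.split('--- ')[1:]:
        header, _, body = block.partition('\n')
        if header.strip() == 'success':
            success = body.strip().lower() == 'true'
        elif header.strip() == 'explanation':
            explanation = body.strip()
    return success, explanation


# Applied to the first mock above, this yields
# (True, 'The PR successfully addressed the issue by:\n- Fixed bug A\n...').
```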
tests/unit/resolver/github/test_issue_handler.py
DELETED
@@ -1,645 +0,0 @@
-from unittest.mock import MagicMock, patch
-
-from openhands.core.config import LLMConfig
-from openhands.resolver.interfaces.github import GithubIssueHandler, GithubPRHandler
-from openhands.resolver.interfaces.issue import ReviewThread
-from openhands.resolver.interfaces.issue_definitions import (
-    ServiceContextIssue,
-    ServiceContextPR,
-)
-
-
-def test_get_converted_issues_initializes_review_comments():
-    # Mock the necessary dependencies
-    with patch('httpx.get') as mock_get:
-        # Mock the response for issues
-        mock_issues_response = MagicMock()
-        mock_issues_response.json.return_value = [
-            {'number': 1, 'title': 'Test Issue', 'body': 'Test Body'}
-        ]
-        # Mock the response for comments
-        mock_comments_response = MagicMock()
-        mock_comments_response.json.return_value = []
-
-        # Set up the mock to return different responses for different calls
-        # First call is for issues, second call is for comments
-        mock_get.side_effect = [
-            mock_issues_response,
-            mock_comments_response,
-            mock_comments_response,
-        ]  # Need two comment responses because we make two API calls
-
-        # Create an instance of IssueHandler
-        llm_config = LLMConfig(model='test', api_key='test')
-        handler = ServiceContextIssue(
-            GithubIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
-        )
-
-        # Get converted issues
-        issues = handler.get_converted_issues(issue_numbers=[1])
-
-        # Verify that we got exactly one issue
-        assert len(issues) == 1
-
-        # Verify that review_comments is initialized as None
-        assert issues[0].review_comments is None
-
-        # Verify other fields are set correctly
-        assert issues[0].number == 1
-        assert issues[0].title == 'Test Issue'
-        assert issues[0].body == 'Test Body'
-        assert issues[0].owner == 'test-owner'
-        assert issues[0].repo == 'test-repo'
-
-
-def test_get_converted_issues_handles_empty_body():
-    # Mock the necessary dependencies
-    with patch('httpx.get') as mock_get:
-        # Mock the response for issues
-        mock_issues_response = MagicMock()
-        mock_issues_response.json.return_value = [
-            {'number': 1, 'title': 'Test Issue', 'body': None}
-        ]
-        # Mock the response for comments
-        mock_comments_response = MagicMock()
-        mock_comments_response.json.return_value = []
-        # Set up the mock to return different responses
-        mock_get.side_effect = [
-            mock_issues_response,
-            mock_comments_response,
-            mock_comments_response,
-        ]
-
-        # Create an instance of IssueHandler
-        llm_config = LLMConfig(model='test', api_key='test')
-        handler = ServiceContextIssue(
-            GithubIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
-        )
-
-        # Get converted issues
-        issues = handler.get_converted_issues(issue_numbers=[1])
-
-        # Verify that we got exactly one issue
-        assert len(issues) == 1
-
-        # Verify that body is empty string when None
-        assert issues[0].body == ''
-
-        # Verify other fields are set correctly
-        assert issues[0].number == 1
-        assert issues[0].title == 'Test Issue'
-        assert issues[0].owner == 'test-owner'
-        assert issues[0].repo == 'test-repo'
-
-        # Verify that review_comments is initialized as None
-        assert issues[0].review_comments is None
-
-
-def test_pr_handler_get_converted_issues_with_comments():
-    # Mock the necessary dependencies
-    with patch('httpx.get') as mock_get:
-        # Mock the response for PRs
-        mock_prs_response = MagicMock()
-        mock_prs_response.json.return_value = [
-            {
-                'number': 1,
-                'title': 'Test PR',
-                'body': 'Test Body fixes #1',
-                'head': {'ref': 'test-branch'},
-            }
-        ]
-
-        # Mock the response for PR comments
-        mock_comments_response = MagicMock()
-        mock_comments_response.json.return_value = [
-            {'body': 'First comment'},
-            {'body': 'Second comment'},
-        ]
-
-        # Mock the response for PR metadata (GraphQL)
-        mock_graphql_response = MagicMock()
-        mock_graphql_response.json.return_value = {
-            'data': {
-                'repository': {
-                    'pullRequest': {
-                        'closingIssuesReferences': {'edges': []},
-                        'reviews': {'nodes': []},
-                        'reviewThreads': {'edges': []},
-                    }
-                }
-            }
-        }
-
-        # Set up the mock to return different responses
-        # We need to return empty responses for subsequent pages
-        mock_empty_response = MagicMock()
-        mock_empty_response.json.return_value = []
-
-        # Mock the response for fetching the external issue referenced in PR body
-        mock_external_issue_response = MagicMock()
-        mock_external_issue_response.json.return_value = {
-            'body': 'This is additional context from an externally referenced issue.'
-        }
-
-        mock_get.side_effect = [
-            mock_prs_response,  # First call for PRs
-            mock_empty_response,  # Second call for PRs (empty page)
-            mock_comments_response,  # Third call for PR comments
-            mock_empty_response,  # Fourth call for PR comments (empty page)
-            mock_external_issue_response,  # Mock response for the external issue reference #1
-        ]
-
-        # Mock the post request for GraphQL
-        with patch('httpx.post') as mock_post:
-            mock_post.return_value = mock_graphql_response
-
-            # Create an instance of PRHandler
-            llm_config = LLMConfig(model='test', api_key='test')
-            handler = ServiceContextPR(
-                GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
-            )
-
-            # Get converted issues
-            prs = handler.get_converted_issues(issue_numbers=[1])
-
-            # Verify that we got exactly one PR
-            assert len(prs) == 1
-
-            # Verify that thread_comments are set correctly
-            assert prs[0].thread_comments == ['First comment', 'Second comment']
-
-            # Verify other fields are set correctly
-            assert prs[0].number == 1
-            assert prs[0].title == 'Test PR'
-            assert prs[0].body == 'Test Body fixes #1'
-            assert prs[0].owner == 'test-owner'
-            assert prs[0].repo == 'test-repo'
-            assert prs[0].head_branch == 'test-branch'
-            assert prs[0].closing_issues == [
-                'This is additional context from an externally referenced issue.'
-            ]
-
-
-def test_get_issue_comments_with_specific_comment_id():
-    # Mock the necessary dependencies
-    with patch('httpx.get') as mock_get:
-        # Mock the response for comments
-        mock_comments_response = MagicMock()
-        mock_comments_response.json.return_value = [
-            {'id': 123, 'body': 'First comment'},
-            {'id': 456, 'body': 'Second comment'},
-        ]
-
-        mock_get.return_value = mock_comments_response
-
-        # Create an instance of IssueHandler
-        llm_config = LLMConfig(model='test', api_key='test')
-        handler = ServiceContextIssue(
-            GithubIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
-        )
-
-        # Get comments with a specific comment_id
-        specific_comment = handler.get_issue_comments(issue_number=1, comment_id=123)
-
-        # Verify only the specific comment is returned
-        assert specific_comment == ['First comment']
-
-
-def test_pr_handler_get_converted_issues_with_specific_thread_comment():
-    # Define the specific comment_id to filter
-    specific_comment_id = 123
-
-    # Mock GraphQL response for review threads
-    with patch('httpx.get') as mock_get:
-        # Mock the response for PRs
-        mock_prs_response = MagicMock()
-        mock_prs_response.json.return_value = [
-            {
-                'number': 1,
-                'title': 'Test PR',
-                'body': 'Test Body',
-                'head': {'ref': 'test-branch'},
-            }
-        ]
-
-        # Mock the response for PR comments
-        mock_comments_response = MagicMock()
-        mock_comments_response.json.return_value = [
-            {'body': 'First comment', 'id': 123},
-            {'body': 'Second comment', 'id': 124},
-        ]
-
-        # Mock the response for PR metadata (GraphQL)
-        mock_graphql_response = MagicMock()
-        mock_graphql_response.json.return_value = {
-            'data': {
-                'repository': {
-                    'pullRequest': {
-                        'closingIssuesReferences': {'edges': []},
-                        'reviews': {'nodes': []},
-                        'reviewThreads': {
-                            'edges': [
-                                {
-                                    'node': {
-                                        'id': 'review-thread-1',
-                                        'isResolved': False,
-                                        'comments': {
-                                            'nodes': [
-                                                {
-                                                    'fullDatabaseId': 121,
-                                                    'body': 'Specific review comment',
-                                                    'path': 'file1.txt',
-                                                },
-                                                {
-                                                    'fullDatabaseId': 456,
-                                                    'body': 'Another review comment',
-                                                    'path': 'file2.txt',
-                                                },
-                                            ]
-                                        },
-                                    }
-                                }
-                            ]
-                        },
-                    }
-                }
-            }
-        }
-
-        # Set up the mock to return different responses
-        # We need to return empty responses for subsequent pages
-        mock_empty_response = MagicMock()
-        mock_empty_response.json.return_value = []
-
-        mock_get.side_effect = [
-            mock_prs_response,  # First call for PRs
-            mock_empty_response,  # Second call for PRs (empty page)
-            mock_comments_response,  # Third call for PR comments
-            mock_empty_response,  # Fourth call for PR comments (empty page)
-        ]
-
-        # Mock the post request for GraphQL
-        with patch('httpx.post') as mock_post:
-            mock_post.return_value = mock_graphql_response
-
-            # Create an instance of PRHandler
-            llm_config = LLMConfig(model='test', api_key='test')
-            handler = ServiceContextPR(
-                GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
-            )
-
-            # Get converted issues
-            prs = handler.get_converted_issues(
-                issue_numbers=[1], comment_id=specific_comment_id
-            )
-
-            # Verify that we got exactly one PR
-            assert len(prs) == 1
-
-            # Verify that thread_comments are set correctly
-            assert prs[0].thread_comments == ['First comment']
-            assert prs[0].review_comments == []
-            assert prs[0].review_threads == []
-
-            # Verify other fields are set correctly
-            assert prs[0].number == 1
-            assert prs[0].title == 'Test PR'
-            assert prs[0].body == 'Test Body'
-            assert prs[0].owner == 'test-owner'
-            assert prs[0].repo == 'test-repo'
-            assert prs[0].head_branch == 'test-branch'
-
-
-def test_pr_handler_get_converted_issues_with_specific_review_thread_comment():
-    # Define the specific comment_id to filter
-    specific_comment_id = 123
-
-    # Mock GraphQL response for review threads
-    with patch('httpx.get') as mock_get:
-        # Mock the response for PRs
-        mock_prs_response = MagicMock()
-        mock_prs_response.json.return_value = [
-            {
-                'number': 1,
-                'title': 'Test PR',
-                'body': 'Test Body',
-                'head': {'ref': 'test-branch'},
-            }
-        ]
-
-        # Mock the response for PR comments
-        mock_comments_response = MagicMock()
-        mock_comments_response.json.return_value = [
-            {'body': 'First comment', 'id': 120},
-            {'body': 'Second comment', 'id': 124},
-        ]
-
-        # Mock the response for PR metadata (GraphQL)
-        mock_graphql_response = MagicMock()
-        mock_graphql_response.json.return_value = {
-            'data': {
-                'repository': {
-                    'pullRequest': {
-                        'closingIssuesReferences': {'edges': []},
-                        'reviews': {'nodes': []},
-                        'reviewThreads': {
-                            'edges': [
-                                {
-                                    'node': {
-                                        'id': 'review-thread-1',
-                                        'isResolved': False,
-                                        'comments': {
-                                            'nodes': [
-                                                {
-                                                    'fullDatabaseId': specific_comment_id,
-                                                    'body': 'Specific review comment',
-                                                    'path': 'file1.txt',
-                                                },
-                                                {
-                                                    'fullDatabaseId': 456,
-                                                    'body': 'Another review comment',
-                                                    'path': 'file1.txt',
-                                                },
-                                            ]
-                                        },
-                                    }
-                                }
-                            ]
-                        },
-                    }
-                }
-            }
-        }
-
-        # Set up the mock to return different responses
-        # We need to return empty responses for subsequent pages
-        mock_empty_response = MagicMock()
-        mock_empty_response.json.return_value = []
-
-        mock_get.side_effect = [
-            mock_prs_response,  # First call for PRs
-            mock_empty_response,  # Second call for PRs (empty page)
-            mock_comments_response,  # Third call for PR comments
-            mock_empty_response,  # Fourth call for PR comments (empty page)
-        ]
-
-        # Mock the post request for GraphQL
-        with patch('httpx.post') as mock_post:
-            mock_post.return_value = mock_graphql_response
-
-            # Create an instance of PRHandler
-            llm_config = LLMConfig(model='test', api_key='test')
-            handler = ServiceContextPR(
-                GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
-            )
-
-            # Get converted issues
-            prs = handler.get_converted_issues(
-                issue_numbers=[1], comment_id=specific_comment_id
-            )
-
-            # Verify that we got exactly one PR
-            assert len(prs) == 1
-
-            # Verify that thread_comments are set correctly
-            assert prs[0].thread_comments is None
-            assert prs[0].review_comments == []
-            assert len(prs[0].review_threads) == 1
-            assert isinstance(prs[0].review_threads[0], ReviewThread)
-            assert (
-                prs[0].review_threads[0].comment
-                == 'Specific review comment\n---\nlatest feedback:\nAnother review comment\n'
-            )
-            assert prs[0].review_threads[0].files == ['file1.txt']
-
-            # Verify other fields are set correctly
-            assert prs[0].number == 1
-            assert prs[0].title == 'Test PR'
-            assert prs[0].body == 'Test Body'
-            assert prs[0].owner == 'test-owner'
-            assert prs[0].repo == 'test-repo'
-            assert prs[0].head_branch == 'test-branch'
-
-
-def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
-    # Define the specific comment_id to filter
-    specific_comment_id = 123
-
-    # Mock GraphQL response for review threads
-    with patch('httpx.get') as mock_get:
-        # Mock the response for PRs
-        mock_prs_response = MagicMock()
-        mock_prs_response.json.return_value = [
-            {
-                'number': 1,
-                'title': 'Test PR fixes #3',
-                'body': 'Test Body',
-                'head': {'ref': 'test-branch'},
-            }
-        ]
-
-        # Mock the response for PR comments
-        mock_comments_response = MagicMock()
-        mock_comments_response.json.return_value = [
-            {'body': 'First comment', 'id': 120},
-            {'body': 'Second comment', 'id': 124},
-        ]
-
-        # Mock the response for PR metadata (GraphQL)
-        mock_graphql_response = MagicMock()
-        mock_graphql_response.json.return_value = {
-            'data': {
-                'repository': {
-                    'pullRequest': {
-                        'closingIssuesReferences': {'edges': []},
-                        'reviews': {'nodes': []},
-                        'reviewThreads': {
-                            'edges': [
-                                {
-                                    'node': {
-                                        'id': 'review-thread-1',
-                                        'isResolved': False,
-                                        'comments': {
-                                            'nodes': [
-                                                {
-                                                    'fullDatabaseId': specific_comment_id,
-                                                    'body': 'Specific review comment that references #6',
-                                                    'path': 'file1.txt',
-                                                },
-                                                {
-                                                    'fullDatabaseId': 456,
-                                                    'body': 'Another review comment referencing #7',
-                                                    'path': 'file2.txt',
-                                                },
-                                            ]
-                                        },
-                                    }
-                                }
-                            ]
-                        },
-                    }
-                }
-            }
-        }
-
-        # Set up the mock to return different responses
-        # We need to return empty responses for subsequent pages
-        mock_empty_response = MagicMock()
-        mock_empty_response.json.return_value = []
-
-        # Mock the response for fetching the external issue referenced in PR body
-        mock_external_issue_response_in_body = MagicMock()
-        mock_external_issue_response_in_body.json.return_value = {
-            'body': 'External context #1.'
-        }
-
-        # Mock the response for fetching the external issue referenced in review thread
-        mock_external_issue_response_review_thread = MagicMock()
-        mock_external_issue_response_review_thread.json.return_value = {
-            'body': 'External context #2.'
-        }
-
-        mock_get.side_effect = [
-            mock_prs_response,  # First call for PRs
-            mock_empty_response,  # Second call for PRs (empty page)
-            mock_comments_response,  # Third call for PR comments
-            mock_empty_response,  # Fourth call for PR comments (empty page)
-            mock_external_issue_response_in_body,
-            mock_external_issue_response_review_thread,
-        ]
-
-        # Mock the post request for GraphQL
-        with patch('httpx.post') as mock_post:
-            mock_post.return_value = mock_graphql_response
-
-            # Create an instance of PRHandler
-            llm_config = LLMConfig(model='test', api_key='test')
-            handler = ServiceContextPR(
-                GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
-            )
-
-            # Get converted issues
-            prs = handler.get_converted_issues(
-                issue_numbers=[1], comment_id=specific_comment_id
-            )
-
-            # Verify that we got exactly one PR
-            assert len(prs) == 1
-
-            # Verify that thread_comments are set correctly
-            assert prs[0].thread_comments is None
-            assert prs[0].review_comments == []
-            assert len(prs[0].review_threads) == 1
-            assert isinstance(prs[0].review_threads[0], ReviewThread)
-            assert (
-                prs[0].review_threads[0].comment
-                == 'Specific review comment that references #6\n---\nlatest feedback:\nAnother review comment referencing #7\n'
-            )
-            assert prs[0].closing_issues == [
-                'External context #1.',
-                'External context #2.',
-            ]  # Only includes references inside comment ID and body PR
-
-            # Verify other fields are set correctly
-            assert prs[0].number == 1
-            assert prs[0].title == 'Test PR fixes #3'
-            assert prs[0].body == 'Test Body'
-            assert prs[0].owner == 'test-owner'
-            assert prs[0].repo == 'test-repo'
-            assert prs[0].head_branch == 'test-branch'
-
-
-def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
-    # Mock the necessary dependencies
-    with patch('httpx.get') as mock_get:
-        # Mock the response for PRs
-        mock_prs_response = MagicMock()
-        mock_prs_response.json.return_value = [
-            {
-                'number': 1,
-                'title': 'Test PR',
-                'body': 'Test Body fixes #1',
-                'head': {'ref': 'test-branch'},
-            }
-        ]
-
-        # Mock the response for PR comments
-        mock_comments_response = MagicMock()
-        mock_comments_response.json.return_value = [
-            {'body': 'First comment addressing #1'},
-            {'body': 'Second comment addressing #2'},
-        ]
-
-        # Mock the response for PR metadata (GraphQL)
-        mock_graphql_response = MagicMock()
-        mock_graphql_response.json.return_value = {
-            'data': {
-                'repository': {
-                    'pullRequest': {
-                        'closingIssuesReferences': {'edges': []},
-                        'reviews': {'nodes': []},
-                        'reviewThreads': {'edges': []},
-                    }
-                }
-            }
-        }
-
-        # Set up the mock to return different responses
-        # We need to return empty responses for subsequent pages
-        mock_empty_response = MagicMock()
-        mock_empty_response.json.return_value = []
-
-        # Mock the response for fetching the external issue referenced in PR body
-        mock_external_issue_response_in_body = MagicMock()
-        mock_external_issue_response_in_body.json.return_value = {
-            'body': 'External context #1.'
-        }
-
-        # Mock the response for fetching the external issue referenced in review thread
-        mock_external_issue_response_in_comment = MagicMock()
-        mock_external_issue_response_in_comment.json.return_value = {
-            'body': 'External context #2.'
-        }
-
-        mock_get.side_effect = [
-            mock_prs_response,  # First call for PRs
-            mock_empty_response,  # Second call for PRs (empty page)
-            mock_comments_response,  # Third call for PR comments
-            mock_empty_response,  # Fourth call for PR comments (empty page)
-            mock_external_issue_response_in_body,  # Mock response for the external issue reference #1
-            mock_external_issue_response_in_comment,
-        ]
-
-        # Mock the post request for GraphQL
-        with patch('httpx.post') as mock_post:
-            mock_post.return_value = mock_graphql_response
-
-            # Create an instance of PRHandler
-            llm_config = LLMConfig(model='test', api_key='test')
-            handler = ServiceContextPR(
-                GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
-            )
-
-            # Get converted issues
-            prs = handler.get_converted_issues(issue_numbers=[1])
-
-            # Verify that we got exactly one PR
-            assert len(prs) == 1
|
628 |
-
|
629 |
-
# Verify that thread_comments are set correctly
|
630 |
-
assert prs[0].thread_comments == [
|
631 |
-
'First comment addressing #1',
|
632 |
-
'Second comment addressing #2',
|
633 |
-
]
|
634 |
-
|
635 |
-
# Verify other fields are set correctly
|
636 |
-
assert prs[0].number == 1
|
637 |
-
assert prs[0].title == 'Test PR'
|
638 |
-
assert prs[0].body == 'Test Body fixes #1'
|
639 |
-
assert prs[0].owner == 'test-owner'
|
640 |
-
assert prs[0].repo == 'test-repo'
|
641 |
-
assert prs[0].head_branch == 'test-branch'
|
642 |
-
assert prs[0].closing_issues == [
|
643 |
-
'External context #1.',
|
644 |
-
'External context #2.',
|
645 |
-
]
|
tests/unit/resolver/github/test_issue_handler_error_handling.py
DELETED
@@ -1,281 +0,0 @@
from unittest.mock import MagicMock, patch

import httpx
import pytest
from litellm.exceptions import RateLimitError

from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm.llm import LLM
from openhands.resolver.interfaces.github import GithubIssueHandler, GithubPRHandler
from openhands.resolver.interfaces.issue import Issue
from openhands.resolver.interfaces.issue_definitions import (
    ServiceContextIssue,
    ServiceContextPR,
)


@pytest.fixture(autouse=True)
def mock_logger(monkeypatch):
    # suppress logging of completion data to file
    mock_logger = MagicMock()
    monkeypatch.setattr('openhands.llm.debug_mixin.llm_prompt_logger', mock_logger)
    monkeypatch.setattr('openhands.llm.debug_mixin.llm_response_logger', mock_logger)
    return mock_logger


@pytest.fixture
def default_config():
    return LLMConfig(
        model='gpt-4o',
        api_key='test_key',
        num_retries=2,
        retry_min_wait=1,
        retry_max_wait=2,
    )


def test_handle_nonexistent_issue_reference():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Mock the requests.get to simulate a 404 error
    mock_response = MagicMock()
    mock_response.raise_for_status.side_effect = httpx.HTTPError(
        '404 Client Error: Not Found'
    )

    with patch('httpx.get', return_value=mock_response):
        # Call the method with a non-existent issue reference
        result = handler._strategy.get_context_from_external_issues_references(
            closing_issues=[],
            closing_issue_numbers=[],
            issue_body='This references #999999',  # Non-existent issue
            review_comments=[],
            review_threads=[],
            thread_comments=None,
        )

        # The method should return an empty list since the referenced issue couldn't be fetched
        assert result == []


def test_handle_rate_limit_error():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Mock the requests.get to simulate a rate limit error
    mock_response = MagicMock()
    mock_response.raise_for_status.side_effect = httpx.HTTPError(
        '403 Client Error: Rate Limit Exceeded'
    )

    with patch('httpx.get', return_value=mock_response):
        # Call the method with an issue reference
        result = handler._strategy.get_context_from_external_issues_references(
            closing_issues=[],
            closing_issue_numbers=[],
            issue_body='This references #123',
            review_comments=[],
            review_threads=[],
            thread_comments=None,
        )

        # The method should return an empty list since the request was rate limited
        assert result == []


def test_handle_network_error():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Mock the requests.get to simulate a network error
    with patch('httpx.get', side_effect=httpx.NetworkError('Network Error')):
        # Call the method with an issue reference
        result = handler._strategy.get_context_from_external_issues_references(
            closing_issues=[],
            closing_issue_numbers=[],
            issue_body='This references #123',
            review_comments=[],
            review_threads=[],
            thread_comments=None,
        )

        # The method should return an empty list since the network request failed
        assert result == []


def test_successful_issue_reference():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Mock a successful response
    mock_response = MagicMock()
    mock_response.raise_for_status.return_value = None
    mock_response.json.return_value = {'body': 'This is the referenced issue body'}

    with patch('httpx.get', return_value=mock_response):
        # Call the method with an issue reference
        result = handler._strategy.get_context_from_external_issues_references(
            closing_issues=[],
            closing_issue_numbers=[],
            issue_body='This references #123',
            review_comments=[],
            review_threads=[],
            thread_comments=None,
        )

        # The method should return a list with the referenced issue body
        assert result == ['This is the referenced issue body']


class MockLLMResponse:
    """Mock LLM Response class to mimic the actual LLM response structure."""

    class Choice:
        class Message:
            def __init__(self, content):
                self.content = content

        def __init__(self, content):
            self.message = self.Message(content)

    def __init__(self, content):
        self.choices = [self.Choice(content)]


class DotDict(dict):
    """
    A dictionary that supports dot notation access.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        for key, value in self.items():
            if isinstance(value, dict):
                self[key] = DotDict(value)
            elif isinstance(value, list):
                self[key] = [
                    DotDict(item) if isinstance(item, dict) else item for item in value
                ]

    def __getattr__(self, key):
        if key in self:
            return self[key]
        else:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{key}'"
            )

    def __setattr__(self, key, value):
        self[key] = value

    def __delattr__(self, key):
        if key in self:
            del self[key]
        else:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{key}'"
            )


@patch('openhands.llm.llm.litellm_completion')
def test_guess_success_rate_limit_wait_time(mock_litellm_completion, default_config):
    """Test that the retry mechanism in guess_success respects wait time between retries."""

    with patch('time.sleep') as mock_sleep:
        # Simulate a rate limit error followed by a successful response
        mock_litellm_completion.side_effect = [
            RateLimitError(
                'Rate limit exceeded', llm_provider='test_provider', model='test_model'
            ),
            DotDict(
                {
                    'choices': [
                        {
                            'message': {
                                'content': '--- success\ntrue\n--- explanation\nRetry successful'
                            }
                        }
                    ]
                }
            ),
        ]

        llm = LLM(config=default_config)
        handler = ServiceContextIssue(
            GithubIssueHandler('test-owner', 'test-repo', 'test-token'), default_config
        )
        handler.llm = llm

        # Mock issue and history
        issue = Issue(
            owner='test-owner',
            repo='test-repo',
            number=1,
            title='Test Issue',
            body='This is a test issue.',
            thread_comments=['Please improve error handling'],
        )
        history = [MessageAction(content='Fixed error handling.')]

        # Call guess_success
        success, _, explanation = handler.guess_success(issue, history)

        # Assertions
        assert success is True
        assert explanation == 'Retry successful'
        assert mock_litellm_completion.call_count == 2  # Two attempts made
        mock_sleep.assert_called_once()  # Sleep called once between retries

        # Validate wait time
        wait_time = mock_sleep.call_args[0][0]
        assert (
            default_config.retry_min_wait <= wait_time <= default_config.retry_max_wait
        ), (
            f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'
        )


@patch('openhands.llm.llm.litellm_completion')
def test_guess_success_exhausts_retries(mock_completion, default_config):
    """Test the retry mechanism in guess_success exhausts retries and raises an error."""
    # Simulate persistent rate limit errors by always raising RateLimitError
    mock_completion.side_effect = RateLimitError(
        'Rate limit exceeded', llm_provider='test_provider', model='test_model'
    )

    # Initialize LLM and handler
    llm = LLM(config=default_config)
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), default_config
    )
    handler.llm = llm

    # Mock issue and history
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test Issue',
        body='This is a test issue.',
        thread_comments=['Please improve error handling'],
    )
    history = [MessageAction(content='Fixed error handling.')]

    # Call guess_success and expect it to raise an error after retries
    with pytest.raises(RateLimitError):
        handler.guess_success(issue, history)

    # Assertions
    assert (
        mock_completion.call_count == default_config.num_retries
    )  # Initial call + retries
tests/unit/resolver/github/test_pr_handler_guess_success.py
DELETED
@@ -1,672 +0,0 @@
import json
from unittest.mock import MagicMock, patch

import pytest

from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm.llm import LLM
from openhands.resolver.interfaces.github import GithubPRHandler
from openhands.resolver.interfaces.issue import Issue, ReviewThread
from openhands.resolver.interfaces.issue_definitions import ServiceContextPR


@pytest.fixture
def pr_handler():
    llm_config = LLMConfig(model='test-model')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )
    return handler


@pytest.fixture
def mock_llm_success_response():
    return MagicMock(
        choices=[
            MagicMock(
                message=MagicMock(
                    content="""--- success
true

--- explanation
The changes look good"""
                )
            )
        ]
    )


def test_guess_success_review_threads_litellm_call():
    """Test that the completion() call for review threads contains the expected content."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create a mock issue with review threads
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=None,
        closing_issues=['Issue 1 description', 'Issue 2 description'],
        review_comments=None,
        review_threads=[
            ReviewThread(
                comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
                files=['/src/file1.py', '/src/file2.py'],
            ),
            ReviewThread(
                comment='Add more tests\n---\nlatest feedback:\nAdd test cases',
                files=['/tests/test_file.py'],
            ),
        ],
        thread_ids=['1', '2'],
        head_branch='test-branch',
    )

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Fixed formatting in file1.py and file2.py
2. Added docstrings to all functions
3. Added test cases in test_file.py"""
        )
    ]

    # Create mock LLM config
    llm_config = LLMConfig(model='test-model', api_key='test-key')

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    # Test the guess_success method
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(issue, history)

        # Verify the completion() calls
        assert mock_completion.call_count == 2  # One call per review thread

        # Check first call
        first_call = mock_completion.call_args_list[0]
        first_prompt = first_call[1]['messages'][0]['content']
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in first_prompt
        )
        assert (
            'Feedback:\nPlease fix the formatting\n---\nlatest feedback:\nAdd docstrings'
            in first_prompt
        )
        assert (
            'Files locations:\n'
            + json.dumps(['/src/file1.py', '/src/file2.py'], indent=4)
            in first_prompt
        )
        assert 'Last message from AI agent:\n' + history[0].content in first_prompt

        # Check second call
        second_call = mock_completion.call_args_list[1]
        second_prompt = second_call[1]['messages'][0]['content']
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in second_prompt
        )
        assert (
            'Feedback:\nAdd more tests\n---\nlatest feedback:\nAdd test cases'
            in second_prompt
        )
        assert (
            'Files locations:\n' + json.dumps(['/tests/test_file.py'], indent=4)
            in second_prompt
        )
        assert 'Last message from AI agent:\n' + history[0].content in second_prompt

        assert len(json.loads(explanation)) == 2


def test_guess_success_thread_comments_litellm_call():
    """Test that the completion() call for thread comments contains the expected content."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create a mock issue with thread comments
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=[
            'Please improve error handling',
            'Add input validation',
            'latest feedback:\nHandle edge cases',
        ],
        closing_issues=['Issue 1 description', 'Issue 2 description'],
        review_comments=None,
        thread_ids=None,
        head_branch='test-branch',
    )

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Added try/catch blocks for error handling
2. Added input validation checks
3. Added handling for edge cases"""
        )
    ]

    # Create mock LLM config
    llm_config = LLMConfig(model='test-model', api_key='test-key')

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    # Test the guess_success method
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(issue, history)

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in prompt
        )
        assert 'PR Thread Comments:\n' + '\n---\n'.join(issue.thread_comments) in prompt
        assert 'Last message from AI agent:\n' + history[0].content in prompt

        assert len(json.loads(explanation)) == 1


def test_check_feedback_with_llm():
    """Test the _check_feedback_with_llm helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Test cases for different LLM responses
    test_cases = [
        {
            'response': '--- success\ntrue\n--- explanation\nChanges look good',
            'expected': (True, 'Changes look good'),
        },
        {
            'response': '--- success\nfalse\n--- explanation\nNot all issues fixed',
            'expected': (False, 'Not all issues fixed'),
        },
        {
            'response': 'Invalid response format',
            'expected': (
                False,
                'Failed to decode answer from LLM response: Invalid response format',
            ),
        },
        {
            'response': '--- success\ntrue\n--- explanation\nMultiline\nexplanation\nhere',
            'expected': (True, 'Multiline\nexplanation\nhere'),
        },
    ]

    for case in test_cases:
        # Mock the LLM response
        mock_response = MagicMock()
        mock_response.choices = [MagicMock(message=MagicMock(content=case['response']))]

        # Test the function
        with patch.object(LLM, 'completion', return_value=mock_response):
            success, explanation = handler._check_feedback_with_llm('test prompt')
            assert (success, explanation) == case['expected']


def test_check_review_thread_with_git_patch():
    """Test that git patch from complete_runtime is included in the prompt."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    review_thread = ReviewThread(
        comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
        files=['/src/file1.py', '/src/file2.py'],
    )
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have fixed the formatting and added docstrings'
    git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""Added docstring."""\n'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_thread(
            review_thread, issues_context, last_message, git_patch
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'Feedback:\n' + review_thread.comment in prompt
        assert (
            'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
        )
        assert 'Last message from AI agent:\n' + last_message in prompt
        assert 'Changes made (git patch):\n' + git_patch in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_review_thread():
    """Test the _check_review_thread helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    review_thread = ReviewThread(
        comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
        files=['/src/file1.py', '/src/file2.py'],
    )
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have fixed the formatting and added docstrings'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_thread(
            review_thread, issues_context, last_message
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'Feedback:\n' + review_thread.comment in prompt
        assert (
            'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
        )
        assert 'Last message from AI agent:\n' + last_message in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_thread_comments_with_git_patch():
    """Test that git patch from complete_runtime is included in the prompt."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    thread_comments = [
        'Please improve error handling',
        'Add input validation',
        'latest feedback:\nHandle edge cases',
    ]
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have added error handling and input validation'
    git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+try:\n+ validate_input()\n+except ValueError:\n+ handle_error()\n'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_thread_comments(
            thread_comments, issues_context, last_message, git_patch
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
        assert 'Last message from AI agent:\n' + last_message in prompt
        assert 'Changes made (git patch):\n' + git_patch in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_thread_comments():
    """Test the _check_thread_comments helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    thread_comments = [
        'Please improve error handling',
        'Add input validation',
        'latest feedback:\nHandle edge cases',
    ]
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have added error handling and input validation'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_thread_comments(
            thread_comments, issues_context, last_message
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
        assert 'Last message from AI agent:\n' + last_message in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_review_comments_with_git_patch():
    """Test that git patch from complete_runtime is included in the prompt."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    review_comments = [
        'Please fix the code style',
        'Add more test cases',
        'latest feedback:\nImprove documentation',
    ]
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have fixed the code style and added tests'
    git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""This module does X."""\n+def func():\n+ """Do Y."""\n'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_comments(
            review_comments, issues_context, last_message, git_patch
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
        assert 'Last message from AI agent:\n' + last_message in prompt
        assert 'Changes made (git patch):\n' + git_patch in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_review_comments():
    """Test the _check_review_comments helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    review_comments = [
        'Please improve code readability',
        'Add comments to complex functions',
        'Follow PEP 8 style guide',
    ]
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have improved code readability and added comments'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_comments(
            review_comments, issues_context, last_message
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
        assert 'Last message from AI agent:\n' + last_message in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_guess_success_review_comments_litellm_call():
    """Test that the completion() call for review comments contains the expected content."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GithubPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create a mock issue with review comments
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=None,
        closing_issues=['Issue 1 description', 'Issue 2 description'],
        review_comments=[
            'Please improve code readability',
            'Add comments to complex functions',
            'Follow PEP 8 style guide',
        ],
        thread_ids=None,
        head_branch='test-branch',
    )

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Improved code readability by breaking down complex functions
2. Added detailed comments to all complex functions
3. Fixed code style to follow PEP 8"""
        )
    ]

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(issue, history)

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in prompt
        )
        assert 'PR Review Comments:\n' + '\n---\n'.join(issue.review_comments) in prompt
        assert 'Last message from AI agent:\n' + history[0].content in prompt

        assert len(json.loads(explanation)) == 1
tests/unit/resolver/github/test_pr_title_escaping.py
DELETED
@@ -1,166 +0,0 @@
import os
import subprocess
import tempfile

from openhands.integrations.service_types import ProviderType
from openhands.resolver.interfaces.issue import Issue
from openhands.resolver.send_pull_request import make_commit


def test_commit_message_with_quotes():
    # Create a temporary directory and initialize git repo
    with tempfile.TemporaryDirectory() as temp_dir:
        subprocess.run(['git', 'init', temp_dir], check=True)

        # Create a test file and add it to git
        test_file = os.path.join(temp_dir, 'test.txt')
        with open(test_file, 'w') as f:
            f.write('test content')

        subprocess.run(['git', '-C', temp_dir, 'add', 'test.txt'], check=True)

        # Create a test issue with problematic title
        issue = Issue(
            owner='test-owner',
            repo='test-repo',
            number=123,
            title="Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>",
            body='Test body',
            labels=[],
            assignees=[],
            state='open',
            created_at='2024-01-01T00:00:00Z',
            updated_at='2024-01-01T00:00:00Z',
            closed_at=None,
            head_branch=None,
            thread_ids=None,
        )

        # Make the commit
        make_commit(temp_dir, issue, 'issue')

        # Get the commit message
        result = subprocess.run(
            ['git', '-C', temp_dir, 'log', '-1', '--pretty=%B'],
            capture_output=True,
            text=True,
            check=True,
        )
        commit_msg = result.stdout.strip()

        # The commit message should contain the quotes without excessive escaping
        expected = "Fix issue #123: Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>"
        assert commit_msg == expected, f'Expected: {expected}\nGot: {commit_msg}'


def test_pr_title_with_quotes(monkeypatch):
    # Mock requests.post to avoid actual API calls
    class MockResponse:
        def __init__(self, status_code=201):
            self.status_code = status_code
            self.text = ''

        def json(self):
            return {'html_url': 'https://github.com/test/test/pull/1'}

        def raise_for_status(self):
            pass

    def mock_post(*args, **kwargs):
        # Verify that the PR title is not over-escaped
        data = kwargs.get('json', {})
        title = data.get('title', '')
        expected = "Fix issue #123: Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>"
        assert title == expected, (
            f'PR title was incorrectly escaped.\nExpected: {expected}\nGot: {title}'
        )
        return MockResponse()

    class MockGetResponse:
        def __init__(self, status_code=200):
            self.status_code = status_code
            self.text = ''

        def json(self):
            return {'default_branch': 'main'}

        def raise_for_status(self):
            pass

    monkeypatch.setattr('httpx.post', mock_post)
    monkeypatch.setattr('httpx.get', lambda *args, **kwargs: MockGetResponse())
    monkeypatch.setattr(
        'openhands.resolver.interfaces.github.GithubIssueHandler.branch_exists',
        lambda *args, **kwargs: False,
    )

    # Mock subprocess.run to avoid actual git commands
    original_run = subprocess.run

    def mock_run(*args, **kwargs):
        print(f'Running command: {args[0] if args else kwargs.get("args", [])}')
        if isinstance(args[0], list) and args[0][0] == 'git':
            if 'push' in args[0]:
                return subprocess.CompletedProcess(
                    args[0], returncode=0, stdout='', stderr=''
                )
            return original_run(*args, **kwargs)
        return original_run(*args, **kwargs)

    monkeypatch.setattr('subprocess.run', mock_run)

    # Create a temporary directory and initialize git repo
    with tempfile.TemporaryDirectory() as temp_dir:
        print('Initializing git repo...')
        subprocess.run(['git', 'init', temp_dir], check=True)

        # Add these lines to configure git
        subprocess.run(
            ['git', '-C', temp_dir, 'config', 'user.name', 'Test User'], check=True
        )
        subprocess.run(
            ['git', '-C', temp_dir, 'config', 'user.email', '[email protected]'],
            check=True,
        )

        # Create a test file and add it to git
        test_file = os.path.join(temp_dir, 'test.txt')
        with open(test_file, 'w') as f:
            f.write('test content')

        print('Adding and committing test file...')
        subprocess.run(['git', '-C', temp_dir, 'add', 'test.txt'], check=True)
        subprocess.run(
            ['git', '-C', temp_dir, 'commit', '-m', 'Initial commit'], check=True
        )

        # Create a test issue with problematic title
        print('Creating test issue...')
        issue = Issue(
            owner='test-owner',
            repo='test-repo',
            number=123,
            title="Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>",
            body='Test body',
            labels=[],
            assignees=[],
            state='open',
            created_at='2024-01-01T00:00:00Z',
            updated_at='2024-01-01T00:00:00Z',
            closed_at=None,
            head_branch=None,
            thread_ids=None,
        )

        # Try to send a PR - this will fail if the title is incorrectly escaped
        print('Sending PR...')
        from openhands.resolver.send_pull_request import send_pull_request

        send_pull_request(
            issue=issue,
            token='dummy-token',
            username='test-user',
            platform=ProviderType.GITHUB,
            patch_dir=temp_dir,
            pr_type='ready',
        )
tests/unit/resolver/github/test_resolve_issues.py
DELETED
@@ -1,1035 +0,0 @@
import os
import tempfile
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from openhands.core.config import LLMConfig
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import (
    CmdOutputMetadata,
    CmdOutputObservation,
    NullObservation,
)
from openhands.integrations.service_types import ProviderType
from openhands.llm.llm import LLM
from openhands.resolver.interfaces.github import GithubIssueHandler, GithubPRHandler
from openhands.resolver.interfaces.issue import Issue, ReviewThread
from openhands.resolver.interfaces.issue_definitions import (
    ServiceContextIssue,
    ServiceContextPR,
)
from openhands.resolver.issue_resolver import IssueResolver
from openhands.resolver.resolver_output import ResolverOutput


@pytest.fixture
def default_mock_args():
    """Fixture that provides a default mock args object with common values.

    Tests can override specific attributes as needed.
    """
    mock_args = MagicMock()
    mock_args.selected_repo = 'test-owner/test-repo'
    mock_args.token = 'test-token'
    mock_args.username = 'test-user'
    mock_args.max_iterations = 5
    mock_args.output_dir = '/tmp'
    mock_args.llm_model = 'test'
    mock_args.llm_api_key = 'test'
    mock_args.llm_base_url = None
    mock_args.base_domain = None
    mock_args.runtime_container_image = None
    mock_args.base_container_image = None
    mock_args.is_experimental = False
    mock_args.issue_number = None
    mock_args.comment_id = None
    mock_args.repo_instruction_file = None
    mock_args.issue_type = 'issue'
    mock_args.prompt_file = None
    return mock_args


@pytest.fixture
def mock_github_token():
    """Fixture that patches the identify_token function to return GitHub provider type.

    This eliminates the need for repeated patching in each test function.
    """
    with patch(
        'openhands.resolver.issue_resolver.identify_token',
        return_value=ProviderType.GITHUB,
    ) as patched:
        yield patched


@pytest.fixture
def mock_output_dir():
    with tempfile.TemporaryDirectory() as temp_dir:
        repo_path = os.path.join(temp_dir, 'repo')
        # Initialize a GitHub repo in "repo" and add a commit with "README.md"
        os.makedirs(repo_path)
        os.system(f'git init {repo_path}')
        readme_path = os.path.join(repo_path, 'README.md')
        with open(readme_path, 'w') as f:
            f.write('hello world')
        os.system(f'git -C {repo_path} add README.md')
        os.system(f"git -C {repo_path} commit -m 'Initial commit'")
        yield temp_dir


@pytest.fixture
def mock_subprocess():
    with patch('subprocess.check_output') as mock_check_output:
        yield mock_check_output


@pytest.fixture
def mock_os():
    with patch('os.system') as mock_system, patch('os.path.join') as mock_join:
        yield mock_system, mock_join


@pytest.fixture
def mock_user_instructions_template():
    return 'Issue: {{ body }}\n\nPlease fix this issue.'


@pytest.fixture
def mock_conversation_instructions_template():
    return 'Instructions: {{ repo_instruction }}'


@pytest.fixture
def mock_followup_prompt_template():
    return 'Issue context: {{ issues }}\n\nReview comments: {{ review_comments }}\n\nReview threads: {{ review_threads }}\n\nFiles: {{ files }}\n\nThread comments: {{ thread_context }}\n\nPlease fix this issue.'


def create_cmd_output(exit_code: int, content: str, command: str):
    return CmdOutputObservation(
        content=content,
        command=command,
        metadata=CmdOutputMetadata(exit_code=exit_code),
    )


def test_initialize_runtime(default_mock_args, mock_github_token):
    mock_runtime = MagicMock()
    mock_runtime.run_action.side_effect = [
        create_cmd_output(exit_code=0, content='', command='cd /workspace'),
        create_cmd_output(
            exit_code=0, content='', command='git config --global core.pager ""'
        ),
    ]

    # Create resolver with mocked token identification
    resolver = IssueResolver(default_mock_args)

    resolver.initialize_runtime(mock_runtime)

    assert mock_runtime.run_action.call_count == 2
    mock_runtime.run_action.assert_any_call(CmdRunAction(command='cd /workspace'))
    mock_runtime.run_action.assert_any_call(
        CmdRunAction(command='git config --global core.pager ""')
    )


@pytest.mark.asyncio
async def test_resolve_issue_no_issues_found(default_mock_args, mock_github_token):
    """Test the resolve_issue method when no issues are found."""
    # Mock dependencies
    mock_handler = MagicMock()
    mock_handler.get_converted_issues.return_value = []  # Return empty list

    # Customize the mock args for this test
    default_mock_args.issue_number = 5432

    # Create a resolver instance with mocked token identification
    resolver = IssueResolver(default_mock_args)

    # Mock the issue handler
    resolver.issue_handler = mock_handler

    # Test that the correct exception is raised
    with pytest.raises(ValueError) as exc_info:
        await resolver.resolve_issue()

    # Verify the error message
    assert 'No issues found for issue number 5432' in str(exc_info.value)
    assert 'test-owner/test-repo' in str(exc_info.value)

    mock_handler.get_converted_issues.assert_called_once_with(
        issue_numbers=[5432], comment_id=None
    )


def test_download_issues_from_github():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextIssue(
        GithubIssueHandler('owner', 'repo', 'token'), llm_config
    )

    mock_issues_response = MagicMock()
    mock_issues_response.json.side_effect = [
        [
            {'number': 1, 'title': 'Issue 1', 'body': 'This is an issue'},
            {
                'number': 2,
                'title': 'PR 1',
                'body': 'This is a pull request',
                'pull_request': {},
            },
            {'number': 3, 'title': 'Issue 2', 'body': 'This is another issue'},
        ],
        None,
    ]
    mock_issues_response.raise_for_status = MagicMock()

    mock_comments_response = MagicMock()
    mock_comments_response.json.return_value = []
    mock_comments_response.raise_for_status = MagicMock()

    def get_mock_response(url, *args, **kwargs):
        if '/comments' in url:
            return mock_comments_response
        return mock_issues_response

    with patch('httpx.get', side_effect=get_mock_response):
        issues = handler.get_converted_issues(issue_numbers=[1, 3])

        assert len(issues) == 2
        assert handler.issue_type == 'issue'
-
assert all(isinstance(issue, Issue) for issue in issues)
|
204 |
-
assert [issue.number for issue in issues] == [1, 3]
|
205 |
-
assert [issue.title for issue in issues] == ['Issue 1', 'Issue 2']
|
206 |
-
assert [issue.review_comments for issue in issues] == [None, None]
|
207 |
-
assert [issue.closing_issues for issue in issues] == [None, None]
|
208 |
-
assert [issue.thread_ids for issue in issues] == [None, None]
|
209 |
-
|
210 |
-
|
211 |
-
def test_download_pr_from_github():
|
212 |
-
llm_config = LLMConfig(model='test', api_key='test')
|
213 |
-
handler = ServiceContextPR(GithubPRHandler('owner', 'repo', 'token'), llm_config)
|
214 |
-
mock_pr_response = MagicMock()
|
215 |
-
mock_pr_response.json.side_effect = [
|
216 |
-
[
|
217 |
-
{
|
218 |
-
'number': 1,
|
219 |
-
'title': 'PR 1',
|
220 |
-
'body': 'This is a pull request',
|
221 |
-
'head': {'ref': 'b1'},
|
222 |
-
},
|
223 |
-
{
|
224 |
-
'number': 2,
|
225 |
-
'title': 'My PR',
|
226 |
-
'body': 'This is another pull request',
|
227 |
-
'head': {'ref': 'b2'},
|
228 |
-
},
|
229 |
-
{'number': 3, 'title': 'PR 3', 'body': 'Final PR', 'head': {'ref': 'b3'}},
|
230 |
-
],
|
231 |
-
None,
|
232 |
-
]
|
233 |
-
mock_pr_response.raise_for_status = MagicMock()
|
234 |
-
|
235 |
-
# Mock for PR comments response
|
236 |
-
mock_comments_response = MagicMock()
|
237 |
-
mock_comments_response.json.return_value = [] # No PR comments
|
238 |
-
mock_comments_response.raise_for_status = MagicMock()
|
239 |
-
|
240 |
-
# Mock for GraphQL request (for download_pr_metadata)
|
241 |
-
mock_graphql_response = MagicMock()
|
242 |
-
mock_graphql_response.json.side_effect = lambda: {
|
243 |
-
'data': {
|
244 |
-
'repository': {
|
245 |
-
'pullRequest': {
|
246 |
-
'closingIssuesReferences': {
|
247 |
-
'edges': [
|
248 |
-
{'node': {'body': 'Issue 1 body', 'number': 1}},
|
249 |
-
{'node': {'body': 'Issue 2 body', 'number': 2}},
|
250 |
-
]
|
251 |
-
},
|
252 |
-
'reviewThreads': {
|
253 |
-
'edges': [
|
254 |
-
{
|
255 |
-
'node': {
|
256 |
-
'isResolved': False,
|
257 |
-
'id': '1',
|
258 |
-
'comments': {
|
259 |
-
'nodes': [
|
260 |
-
{
|
261 |
-
'body': 'Unresolved comment 1',
|
262 |
-
'path': '/frontend/header.tsx',
|
263 |
-
},
|
264 |
-
{'body': 'Follow up thread'},
|
265 |
-
]
|
266 |
-
},
|
267 |
-
}
|
268 |
-
},
|
269 |
-
{
|
270 |
-
'node': {
|
271 |
-
'isResolved': True,
|
272 |
-
'id': '2',
|
273 |
-
'comments': {
|
274 |
-
'nodes': [
|
275 |
-
{
|
276 |
-
'body': 'Resolved comment 1',
|
277 |
-
'path': '/some/file.py',
|
278 |
-
}
|
279 |
-
]
|
280 |
-
},
|
281 |
-
}
|
282 |
-
},
|
283 |
-
{
|
284 |
-
'node': {
|
285 |
-
'isResolved': False,
|
286 |
-
'id': '3',
|
287 |
-
'comments': {
|
288 |
-
'nodes': [
|
289 |
-
{
|
290 |
-
'body': 'Unresolved comment 3',
|
291 |
-
'path': '/another/file.py',
|
292 |
-
}
|
293 |
-
]
|
294 |
-
},
|
295 |
-
}
|
296 |
-
},
|
297 |
-
]
|
298 |
-
},
|
299 |
-
}
|
300 |
-
}
|
301 |
-
}
|
302 |
-
}
|
303 |
-
|
304 |
-
mock_graphql_response.raise_for_status = MagicMock()
|
305 |
-
|
306 |
-
def get_mock_response(url, *args, **kwargs):
|
307 |
-
if '/comments' in url:
|
308 |
-
return mock_comments_response
|
309 |
-
return mock_pr_response
|
310 |
-
|
311 |
-
with patch('httpx.get', side_effect=get_mock_response):
|
312 |
-
with patch('httpx.post', return_value=mock_graphql_response):
|
313 |
-
issues = handler.get_converted_issues(issue_numbers=[1, 2, 3])
|
314 |
-
|
315 |
-
assert len(issues) == 3
|
316 |
-
assert handler.issue_type == 'pr'
|
317 |
-
assert all(isinstance(issue, Issue) for issue in issues)
|
318 |
-
assert [issue.number for issue in issues] == [1, 2, 3]
|
319 |
-
assert [issue.title for issue in issues] == ['PR 1', 'My PR', 'PR 3']
|
320 |
-
assert [issue.head_branch for issue in issues] == ['b1', 'b2', 'b3']
|
321 |
-
|
322 |
-
assert len(issues[0].review_threads) == 2 # Only unresolved threads
|
323 |
-
assert (
|
324 |
-
issues[0].review_threads[0].comment
|
325 |
-
== 'Unresolved comment 1\n---\nlatest feedback:\nFollow up thread\n'
|
326 |
-
)
|
327 |
-
assert issues[0].review_threads[0].files == ['/frontend/header.tsx']
|
328 |
-
assert (
|
329 |
-
issues[0].review_threads[1].comment
|
330 |
-
== 'latest feedback:\nUnresolved comment 3\n'
|
331 |
-
)
|
332 |
-
assert issues[0].review_threads[1].files == ['/another/file.py']
|
333 |
-
assert issues[0].closing_issues == ['Issue 1 body', 'Issue 2 body']
|
334 |
-
assert issues[0].thread_ids == ['1', '3']
|
335 |
-
|
336 |
-
|
337 |
-
@pytest.mark.asyncio
|
338 |
-
async def test_complete_runtime(default_mock_args, mock_github_token):
|
339 |
-
"""Test the complete_runtime method."""
|
340 |
-
mock_runtime = MagicMock()
|
341 |
-
mock_runtime.run_action.side_effect = [
|
342 |
-
create_cmd_output(exit_code=0, content='', command='cd /workspace'),
|
343 |
-
create_cmd_output(
|
344 |
-
exit_code=0, content='', command='git config --global core.pager ""'
|
345 |
-
),
|
346 |
-
create_cmd_output(
|
347 |
-
exit_code=0,
|
348 |
-
content='',
|
349 |
-
command='git config --global --add safe.directory /workspace',
|
350 |
-
),
|
351 |
-
create_cmd_output(
|
352 |
-
exit_code=0, content='', command='git diff base_commit_hash fix'
|
353 |
-
),
|
354 |
-
create_cmd_output(exit_code=0, content='git diff content', command='git apply'),
|
355 |
-
]
|
356 |
-
|
357 |
-
# Create resolver with mocked token identification
|
358 |
-
resolver = IssueResolver(default_mock_args)
|
359 |
-
|
360 |
-
result = await resolver.complete_runtime(mock_runtime, 'base_commit_hash')
|
361 |
-
|
362 |
-
assert result == {'git_patch': 'git diff content'}
|
363 |
-
assert mock_runtime.run_action.call_count == 5
|
364 |
-
|
365 |
-
|
366 |
-
@pytest.mark.asyncio
|
367 |
-
@pytest.mark.parametrize(
|
368 |
-
'test_case',
|
369 |
-
[
|
370 |
-
{
|
371 |
-
'name': 'successful_run',
|
372 |
-
'run_controller_return': MagicMock(
|
373 |
-
history=[NullObservation(content='')],
|
374 |
-
metrics=MagicMock(
|
375 |
-
get=MagicMock(return_value={'test_result': 'passed'})
|
376 |
-
),
|
377 |
-
last_error=None,
|
378 |
-
),
|
379 |
-
'run_controller_raises': None,
|
380 |
-
'expected_success': True,
|
381 |
-
'expected_error': None,
|
382 |
-
'expected_explanation': 'Issue resolved successfully',
|
383 |
-
'is_pr': False,
|
384 |
-
'comment_success': None,
|
385 |
-
},
|
386 |
-
{
|
387 |
-
'name': 'value_error',
|
388 |
-
'run_controller_return': None,
|
389 |
-
'run_controller_raises': ValueError('Test value error'),
|
390 |
-
'expected_success': False,
|
391 |
-
'expected_error': 'Agent failed to run or crashed',
|
392 |
-
'expected_explanation': 'Agent failed to run',
|
393 |
-
'is_pr': False,
|
394 |
-
'comment_success': None,
|
395 |
-
},
|
396 |
-
{
|
397 |
-
'name': 'runtime_error',
|
398 |
-
'run_controller_return': None,
|
399 |
-
'run_controller_raises': RuntimeError('Test runtime error'),
|
400 |
-
'expected_success': False,
|
401 |
-
'expected_error': 'Agent failed to run or crashed',
|
402 |
-
'expected_explanation': 'Agent failed to run',
|
403 |
-
'is_pr': False,
|
404 |
-
'comment_success': None,
|
405 |
-
},
|
406 |
-
{
|
407 |
-
'name': 'json_decode_error',
|
408 |
-
'run_controller_return': MagicMock(
|
409 |
-
history=[NullObservation(content='')],
|
410 |
-
metrics=MagicMock(
|
411 |
-
get=MagicMock(return_value={'test_result': 'passed'})
|
412 |
-
),
|
413 |
-
last_error=None,
|
414 |
-
),
|
415 |
-
'run_controller_raises': None,
|
416 |
-
'expected_success': True,
|
417 |
-
'expected_error': None,
|
418 |
-
'expected_explanation': 'Non-JSON explanation',
|
419 |
-
'is_pr': True,
|
420 |
-
'comment_success': [
|
421 |
-
True,
|
422 |
-
False,
|
423 |
-
], # To trigger the PR success logging code path
|
424 |
-
},
|
425 |
-
],
|
426 |
-
)
|
427 |
-
async def test_process_issue(
|
428 |
-
default_mock_args,
|
429 |
-
mock_github_token,
|
430 |
-
mock_output_dir,
|
431 |
-
mock_user_instructions_template,
|
432 |
-
test_case,
|
433 |
-
):
|
434 |
-
"""Test the process_issue method with different scenarios."""
|
435 |
-
|
436 |
-
# Set up test data
|
437 |
-
issue = Issue(
|
438 |
-
owner='test_owner',
|
439 |
-
repo='test_repo',
|
440 |
-
number=1,
|
441 |
-
title='Test Issue',
|
442 |
-
body='This is a test issue',
|
443 |
-
)
|
444 |
-
base_commit = 'abcdef1234567890'
|
445 |
-
|
446 |
-
# Customize the mock args for this test
|
447 |
-
default_mock_args.output_dir = mock_output_dir
|
448 |
-
default_mock_args.issue_type = 'pr' if test_case.get('is_pr', False) else 'issue'
|
449 |
-
|
450 |
-
# Create a resolver instance with mocked token identification
|
451 |
-
resolver = IssueResolver(default_mock_args)
|
452 |
-
resolver.user_instructions_prompt_template = mock_user_instructions_template
|
453 |
-
|
454 |
-
# Mock the handler with LLM config
|
455 |
-
llm_config = LLMConfig(model='test', api_key='test')
|
456 |
-
handler_instance = MagicMock()
|
457 |
-
handler_instance.guess_success.return_value = (
|
458 |
-
test_case['expected_success'],
|
459 |
-
test_case.get('comment_success', None),
|
460 |
-
test_case['expected_explanation'],
|
461 |
-
)
|
462 |
-
handler_instance.get_instruction.return_value = (
|
463 |
-
'Test instruction',
|
464 |
-
'Test conversation instructions',
|
465 |
-
[],
|
466 |
-
)
|
467 |
-
handler_instance.issue_type = 'pr' if test_case.get('is_pr', False) else 'issue'
|
468 |
-
handler_instance.llm = LLM(llm_config)
|
469 |
-
|
470 |
-
# Mock the runtime and its methods
|
471 |
-
mock_runtime = MagicMock()
|
472 |
-
mock_runtime.connect = AsyncMock()
|
473 |
-
mock_runtime.run_action.return_value = CmdOutputObservation(
|
474 |
-
content='test patch',
|
475 |
-
command='git diff',
|
476 |
-
metadata=CmdOutputMetadata(exit_code=0),
|
477 |
-
)
|
478 |
-
mock_runtime.event_stream.subscribe = MagicMock()
|
479 |
-
|
480 |
-
# Mock the create_runtime function
|
481 |
-
mock_create_runtime = MagicMock(return_value=mock_runtime)
|
482 |
-
|
483 |
-
# Mock the run_controller function
|
484 |
-
mock_run_controller = AsyncMock()
|
485 |
-
if test_case['run_controller_raises']:
|
486 |
-
mock_run_controller.side_effect = test_case['run_controller_raises']
|
487 |
-
else:
|
488 |
-
mock_run_controller.return_value = test_case['run_controller_return']
|
489 |
-
|
490 |
-
# Patch the necessary functions and methods
|
491 |
-
with (
|
492 |
-
patch('openhands.resolver.issue_resolver.create_runtime', mock_create_runtime),
|
493 |
-
patch('openhands.resolver.issue_resolver.run_controller', mock_run_controller),
|
494 |
-
patch.object(
|
495 |
-
resolver, 'complete_runtime', return_value={'git_patch': 'test patch'}
|
496 |
-
),
|
497 |
-
patch.object(resolver, 'initialize_runtime') as mock_initialize_runtime,
|
498 |
-
):
|
499 |
-
# Call the process_issue method
|
500 |
-
result = await resolver.process_issue(issue, base_commit, handler_instance)
|
501 |
-
|
502 |
-
# Assert the result matches our expectations
|
503 |
-
assert isinstance(result, ResolverOutput)
|
504 |
-
assert result.issue == issue
|
505 |
-
assert result.base_commit == base_commit
|
506 |
-
assert result.git_patch == 'test patch'
|
507 |
-
assert result.success == test_case['expected_success']
|
508 |
-
assert result.result_explanation == test_case['expected_explanation']
|
509 |
-
assert result.error == test_case['expected_error']
|
510 |
-
|
511 |
-
# Assert that the mocked functions were called
|
512 |
-
mock_create_runtime.assert_called_once()
|
513 |
-
mock_runtime.connect.assert_called_once()
|
514 |
-
mock_initialize_runtime.assert_called_once()
|
515 |
-
mock_run_controller.assert_called_once()
|
516 |
-
resolver.complete_runtime.assert_awaited_once_with(mock_runtime, base_commit)
|
517 |
-
|
518 |
-
# Assert run_controller was called with the right parameters
|
519 |
-
if not test_case['run_controller_raises']:
|
520 |
-
# Check that the first positional argument is a config
|
521 |
-
assert 'config' in mock_run_controller.call_args[1]
|
522 |
-
# Check that initial_user_action is a MessageAction with the right content
|
523 |
-
assert isinstance(
|
524 |
-
mock_run_controller.call_args[1]['initial_user_action'], MessageAction
|
525 |
-
)
|
526 |
-
assert mock_run_controller.call_args[1]['runtime'] == mock_runtime
|
527 |
-
|
528 |
-
# Assert that guess_success was called only for successful runs
|
529 |
-
if test_case['expected_success']:
|
530 |
-
handler_instance.guess_success.assert_called_once()
|
531 |
-
else:
|
532 |
-
handler_instance.guess_success.assert_not_called()
|
533 |
-
|
534 |
-
|
535 |
-
def test_get_instruction(mock_user_instructions_template, mock_conversation_instructions_template, mock_followup_prompt_template):
|
536 |
-
issue = Issue(
|
537 |
-
owner='test_owner',
|
538 |
-
repo='test_repo',
|
539 |
-
number=123,
|
540 |
-
title='Test Issue',
|
541 |
-
body='This is a test issue refer to image ',
|
542 |
-
)
|
543 |
-
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
|
544 |
-
issue_handler = ServiceContextIssue(
|
545 |
-
GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
|
546 |
-
)
|
547 |
-
instruction, conversation_instructions, images_urls = issue_handler.get_instruction(
|
548 |
-
issue, mock_user_instructions_template, mock_conversation_instructions_template, None
|
549 |
-
)
|
550 |
-
expected_instruction = 'Issue: Test Issue\n\nThis is a test issue refer to image \n\nPlease fix this issue.'
|
551 |
-
|
552 |
-
assert images_urls == ['https://sampleimage.com/image1.png']
|
553 |
-
assert issue_handler.issue_type == 'issue'
|
554 |
-
assert instruction == expected_instruction
|
555 |
-
assert conversation_instructions is not None
|
556 |
-
|
557 |
-
issue = Issue(
|
558 |
-
owner='test_owner',
|
559 |
-
repo='test_repo',
|
560 |
-
number=123,
|
561 |
-
title='Test Issue',
|
562 |
-
body='This is a test issue',
|
563 |
-
closing_issues=['Issue 1 fix the type'],
|
564 |
-
review_threads=[
|
565 |
-
ReviewThread(
|
566 |
-
comment="There is still a typo 'pthon' instead of 'python'", files=[]
|
567 |
-
)
|
568 |
-
],
|
569 |
-
thread_comments=[
|
570 |
-
"I've left review comments, please address them",
|
571 |
-
'This is a valid concern.',
|
572 |
-
],
|
573 |
-
)
|
574 |
-
|
575 |
-
pr_handler = ServiceContextPR(
|
576 |
-
GithubPRHandler('owner', 'repo', 'token'), mock_llm_config
|
577 |
-
)
|
578 |
-
instruction, conversation_instructions, images_urls = pr_handler.get_instruction(
|
579 |
-
issue, mock_followup_prompt_template, mock_conversation_instructions_template, None
|
580 |
-
)
|
581 |
-
expected_instruction = "Issue context: [\n \"Issue 1 fix the type\"\n]\n\nReview comments: None\n\nReview threads: [\n \"There is still a typo 'pthon' instead of 'python'\"\n]\n\nFiles: []\n\nThread comments: I've left review comments, please address them\n---\nThis is a valid concern.\n\nPlease fix this issue."
|
582 |
-
|
583 |
-
assert images_urls == []
|
584 |
-
assert pr_handler.issue_type == 'pr'
|
585 |
-
# Compare content ignoring exact formatting
|
586 |
-
assert "There is still a typo 'pthon' instead of 'python'" in instruction
|
587 |
-
assert "I've left review comments, please address them" in instruction
|
588 |
-
assert 'This is a valid concern' in instruction
|
589 |
-
assert conversation_instructions is not None
|
590 |
-
|
591 |
-
|
592 |
-
def test_file_instruction():
|
593 |
-
issue = Issue(
|
594 |
-
owner='test_owner',
|
595 |
-
repo='test_repo',
|
596 |
-
number=123,
|
597 |
-
title='Test Issue',
|
598 |
-
body='This is a test issue ',
|
599 |
-
)
|
600 |
-
# load prompt from openhands/resolver/prompts/resolve/basic.jinja
|
601 |
-
with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
|
602 |
-
prompt = f.read()
|
603 |
-
|
604 |
-
with open('openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r') as f:
|
605 |
-
conversation_instructions_template = f.read()
|
606 |
-
|
607 |
-
# Test without thread comments
|
608 |
-
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
|
609 |
-
issue_handler = ServiceContextIssue(
|
610 |
-
GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
|
611 |
-
)
|
612 |
-
instruction, conversation_instructions, images_urls = issue_handler.get_instruction(
|
613 |
-
issue, prompt,conversation_instructions_template, None
|
614 |
-
)
|
615 |
-
expected_instruction = """Please fix the following issue for the repository in /workspace.
|
616 |
-
An environment has been set up for you to start working. You may assume all necessary tools are installed.
|
617 |
-
|
618 |
-
# Problem Statement
|
619 |
-
Test Issue
|
620 |
-
|
621 |
-
This is a test issue """
|
622 |
-
|
623 |
-
|
624 |
-
expected_conversation_instructions = """IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.
|
625 |
-
You SHOULD INCLUDE PROPER INDENTATION in your edit commands.
|
626 |
-
|
627 |
-
When you think you have fixed the issue through code changes, please finish the interaction."""
|
628 |
-
|
629 |
-
assert instruction == expected_instruction
|
630 |
-
assert conversation_instructions == expected_conversation_instructions
|
631 |
-
|
632 |
-
assert images_urls == ['https://sampleimage.com/sample.png']
|
633 |
-
|
634 |
-
|
635 |
-
def test_file_instruction_with_repo_instruction():
|
636 |
-
issue = Issue(
|
637 |
-
owner='test_owner',
|
638 |
-
repo='test_repo',
|
639 |
-
number=123,
|
640 |
-
title='Test Issue',
|
641 |
-
body='This is a test issue',
|
642 |
-
)
|
643 |
-
# load prompt from openhands/resolver/prompts/resolve/basic.jinja
|
644 |
-
with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
|
645 |
-
prompt = f.read()
|
646 |
-
|
647 |
-
with open('openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r') as f:
|
648 |
-
conversation_instructions_prompt = f.read()
|
649 |
-
|
650 |
-
# load repo instruction from openhands/resolver/prompts/repo_instructions/all-hands-ai___openhands-resolver.txt
|
651 |
-
with open(
|
652 |
-
'openhands/resolver/prompts/repo_instructions/all-hands-ai___openhands-resolver.txt',
|
653 |
-
'r',
|
654 |
-
) as f:
|
655 |
-
repo_instruction = f.read()
|
656 |
-
|
657 |
-
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
|
658 |
-
issue_handler = ServiceContextIssue(
|
659 |
-
GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
|
660 |
-
)
|
661 |
-
instruction, conversation_instructions, image_urls = issue_handler.get_instruction(
|
662 |
-
issue, prompt, conversation_instructions_prompt, repo_instruction
|
663 |
-
)
|
664 |
-
|
665 |
-
|
666 |
-
expected_instruction = """Please fix the following issue for the repository in /workspace.
|
667 |
-
An environment has been set up for you to start working. You may assume all necessary tools are installed.
|
668 |
-
|
669 |
-
# Problem Statement
|
670 |
-
Test Issue
|
671 |
-
|
672 |
-
This is a test issue"""
|
673 |
-
|
674 |
-
expected_conversation_instructions = """IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.
|
675 |
-
You SHOULD INCLUDE PROPER INDENTATION in your edit commands.
|
676 |
-
|
677 |
-
Some basic information about this repository:
|
678 |
-
This is a Python repo for openhands-resolver, a library that attempts to resolve github issues with the AI agent OpenHands.
|
679 |
-
|
680 |
-
- Setup: `poetry install --with test --with dev`
|
681 |
-
- Testing: `poetry run pytest tests/test_*.py`
|
682 |
-
|
683 |
-
|
684 |
-
When you think you have fixed the issue through code changes, please finish the interaction."""
|
685 |
-
|
686 |
-
|
687 |
-
assert instruction == expected_instruction
|
688 |
-
assert conversation_instructions == expected_conversation_instructions
|
689 |
-
assert conversation_instructions is not None
|
690 |
-
assert issue_handler.issue_type == 'issue'
|
691 |
-
assert image_urls == []
|
692 |
-
|
693 |
-
|
694 |
-
def test_guess_success():
|
695 |
-
mock_issue = Issue(
|
696 |
-
owner='test_owner',
|
697 |
-
repo='test_repo',
|
698 |
-
number=1,
|
699 |
-
title='Test Issue',
|
700 |
-
body='This is a test issue',
|
701 |
-
)
|
702 |
-
mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
|
703 |
-
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
|
704 |
-
|
705 |
-
mock_completion_response = MagicMock()
|
706 |
-
mock_completion_response.choices = [
|
707 |
-
MagicMock(
|
708 |
-
message=MagicMock(
|
709 |
-
content='--- success\ntrue\n--- explanation\nIssue resolved successfully'
|
710 |
-
)
|
711 |
-
)
|
712 |
-
]
|
713 |
-
issue_handler = ServiceContextIssue(
|
714 |
-
GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
|
715 |
-
)
|
716 |
-
|
717 |
-
with patch.object(
|
718 |
-
LLM, 'completion', MagicMock(return_value=mock_completion_response)
|
719 |
-
):
|
720 |
-
success, comment_success, explanation = issue_handler.guess_success(
|
721 |
-
mock_issue, mock_history
|
722 |
-
)
|
723 |
-
assert issue_handler.issue_type == 'issue'
|
724 |
-
assert comment_success is None
|
725 |
-
assert success
|
726 |
-
assert explanation == 'Issue resolved successfully'
|
727 |
-
|
728 |
-
|
729 |
-
def test_guess_success_with_thread_comments():
|
730 |
-
mock_issue = Issue(
|
731 |
-
owner='test_owner',
|
732 |
-
repo='test_repo',
|
733 |
-
number=1,
|
734 |
-
title='Test Issue',
|
735 |
-
body='This is a test issue',
|
736 |
-
thread_comments=[
|
737 |
-
'First comment',
|
738 |
-
'Second comment',
|
739 |
-
'latest feedback:\nPlease add tests',
|
740 |
-
],
|
741 |
-
)
|
742 |
-
mock_history = [MagicMock(message='I have added tests for this case')]
|
743 |
-
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
|
744 |
-
|
745 |
-
mock_completion_response = MagicMock()
|
746 |
-
mock_completion_response.choices = [
|
747 |
-
MagicMock(
|
748 |
-
message=MagicMock(
|
749 |
-
content='--- success\ntrue\n--- explanation\nTests have been added to verify thread comments handling'
|
750 |
-
)
|
751 |
-
)
|
752 |
-
]
|
753 |
-
issue_handler = ServiceContextIssue(
|
754 |
-
GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
|
755 |
-
)
|
756 |
-
|
757 |
-
with patch.object(
|
758 |
-
LLM, 'completion', MagicMock(return_value=mock_completion_response)
|
759 |
-
):
|
760 |
-
success, comment_success, explanation = issue_handler.guess_success(
|
761 |
-
mock_issue, mock_history
|
762 |
-
)
|
763 |
-
assert issue_handler.issue_type == 'issue'
|
764 |
-
assert comment_success is None
|
765 |
-
assert success
|
766 |
-
assert 'Tests have been added' in explanation
|
767 |
-
|
768 |
-
|
769 |
-
def test_instruction_with_thread_comments():
|
770 |
-
# Create an issue with thread comments
|
771 |
-
issue = Issue(
|
772 |
-
owner='test_owner',
|
773 |
-
repo='test_repo',
|
774 |
-
number=123,
|
775 |
-
title='Test Issue',
|
776 |
-
body='This is a test issue',
|
777 |
-
thread_comments=[
|
778 |
-
'First comment',
|
779 |
-
'Second comment',
|
780 |
-
'latest feedback:\nPlease add tests',
|
781 |
-
],
|
782 |
-
)
|
783 |
-
|
784 |
-
# Load the basic prompt template
|
785 |
-
with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
|
786 |
-
prompt = f.read()
|
787 |
-
|
788 |
-
with open('openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r') as f:
|
789 |
-
conversation_instructions_template = f.read()
|
790 |
-
|
791 |
-
llm_config = LLMConfig(model='test', api_key='test')
|
792 |
-
issue_handler = ServiceContextIssue(
|
793 |
-
GithubIssueHandler('owner', 'repo', 'token'), llm_config
|
794 |
-
)
|
795 |
-
instruction, _, images_urls = issue_handler.get_instruction(
|
796 |
-
issue, prompt, conversation_instructions_template, None
|
797 |
-
)
|
798 |
-
|
799 |
-
# Verify that thread comments are included in the instruction
|
800 |
-
assert 'First comment' in instruction
|
801 |
-
assert 'Second comment' in instruction
|
802 |
-
assert 'Please add tests' in instruction
|
803 |
-
assert 'Issue Thread Comments:' in instruction
|
804 |
-
assert images_urls == []
|
805 |
-
|
806 |
-
|
807 |
-
def test_guess_success_failure():
|
808 |
-
mock_issue = Issue(
|
809 |
-
owner='test_owner',
|
810 |
-
repo='test_repo',
|
811 |
-
number=1,
|
812 |
-
title='Test Issue',
|
813 |
-
body='This is a test issue',
|
814 |
-
thread_comments=[
|
815 |
-
'First comment',
|
816 |
-
'Second comment',
|
817 |
-
'latest feedback:\nPlease add tests',
|
818 |
-
],
|
819 |
-
)
|
820 |
-
mock_history = [MagicMock(message='I have added tests for this case')]
|
821 |
-
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
|
822 |
-
|
823 |
-
mock_completion_response = MagicMock()
|
824 |
-
mock_completion_response.choices = [
|
825 |
-
MagicMock(
|
826 |
-
message=MagicMock(
|
827 |
-
content='--- success\ntrue\n--- explanation\nTests have been added to verify thread comments handling'
|
828 |
-
)
|
829 |
-
)
|
830 |
-
]
|
831 |
-
issue_handler = ServiceContextIssue(
|
832 |
-
GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
|
833 |
-
)
|
834 |
-
|
835 |
-
with patch.object(
|
836 |
-
LLM, 'completion', MagicMock(return_value=mock_completion_response)
|
837 |
-
):
|
838 |
-
success, comment_success, explanation = issue_handler.guess_success(
|
839 |
-
mock_issue, mock_history
|
840 |
-
)
|
841 |
-
assert issue_handler.issue_type == 'issue'
|
842 |
-
assert comment_success is None
|
843 |
-
assert success
|
844 |
-
assert 'Tests have been added' in explanation
|
845 |
-
|
846 |
-
|
847 |
-
def test_guess_success_negative_case():
|
848 |
-
mock_issue = Issue(
|
849 |
-
owner='test_owner',
|
850 |
-
repo='test_repo',
|
851 |
-
number=1,
|
852 |
-
title='Test Issue',
|
853 |
-
body='This is a test issue',
|
854 |
-
)
|
855 |
-
mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
|
856 |
-
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
|
857 |
-
|
858 |
-
mock_completion_response = MagicMock()
|
859 |
-
mock_completion_response.choices = [
|
860 |
-
MagicMock(
|
861 |
-
message=MagicMock(
|
862 |
-
content='--- success\nfalse\n--- explanation\nIssue not resolved'
|
863 |
-
)
|
864 |
-
)
|
865 |
-
]
|
866 |
-
issue_handler = ServiceContextIssue(
|
867 |
-
GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
|
868 |
-
)
|
869 |
-
|
870 |
-
with patch.object(
|
871 |
-
LLM, 'completion', MagicMock(return_value=mock_completion_response)
|
872 |
-
):
|
873 |
-
success, comment_success, explanation = issue_handler.guess_success(
|
874 |
-
mock_issue, mock_history
|
875 |
-
)
|
876 |
-
assert issue_handler.issue_type == 'issue'
|
877 |
-
assert comment_success is None
|
878 |
-
assert not success
|
879 |
-
assert explanation == 'Issue not resolved'
|
880 |
-
|
881 |
-
|
882 |
-
def test_guess_success_invalid_output():
|
883 |
-
mock_issue = Issue(
|
884 |
-
owner='test_owner',
|
885 |
-
repo='test_repo',
|
886 |
-
number=1,
|
887 |
-
title='Test Issue',
|
888 |
-
body='This is a test issue',
|
889 |
-
)
|
890 |
-
mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
|
891 |
-
mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
|
892 |
-
|
893 |
-
mock_completion_response = MagicMock()
|
894 |
-
mock_completion_response.choices = [
|
895 |
-
MagicMock(message=MagicMock(content='This is not a valid output'))
|
896 |
-
]
|
897 |
-
issue_handler = ServiceContextIssue(
|
898 |
-
GithubIssueHandler('owner', 'repo', 'token'), mock_llm_config
|
899 |
-
)
|
900 |
-
|
901 |
-
with patch.object(
|
902 |
-
LLM, 'completion', MagicMock(return_value=mock_completion_response)
|
903 |
-
):
|
904 |
-
success, comment_success, explanation = issue_handler.guess_success(
|
905 |
-
mock_issue, mock_history
|
906 |
-
)
|
907 |
-
assert issue_handler.issue_type == 'issue'
|
908 |
-
assert comment_success is None
|
909 |
-
assert not success
|
910 |
-
assert (
|
911 |
-
explanation
|
912 |
-
== 'Failed to decode answer from LLM response: This is not a valid output'
|
913 |
-
)
|
914 |
-
|
915 |
-
|
916 |
-
def test_download_pr_with_review_comments():
|
917 |
-
llm_config = LLMConfig(model='test', api_key='test')
|
918 |
-
handler = ServiceContextPR(GithubPRHandler('owner', 'repo', 'token'), llm_config)
|
919 |
-
mock_pr_response = MagicMock()
|
920 |
-
mock_pr_response.json.side_effect = [
|
921 |
-
[
|
922 |
-
{
|
923 |
-
'number': 1,
|
924 |
-
'title': 'PR 1',
|
925 |
-
'body': 'This is a pull request',
|
926 |
-
'head': {'ref': 'b1'},
|
927 |
-
},
|
928 |
-
],
|
929 |
-
None,
|
930 |
-
]
|
931 |
-
mock_pr_response.raise_for_status = MagicMock()
|
932 |
-
|
933 |
-
# Mock for PR comments response
|
934 |
-
mock_comments_response = MagicMock()
|
935 |
-
mock_comments_response.json.return_value = [] # No PR comments
|
936 |
-
mock_comments_response.raise_for_status = MagicMock()
|
937 |
-
|
938 |
-
# Mock for GraphQL request with review comments but no threads
|
939 |
-
mock_graphql_response = MagicMock()
|
940 |
-
mock_graphql_response.json.side_effect = lambda: {
|
941 |
-
'data': {
|
942 |
-
'repository': {
|
943 |
-
'pullRequest': {
|
944 |
-
'closingIssuesReferences': {'edges': []},
|
945 |
-
'reviews': {
|
946 |
-
'nodes': [
|
947 |
-
{'body': 'Please fix this typo'},
|
948 |
-
{'body': 'Add more tests'},
|
949 |
-
]
|
950 |
-
},
|
951 |
-
}
|
952 |
-
}
|
953 |
-
}
|
954 |
-
}
|
955 |
-
|
956 |
-
mock_graphql_response.raise_for_status = MagicMock()
|
957 |
-
|
958 |
-
def get_mock_response(url, *args, **kwargs):
|
959 |
-
if '/comments' in url:
|
960 |
-
return mock_comments_response
|
961 |
-
return mock_pr_response
|
962 |
-
|
963 |
-
with patch('httpx.get', side_effect=get_mock_response):
|
964 |
-
with patch('httpx.post', return_value=mock_graphql_response):
|
965 |
-
issues = handler.get_converted_issues(issue_numbers=[1])
|
966 |
-
|
967 |
-
assert len(issues) == 1
|
968 |
-
assert handler.issue_type == 'pr'
|
969 |
-
assert isinstance(issues[0], Issue)
|
970 |
-
assert issues[0].number == 1
|
971 |
-
assert issues[0].title == 'PR 1'
|
972 |
-
assert issues[0].head_branch == 'b1'
|
973 |
-
|
974 |
-
# Verify review comments are set but threads are empty
|
975 |
-
assert len(issues[0].review_comments) == 2
|
976 |
-
assert issues[0].review_comments[0] == 'Please fix this typo'
|
977 |
-
assert issues[0].review_comments[1] == 'Add more tests'
|
978 |
-
assert not issues[0].review_threads
|
979 |
-
assert not issues[0].closing_issues
|
980 |
-
assert not issues[0].thread_ids
|
981 |
-
|
982 |
-
|
983 |
-
def test_download_issue_with_specific_comment():
|
984 |
-
llm_config = LLMConfig(model='test', api_key='test')
|
985 |
-
handler = ServiceContextIssue(
|
986 |
-
GithubIssueHandler('owner', 'repo', 'token'), llm_config
|
987 |
-
)
|
988 |
-
|
989 |
-
# Define the specific comment_id to filter
|
990 |
-
specific_comment_id = 101
|
991 |
-
|
992 |
-
# Mock issue and comment responses
|
993 |
-
mock_issue_response = MagicMock()
|
994 |
-
mock_issue_response.json.side_effect = [
|
995 |
-
[
|
996 |
-
{'number': 1, 'title': 'Issue 1', 'body': 'This is an issue'},
|
997 |
-
],
|
998 |
-
None,
|
999 |
-
]
|
1000 |
-
mock_issue_response.raise_for_status = MagicMock()
|
1001 |
-
|
1002 |
-
mock_comments_response = MagicMock()
|
1003 |
-
mock_comments_response.json.return_value = [
|
1004 |
-
{
|
1005 |
-
'id': specific_comment_id,
|
1006 |
-
'body': 'Specific comment body',
|
1007 |
-
'issue_url': 'https://api.github.com/repos/owner/repo/issues/1',
|
1008 |
-
},
|
1009 |
-
{
|
1010 |
-
'id': 102,
|
1011 |
-
'body': 'Another comment body',
|
1012 |
-
'issue_url': 'https://api.github.com/repos/owner/repo/issues/2',
|
1013 |
-
},
|
1014 |
-
]
|
1015 |
-
mock_comments_response.raise_for_status = MagicMock()
|
1016 |
-
|
1017 |
-
def get_mock_response(url, *args, **kwargs):
|
1018 |
-
if '/comments' in url:
|
1019 |
-
return mock_comments_response
|
1020 |
-
|
1021 |
-
return mock_issue_response
|
1022 |
-
|
1023 |
-
with patch('httpx.get', side_effect=get_mock_response):
|
1024 |
-
issues = handler.get_converted_issues(
|
1025 |
-
issue_numbers=[1], comment_id=specific_comment_id
|
1026 |
-
)
|
1027 |
-
|
1028 |
-
assert len(issues) == 1
|
1029 |
-
assert issues[0].number == 1
|
1030 |
-
assert issues[0].title == 'Issue 1'
|
1031 |
-
assert issues[0].thread_comments == ['Specific comment body']
|
1032 |
-
|
1033 |
-
|
1034 |
-
if __name__ == '__main__':
|
1035 |
-
pytest.main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/unit/resolver/github/test_send_pull_request.py
DELETED
@@ -1,1304 +0,0 @@
import os
import tempfile
from unittest.mock import ANY, MagicMock, call, patch

import pytest

from openhands.core.config import LLMConfig
from openhands.integrations.service_types import ProviderType
from openhands.resolver.interfaces.github import GithubIssueHandler
from openhands.resolver.interfaces.issue import ReviewThread
from openhands.resolver.resolver_output import Issue, ResolverOutput
from openhands.resolver.send_pull_request import (
    apply_patch,
    initialize_repo,
    load_single_resolver_output,
    main,
    make_commit,
    process_single_issue,
    send_pull_request,
    update_existing_pull_request,
)


@pytest.fixture
def mock_output_dir():
    with tempfile.TemporaryDirectory() as temp_dir:
        repo_path = os.path.join(temp_dir, 'repo')
        # Initialize a GitHub repo in "repo" and add a commit with "README.md"
        os.makedirs(repo_path)
        os.system(f'git init {repo_path}')
        readme_path = os.path.join(repo_path, 'README.md')
        with open(readme_path, 'w') as f:
            f.write('hello world')
        os.system(f'git -C {repo_path} add README.md')
        os.system(f"git -C {repo_path} commit -m 'Initial commit'")
        yield temp_dir


@pytest.fixture
def mock_issue():
    return Issue(
        number=42,
        title='Test Issue',
        owner='test-owner',
        repo='test-repo',
        body='Test body',
    )


@pytest.fixture
def mock_llm_config():
    return LLMConfig()


def test_load_single_resolver_output():
    mock_output_jsonl = 'tests/unit/resolver/mock_output/output.jsonl'

    # Test loading an existing issue
    resolver_output = load_single_resolver_output(mock_output_jsonl, 5)
    assert isinstance(resolver_output, ResolverOutput)
    assert resolver_output.issue.number == 5
    assert resolver_output.issue.title == 'Add MIT license'
    assert resolver_output.issue.owner == 'neubig'
    assert resolver_output.issue.repo == 'pr-viewer'

    # Test loading a non-existent issue
    with pytest.raises(ValueError):
        load_single_resolver_output(mock_output_jsonl, 999)


def test_apply_patch(mock_output_dir):
    # Create a sample file in the mock repo
    sample_file = os.path.join(mock_output_dir, 'sample.txt')
    with open(sample_file, 'w') as f:
        f.write('Original content')

    # Create a sample patch
    patch_content = """
diff --git a/sample.txt b/sample.txt
index 9daeafb..b02def2 100644
--- a/sample.txt
+++ b/sample.txt
@@ -1 +1,2 @@
-Original content
+Updated content
+New line
"""

    # Apply the patch
    apply_patch(mock_output_dir, patch_content)

    # Check if the file was updated correctly
    with open(sample_file, 'r') as f:
        updated_content = f.read()

    assert updated_content.strip() == 'Updated content\nNew line'.strip()


def test_apply_patch_preserves_line_endings(mock_output_dir):
    # Create sample files with different line endings
    unix_file = os.path.join(mock_output_dir, 'unix_style.txt')
    dos_file = os.path.join(mock_output_dir, 'dos_style.txt')

    with open(unix_file, 'w', newline='\n') as f:
        f.write('Line 1\nLine 2\nLine 3')

    with open(dos_file, 'w', newline='\r\n') as f:
        f.write('Line 1\r\nLine 2\r\nLine 3')

    # Create patches for both files
    unix_patch = """
diff --git a/unix_style.txt b/unix_style.txt
index 9daeafb..b02def2 100644
--- a/unix_style.txt
+++ b/unix_style.txt
@@ -1,3 +1,3 @@
 Line 1
-Line 2
+Updated Line 2
 Line 3
"""

    dos_patch = """
diff --git a/dos_style.txt b/dos_style.txt
index 9daeafb..b02def2 100644
--- a/dos_style.txt
+++ b/dos_style.txt
@@ -1,3 +1,3 @@
 Line 1
-Line 2
+Updated Line 2
 Line 3
"""

    # Apply patches
    apply_patch(mock_output_dir, unix_patch)
    apply_patch(mock_output_dir, dos_patch)

    # Check if line endings are preserved
    with open(unix_file, 'rb') as f:
        unix_content = f.read()
    with open(dos_file, 'rb') as f:
        dos_content = f.read()

    assert b'\r\n' not in unix_content, (
        'Unix-style line endings were changed to DOS-style'
    )
    assert b'\r\n' in dos_content, 'DOS-style line endings were changed to Unix-style'

    # Check if content was updated correctly
    assert unix_content.decode('utf-8').split('\n')[1] == 'Updated Line 2'
    assert dos_content.decode('utf-8').split('\r\n')[1] == 'Updated Line 2'


def test_apply_patch_create_new_file(mock_output_dir):
    # Create a patch that adds a new file
    patch_content = """
diff --git a/new_file.txt b/new_file.txt
new file mode 100644
index 0000000..3b18e51
--- /dev/null
+++ b/new_file.txt
@@ -0,0 +1 @@
+hello world
"""

    # Apply the patch
    apply_patch(mock_output_dir, patch_content)

    # Check if the new file was created
    new_file_path = os.path.join(mock_output_dir, 'new_file.txt')
    assert os.path.exists(new_file_path), 'New file was not created'

    # Check if the file content is correct
    with open(new_file_path, 'r') as f:
        content = f.read().strip()
    assert content == 'hello world', 'File content is incorrect'


def test_apply_patch_rename_file(mock_output_dir):
    # Create a sample file in the mock repo
    old_file = os.path.join(mock_output_dir, 'old_name.txt')
    with open(old_file, 'w') as f:
        f.write('This file will be renamed')

    # Create a patch that renames the file
    patch_content = """diff --git a/old_name.txt b/new_name.txt
similarity index 100%
rename from old_name.txt
rename to new_name.txt"""

    # Apply the patch
    apply_patch(mock_output_dir, patch_content)

    # Check if the file was renamed
    new_file = os.path.join(mock_output_dir, 'new_name.txt')
    assert not os.path.exists(old_file), 'Old file still exists'
    assert os.path.exists(new_file), 'New file was not created'

    # Check if the content is preserved
    with open(new_file, 'r') as f:
        content = f.read()
    assert content == 'This file will be renamed'


def test_apply_patch_delete_file(mock_output_dir):
    # Create a sample file in the mock repo
    sample_file = os.path.join(mock_output_dir, 'to_be_deleted.txt')
    with open(sample_file, 'w') as f:
        f.write('This file will be deleted')

    # Create a patch that deletes the file
    patch_content = """
diff --git a/to_be_deleted.txt b/to_be_deleted.txt
deleted file mode 100644
index 9daeafb..0000000
--- a/to_be_deleted.txt
+++ /dev/null
@@ -1 +0,0 @@
-This file will be deleted
"""

    # Apply the patch
    apply_patch(mock_output_dir, patch_content)

    # Check if the file was deleted
    assert not os.path.exists(sample_file), 'File was not deleted'


def test_initialize_repo(mock_output_dir):
    issue_type = 'issue'
    # Copy the repo to patches
    ISSUE_NUMBER = 3
    initialize_repo(mock_output_dir, ISSUE_NUMBER, issue_type)
    patches_dir = os.path.join(mock_output_dir, 'patches', f'issue_{ISSUE_NUMBER}')

    # Check if files were copied correctly
    assert os.path.exists(os.path.join(patches_dir, 'README.md'))

    # Check file contents
    with open(os.path.join(patches_dir, 'README.md'), 'r') as f:
        assert f.read() == 'hello world'


@patch('openhands.resolver.interfaces.github.GithubIssueHandler.reply_to_comment')
@patch('httpx.post')
@patch('subprocess.run')
@patch('openhands.resolver.send_pull_request.LLM')
def test_update_existing_pull_request(
    mock_llm_class,
    mock_subprocess_run,
    mock_requests_post,
    mock_reply_to_comment,
):
    # Arrange: Set up test data
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='This is a test PR',
        thread_ids=['comment1', 'comment2'],
        head_branch='test-branch',
    )
    token = 'test-token'
    username = 'test-user'
    patch_dir = '/path/to/patch'
    additional_message = '["Fixed bug in function A", "Updated documentation for B"]'

    # Mock the subprocess.run call for git push
    mock_subprocess_run.return_value = MagicMock(returncode=0)

    # Mock the requests.post call for adding a PR comment
    mock_requests_post.return_value.status_code = 201

    # Mock LLM instance and completion call
    mock_llm_instance = MagicMock()
    mock_completion_response = MagicMock()
    mock_completion_response.choices = [
        MagicMock(message=MagicMock(content='This is an issue resolution.'))
    ]
    mock_llm_instance.completion.return_value = mock_completion_response
    mock_llm_class.return_value = mock_llm_instance

    llm_config = LLMConfig()

    # Act: Call the function without comment_message to test auto-generation
    result = update_existing_pull_request(
        issue,
        token,
        username,
        ProviderType.GITHUB,
        patch_dir,
        llm_config,
        comment_message=None,
        additional_message=additional_message,
    )

    # Assert: Check if the git push command was executed
    push_command = (
        f'git -C {patch_dir} push '
        f'https://{username}:{token}@github.com/'
        f'{issue.owner}/{issue.repo}.git {issue.head_branch}'
    )
    mock_subprocess_run.assert_called_once_with(
        push_command, shell=True, capture_output=True, text=True
    )

    # Assert: Check if the auto-generated comment was posted to the PR
    comment_url = f'https://api.github.com/repos/{issue.owner}/{issue.repo}/issues/{issue.number}/comments'
    expected_comment = 'This is an issue resolution.'
    mock_requests_post.assert_called_once_with(
        comment_url,
        headers={
            'Authorization': f'token {token}',
            'Accept': 'application/vnd.github.v3+json',
        },
        json={'body': expected_comment},
    )

    # Assert: Check if the reply_to_comment function was called for each thread ID
    mock_reply_to_comment.assert_has_calls(
        [
            call(issue.number, 'comment1', 'Fixed bug in function A'),
            call(issue.number, 'comment2', 'Updated documentation for B'),
        ]
    )

    # Assert: Check the returned PR URL
    assert (
        result == f'https://github.com/{issue.owner}/{issue.repo}/pull/{issue.number}'
    )


@pytest.mark.parametrize(
    'pr_type,target_branch,pr_title',
    [
        ('branch', None, None),
        ('draft', None, None),
        ('ready', None, None),
        ('branch', 'feature', None),
        ('draft', 'develop', None),
        ('ready', 'staging', None),
        ('ready', None, 'Custom PR Title'),
        ('draft', 'develop', 'Another Custom Title'),
    ],
)
@patch('subprocess.run')
@patch('httpx.post')
@patch('httpx.get')
def test_send_pull_request(
    mock_get,
    mock_post,
    mock_run,
    mock_issue,
    mock_llm_config,
    mock_output_dir,
    pr_type,
    target_branch,
    pr_title,
):
    repo_path = os.path.join(mock_output_dir, 'repo')

    # Mock API responses based on whether target_branch is specified
    if target_branch:
        mock_get.side_effect = [
            MagicMock(status_code=404),  # Branch doesn't exist
            MagicMock(status_code=200),  # Target branch exists
        ]
    else:
        mock_get.side_effect = [
            MagicMock(status_code=404),  # Branch doesn't exist
            MagicMock(json=lambda: {'default_branch': 'main'}),  # Get default branch
        ]

    mock_post.return_value.json.return_value = {
        'html_url': 'https://github.com/test-owner/test-repo/pull/1'
    }

    # Mock subprocess.run calls
    mock_run.side_effect = [
        MagicMock(returncode=0),  # git checkout -b
        MagicMock(returncode=0),  # git push
    ]

    # Call the function
    result = send_pull_request(
        issue=mock_issue,
        token='test-token',
        username='test-user',
        platform=ProviderType.GITHUB,
        patch_dir=repo_path,
        pr_type=pr_type,
        target_branch=target_branch,
        pr_title=pr_title,
    )

    # Assert API calls
    expected_get_calls = 2
    assert mock_get.call_count == expected_get_calls

    # Check branch creation and push
    assert mock_run.call_count == 2
    checkout_call, push_call = mock_run.call_args_list

    assert checkout_call == call(
        ['git', '-C', repo_path, 'checkout', '-b', 'openhands-fix-issue-42'],
        capture_output=True,
        text=True,
    )
    assert push_call == call(
        [
            'git',
            '-C',
            repo_path,
            'push',
            'https://test-user:test-token@github.com/test-owner/test-repo.git',
            'openhands-fix-issue-42',
        ],
        capture_output=True,
        text=True,
    )

    # Check PR creation based on pr_type
    if pr_type == 'branch':
        assert (
            result
            == 'https://github.com/test-owner/test-repo/compare/openhands-fix-issue-42?expand=1'
        )
        mock_post.assert_not_called()
    else:
        assert result == 'https://github.com/test-owner/test-repo/pull/1'
        mock_post.assert_called_once()
        post_data = mock_post.call_args[1]['json']
        expected_title = pr_title if pr_title else 'Fix issue #42: Test Issue'
        assert post_data['title'] == expected_title
        assert post_data['body'].startswith('This pull request fixes #42.')
        assert post_data['head'] == 'openhands-fix-issue-42'
        assert post_data['base'] == (target_branch if target_branch else 'main')
        assert post_data['draft'] == (pr_type == 'draft')


@patch('subprocess.run')
@patch('httpx.post')
@patch('httpx.get')
def test_send_pull_request_with_reviewer(
    mock_get, mock_post, mock_run, mock_issue, mock_output_dir, mock_llm_config
):
    repo_path = os.path.join(mock_output_dir, 'repo')
    reviewer = 'test-reviewer'

    # Mock API responses
    mock_get.side_effect = [
        MagicMock(status_code=404),  # Branch doesn't exist
        MagicMock(json=lambda: {'default_branch': 'main'}),  # Get default branch
    ]

    # Mock PR creation response
    mock_post.side_effect = [
        MagicMock(
            status_code=201,
            json=lambda: {
                'html_url': 'https://github.com/test-owner/test-repo/pull/1',
                'number': 1,
            },
        ),  # PR creation
        MagicMock(status_code=201),  # Reviewer request
    ]

    # Mock subprocess.run calls
    mock_run.side_effect = [
        MagicMock(returncode=0),  # git checkout -b
        MagicMock(returncode=0),  # git push
    ]

    # Call the function with reviewer
    result = send_pull_request(
        issue=mock_issue,
        token='test-token',
        username='test-user',
        platform=ProviderType.GITHUB,
        patch_dir=repo_path,
        pr_type='ready',
        reviewer=reviewer,
    )

    # Assert API calls
    assert mock_get.call_count == 2
    assert mock_post.call_count == 2

    # Check PR creation
    pr_create_call = mock_post.call_args_list[0]
    assert pr_create_call[1]['json']['title'] == 'Fix issue #42: Test Issue'

    # Check reviewer request
    reviewer_request_call = mock_post.call_args_list[1]
    assert (
        reviewer_request_call[0][0]
        == 'https://api.github.com/repos/test-owner/test-repo/pulls/1/requested_reviewers'
    )
    assert reviewer_request_call[1]['json'] == {'reviewers': ['test-reviewer']}

    # Check the result URL
    assert result == 'https://github.com/test-owner/test-repo/pull/1'


@patch('subprocess.run')
@patch('httpx.post')
@patch('httpx.get')
def test_send_pull_request_target_branch_with_fork(
    mock_get, mock_post, mock_run, mock_issue, mock_output_dir
):
    """Test that target_branch works correctly when using a fork."""
    repo_path = os.path.join(mock_output_dir, 'repo')
    fork_owner = 'fork-owner'
    target_branch = 'custom-target'

    # Mock API responses
    mock_get.side_effect = [
        MagicMock(status_code=404),  # Branch doesn't exist
        MagicMock(status_code=200),  # Target branch exists
    ]

    mock_post.return_value.json.return_value = {
        'html_url': 'https://github.com/test-owner/test-repo/pull/1'
    }

    # Mock subprocess.run calls
    mock_run.side_effect = [
        MagicMock(returncode=0),  # git checkout -b
        MagicMock(returncode=0),  # git push
    ]

    # Call the function with fork_owner and target_branch
    send_pull_request(
        issue=mock_issue,
        token='test-token',
        username='test-user',
        platform=ProviderType.GITHUB,
        patch_dir=repo_path,
        pr_type='ready',
        fork_owner=fork_owner,
        target_branch=target_branch,
    )

    # Assert API calls
    assert mock_get.call_count == 2

    # Verify target branch was checked in original repo, not fork
    target_branch_check = mock_get.call_args_list[1]
    assert (
        target_branch_check[0][0]
        == f'https://api.github.com/repos/test-owner/test-repo/branches/{target_branch}'
    )

    # Check PR creation
    mock_post.assert_called_once()
    post_data = mock_post.call_args[1]['json']
    assert post_data['base'] == target_branch  # PR should target the specified branch
    assert (
        post_data['head'] == 'fork-owner:openhands-fix-issue-42'
    )  # Branch name should be standard
# Check that push was to fork
|
565 |
-
push_call = mock_run.call_args_list[1]
|
566 |
-
assert f'https://test-user:[email protected]/{fork_owner}/test-repo.git' in str(
|
567 |
-
push_call
|
568 |
-
)
|
569 |
-
|
570 |
-
|
571 |
-
@patch('subprocess.run')
|
572 |
-
@patch('httpx.post')
|
573 |
-
@patch('httpx.get')
|
574 |
-
def test_send_pull_request_target_branch_with_additional_message(
|
575 |
-
mock_get, mock_post, mock_run, mock_issue, mock_output_dir
|
576 |
-
):
|
577 |
-
"""Test that target_branch works correctly with additional PR message."""
|
578 |
-
repo_path = os.path.join(mock_output_dir, 'repo')
|
579 |
-
target_branch = 'feature-branch'
|
580 |
-
additional_message = 'Additional PR context'
|
581 |
-
|
582 |
-
# Mock API responses
|
583 |
-
mock_get.side_effect = [
|
584 |
-
MagicMock(status_code=404), # Branch doesn't exist
|
585 |
-
MagicMock(status_code=200), # Target branch exists
|
586 |
-
]
|
587 |
-
|
588 |
-
mock_post.return_value.json.return_value = {
|
589 |
-
'html_url': 'https://github.com/test-owner/test-repo/pull/1'
|
590 |
-
}
|
591 |
-
|
592 |
-
# Mock subprocess.run calls
|
593 |
-
mock_run.side_effect = [
|
594 |
-
MagicMock(returncode=0), # git checkout -b
|
595 |
-
MagicMock(returncode=0), # git push
|
596 |
-
]
|
597 |
-
|
598 |
-
# Call the function with target_branch and additional_message
|
599 |
-
send_pull_request(
|
600 |
-
issue=mock_issue,
|
601 |
-
token='test-token',
|
602 |
-
username='test-user',
|
603 |
-
platform=ProviderType.GITHUB,
|
604 |
-
patch_dir=repo_path,
|
605 |
-
pr_type='ready',
|
606 |
-
target_branch=target_branch,
|
607 |
-
additional_message=additional_message,
|
608 |
-
)
|
609 |
-
|
610 |
-
# Assert API calls
|
611 |
-
assert mock_get.call_count == 2
|
612 |
-
|
613 |
-
# Check PR creation
|
614 |
-
mock_post.assert_called_once()
|
615 |
-
post_data = mock_post.call_args[1]['json']
|
616 |
-
assert post_data['base'] == target_branch
|
617 |
-
assert additional_message in post_data['body']
|
618 |
-
assert 'This pull request fixes #42' in post_data['body']
|
619 |
-
|
620 |
-
|
621 |
-
@patch('httpx.get')
|
622 |
-
def test_send_pull_request_invalid_target_branch(
|
623 |
-
mock_get, mock_issue, mock_output_dir, mock_llm_config
|
624 |
-
):
|
625 |
-
"""Test that an error is raised when specifying a non-existent target branch"""
|
626 |
-
repo_path = os.path.join(mock_output_dir, 'repo')
|
627 |
-
|
628 |
-
# Mock API response for non-existent branch
|
629 |
-
mock_get.side_effect = [
|
630 |
-
MagicMock(status_code=404), # Branch doesn't exist
|
631 |
-
MagicMock(status_code=404), # Target branch doesn't exist
|
632 |
-
]
|
633 |
-
|
634 |
-
# Test that ValueError is raised when target branch doesn't exist
|
635 |
-
with pytest.raises(
|
636 |
-
ValueError, match='Target branch nonexistent-branch does not exist'
|
637 |
-
):
|
638 |
-
send_pull_request(
|
639 |
-
issue=mock_issue,
|
640 |
-
token='test-token',
|
641 |
-
username='test-user',
|
642 |
-
platform=ProviderType.GITHUB,
|
643 |
-
patch_dir=repo_path,
|
644 |
-
pr_type='ready',
|
645 |
-
target_branch='nonexistent-branch',
|
646 |
-
)
|
647 |
-
|
648 |
-
# Verify API calls
|
649 |
-
assert mock_get.call_count == 2
|
650 |
-
|
651 |
-
|
652 |
-
@patch('subprocess.run')
|
653 |
-
@patch('httpx.post')
|
654 |
-
@patch('httpx.get')
|
655 |
-
def test_send_pull_request_git_push_failure(
|
656 |
-
mock_get, mock_post, mock_run, mock_issue, mock_output_dir, mock_llm_config
|
657 |
-
):
|
658 |
-
repo_path = os.path.join(mock_output_dir, 'repo')
|
659 |
-
|
660 |
-
# Mock API responses
|
661 |
-
mock_get.return_value = MagicMock(json=lambda: {'default_branch': 'main'})
|
662 |
-
|
663 |
-
# Mock the subprocess.run calls
|
664 |
-
mock_run.side_effect = [
|
665 |
-
MagicMock(returncode=0), # git checkout -b
|
666 |
-
MagicMock(returncode=1, stderr='Error: failed to push some refs'), # git push
|
667 |
-
]
|
668 |
-
|
669 |
-
# Test that RuntimeError is raised when git push fails
|
670 |
-
with pytest.raises(
|
671 |
-
RuntimeError, match='Failed to push changes to the remote repository'
|
672 |
-
):
|
673 |
-
send_pull_request(
|
674 |
-
issue=mock_issue,
|
675 |
-
token='test-token',
|
676 |
-
username='test-user',
|
677 |
-
platform=ProviderType.GITHUB,
|
678 |
-
patch_dir=repo_path,
|
679 |
-
pr_type='ready',
|
680 |
-
)
|
681 |
-
|
682 |
-
# Assert that subprocess.run was called twice
|
683 |
-
assert mock_run.call_count == 2
|
684 |
-
|
685 |
-
# Check the git checkout -b command
|
686 |
-
checkout_call = mock_run.call_args_list[0]
|
687 |
-
assert checkout_call[0][0] == [
|
688 |
-
'git',
|
689 |
-
'-C',
|
690 |
-
repo_path,
|
691 |
-
'checkout',
|
692 |
-
'-b',
|
693 |
-
'openhands-fix-issue-42',
|
694 |
-
]
|
695 |
-
|
696 |
-
# Check the git push command
|
697 |
-
push_call = mock_run.call_args_list[1]
|
698 |
-
assert push_call[0][0] == [
|
699 |
-
'git',
|
700 |
-
'-C',
|
701 |
-
repo_path,
|
702 |
-
'push',
|
703 |
-
'https://test-user:[email protected]/test-owner/test-repo.git',
|
704 |
-
'openhands-fix-issue-42',
|
705 |
-
]
|
706 |
-
|
707 |
-
# Assert that no pull request was created
|
708 |
-
mock_post.assert_not_called()
|
709 |
-
|
710 |
-
|
711 |
-
@patch('subprocess.run')
|
712 |
-
@patch('httpx.post')
|
713 |
-
@patch('httpx.get')
|
714 |
-
def test_send_pull_request_permission_error(
|
715 |
-
mock_get, mock_post, mock_run, mock_issue, mock_output_dir, mock_llm_config
|
716 |
-
):
|
717 |
-
repo_path = os.path.join(mock_output_dir, 'repo')
|
718 |
-
|
719 |
-
# Mock API responses
|
720 |
-
mock_get.return_value = MagicMock(json=lambda: {'default_branch': 'main'})
|
721 |
-
mock_post.return_value.status_code = 403
|
722 |
-
|
723 |
-
# Mock subprocess.run calls
|
724 |
-
mock_run.side_effect = [
|
725 |
-
MagicMock(returncode=0), # git checkout -b
|
726 |
-
MagicMock(returncode=0), # git push
|
727 |
-
]
|
728 |
-
|
729 |
-
# Test that RuntimeError is raised when PR creation fails due to permissions
|
730 |
-
with pytest.raises(
|
731 |
-
RuntimeError, match='Failed to create pull request due to missing permissions.'
|
732 |
-
):
|
733 |
-
send_pull_request(
|
734 |
-
issue=mock_issue,
|
735 |
-
token='test-token',
|
736 |
-
username='test-user',
|
737 |
-
platform=ProviderType.GITHUB,
|
738 |
-
patch_dir=repo_path,
|
739 |
-
pr_type='ready',
|
740 |
-
)
|
741 |
-
|
742 |
-
# Assert that the branch was created and pushed
|
743 |
-
assert mock_run.call_count == 2
|
744 |
-
mock_post.assert_called_once()
|
745 |
-
|
746 |
-
|
747 |
-
@patch('httpx.post')
|
748 |
-
def test_reply_to_comment(mock_post, mock_issue):
|
749 |
-
# Arrange: set up the test data
|
750 |
-
token = 'test_token'
|
751 |
-
comment_id = 'test_comment_id'
|
752 |
-
reply = 'This is a test reply.'
|
753 |
-
|
754 |
-
# Create an instance of GithubIssueHandler
|
755 |
-
handler = GithubIssueHandler(
|
756 |
-
owner='test-owner', repo='test-repo', token=token, username='test-user'
|
757 |
-
)
|
758 |
-
|
759 |
-
# Mock the response from the GraphQL API
|
760 |
-
mock_response = MagicMock()
|
761 |
-
mock_response.status_code = 200
|
762 |
-
mock_response.json.return_value = {
|
763 |
-
'data': {
|
764 |
-
'addPullRequestReviewThreadReply': {
|
765 |
-
'comment': {
|
766 |
-
'id': 'test_reply_id',
|
767 |
-
'body': 'Openhands fix success summary\n\n\nThis is a test reply.',
|
768 |
-
'createdAt': '2024-10-01T12:34:56Z',
|
769 |
-
}
|
770 |
-
}
|
771 |
-
}
|
772 |
-
}
|
773 |
-
|
774 |
-
mock_post.return_value = mock_response
|
775 |
-
|
776 |
-
# Act: call the function
|
777 |
-
handler.reply_to_comment(mock_issue.number, comment_id, reply)
|
778 |
-
|
779 |
-
# Assert: check that the POST request was made with the correct parameters
|
780 |
-
query = """
|
781 |
-
mutation($body: String!, $pullRequestReviewThreadId: ID!) {
|
782 |
-
addPullRequestReviewThreadReply(input: { body: $body, pullRequestReviewThreadId: $pullRequestReviewThreadId }) {
|
783 |
-
comment {
|
784 |
-
id
|
785 |
-
body
|
786 |
-
createdAt
|
787 |
-
}
|
788 |
-
}
|
789 |
-
}
|
790 |
-
"""
|
791 |
-
|
792 |
-
expected_variables = {
|
793 |
-
'body': 'Openhands fix success summary\n\n\nThis is a test reply.',
|
794 |
-
'pullRequestReviewThreadId': comment_id,
|
795 |
-
}
|
796 |
-
|
797 |
-
# Check that the correct request was made to the API
|
798 |
-
mock_post.assert_called_once_with(
|
799 |
-
'https://api.github.com/graphql',
|
800 |
-
json={'query': query, 'variables': expected_variables},
|
801 |
-
headers={
|
802 |
-
'Authorization': f'Bearer {token}',
|
803 |
-
'Content-Type': 'application/json',
|
804 |
-
},
|
805 |
-
)
|
806 |
-
|
807 |
-
# Check that the response status was checked (via response.raise_for_status)
|
808 |
-
mock_response.raise_for_status.assert_called_once()
|
809 |
-
|
810 |
-
|
811 |
-
@patch('openhands.resolver.send_pull_request.initialize_repo')
|
812 |
-
@patch('openhands.resolver.send_pull_request.apply_patch')
|
813 |
-
@patch('openhands.resolver.send_pull_request.update_existing_pull_request')
|
814 |
-
@patch('openhands.resolver.send_pull_request.make_commit')
|
815 |
-
def test_process_single_pr_update(
|
816 |
-
mock_make_commit,
|
817 |
-
mock_update_existing_pull_request,
|
818 |
-
mock_apply_patch,
|
819 |
-
mock_initialize_repo,
|
820 |
-
mock_output_dir,
|
821 |
-
mock_llm_config,
|
822 |
-
):
|
823 |
-
# Initialize test data
|
824 |
-
token = 'test_token'
|
825 |
-
username = 'test_user'
|
826 |
-
pr_type = 'draft'
|
827 |
-
|
828 |
-
resolver_output = ResolverOutput(
|
829 |
-
issue=Issue(
|
830 |
-
owner='test-owner',
|
831 |
-
repo='test-repo',
|
832 |
-
number=1,
|
833 |
-
title='Issue 1',
|
834 |
-
body='Body 1',
|
835 |
-
closing_issues=[],
|
836 |
-
review_threads=[
|
837 |
-
ReviewThread(comment='review comment for feedback', files=[])
|
838 |
-
],
|
839 |
-
thread_ids=['1'],
|
840 |
-
head_branch='branch 1',
|
841 |
-
),
|
842 |
-
issue_type='pr',
|
843 |
-
instruction='Test instruction 1',
|
844 |
-
base_commit='def456',
|
845 |
-
git_patch='Test patch 1',
|
846 |
-
history=[],
|
847 |
-
metrics={},
|
848 |
-
success=True,
|
849 |
-
comment_success=None,
|
850 |
-
result_explanation='[Test success 1]',
|
851 |
-
error=None,
|
852 |
-
)
|
853 |
-
|
854 |
-
mock_update_existing_pull_request.return_value = (
|
855 |
-
'https://github.com/test-owner/test-repo/pull/1'
|
856 |
-
)
|
857 |
-
mock_initialize_repo.return_value = f'{mock_output_dir}/patches/pr_1'
|
858 |
-
|
859 |
-
process_single_issue(
|
860 |
-
mock_output_dir,
|
861 |
-
resolver_output,
|
862 |
-
token,
|
863 |
-
username,
|
864 |
-
ProviderType.GITHUB,
|
865 |
-
pr_type,
|
866 |
-
mock_llm_config,
|
867 |
-
None,
|
868 |
-
False,
|
869 |
-
None,
|
870 |
-
)
|
871 |
-
|
872 |
-
mock_initialize_repo.assert_called_once_with(mock_output_dir, 1, 'pr', 'branch 1')
|
873 |
-
mock_apply_patch.assert_called_once_with(
|
874 |
-
f'{mock_output_dir}/patches/pr_1', resolver_output.git_patch
|
875 |
-
)
|
876 |
-
mock_make_commit.assert_called_once_with(
|
877 |
-
f'{mock_output_dir}/patches/pr_1', resolver_output.issue, 'pr'
|
878 |
-
)
|
879 |
-
mock_update_existing_pull_request.assert_called_once_with(
|
880 |
-
issue=resolver_output.issue,
|
881 |
-
token=token,
|
882 |
-
username=username,
|
883 |
-
platform=ProviderType.GITHUB,
|
884 |
-
patch_dir=f'{mock_output_dir}/patches/pr_1',
|
885 |
-
additional_message='[Test success 1]',
|
886 |
-
llm_config=mock_llm_config,
|
887 |
-
base_domain='github.com',
|
888 |
-
)
|
889 |
-
|
890 |
-
|
891 |
-
@patch('openhands.resolver.send_pull_request.initialize_repo')
|
892 |
-
@patch('openhands.resolver.send_pull_request.apply_patch')
|
893 |
-
@patch('openhands.resolver.send_pull_request.send_pull_request')
|
894 |
-
@patch('openhands.resolver.send_pull_request.make_commit')
|
895 |
-
def test_process_single_issue(
|
896 |
-
mock_make_commit,
|
897 |
-
mock_send_pull_request,
|
898 |
-
mock_apply_patch,
|
899 |
-
mock_initialize_repo,
|
900 |
-
mock_output_dir,
|
901 |
-
mock_llm_config,
|
902 |
-
):
|
903 |
-
# Initialize test data
|
904 |
-
token = 'test_token'
|
905 |
-
username = 'test_user'
|
906 |
-
pr_type = 'draft'
|
907 |
-
platform = ProviderType.GITHUB
|
908 |
-
|
909 |
-
resolver_output = ResolverOutput(
|
910 |
-
issue=Issue(
|
911 |
-
owner='test-owner',
|
912 |
-
repo='test-repo',
|
913 |
-
number=1,
|
914 |
-
title='Issue 1',
|
915 |
-
body='Body 1',
|
916 |
-
),
|
917 |
-
issue_type='issue',
|
918 |
-
instruction='Test instruction 1',
|
919 |
-
base_commit='def456',
|
920 |
-
git_patch='Test patch 1',
|
921 |
-
history=[],
|
922 |
-
metrics={},
|
923 |
-
success=True,
|
924 |
-
comment_success=None,
|
925 |
-
result_explanation='Test success 1',
|
926 |
-
error=None,
|
927 |
-
)
|
928 |
-
|
929 |
-
# Mock return value
|
930 |
-
mock_send_pull_request.return_value = (
|
931 |
-
'https://github.com/test-owner/test-repo/pull/1'
|
932 |
-
)
|
933 |
-
mock_initialize_repo.return_value = f'{mock_output_dir}/patches/issue_1'
|
934 |
-
|
935 |
-
# Call the function
|
936 |
-
process_single_issue(
|
937 |
-
mock_output_dir,
|
938 |
-
resolver_output,
|
939 |
-
token,
|
940 |
-
username,
|
941 |
-
platform,
|
942 |
-
pr_type,
|
943 |
-
mock_llm_config,
|
944 |
-
None,
|
945 |
-
False,
|
946 |
-
None,
|
947 |
-
)
|
948 |
-
|
949 |
-
# Assert that the mocked functions were called with correct arguments
|
950 |
-
mock_initialize_repo.assert_called_once_with(mock_output_dir, 1, 'issue', 'def456')
|
951 |
-
mock_apply_patch.assert_called_once_with(
|
952 |
-
f'{mock_output_dir}/patches/issue_1', resolver_output.git_patch
|
953 |
-
)
|
954 |
-
mock_make_commit.assert_called_once_with(
|
955 |
-
f'{mock_output_dir}/patches/issue_1', resolver_output.issue, 'issue'
|
956 |
-
)
|
957 |
-
mock_send_pull_request.assert_called_once_with(
|
958 |
-
issue=resolver_output.issue,
|
959 |
-
token=token,
|
960 |
-
username=username,
|
961 |
-
platform=platform,
|
962 |
-
patch_dir=f'{mock_output_dir}/patches/issue_1',
|
963 |
-
pr_type=pr_type,
|
964 |
-
fork_owner=None,
|
965 |
-
additional_message=resolver_output.result_explanation,
|
966 |
-
target_branch=None,
|
967 |
-
reviewer=None,
|
968 |
-
pr_title=None,
|
969 |
-
base_domain='github.com',
|
970 |
-
)
|
971 |
-
|
972 |
-
|
973 |
-
@patch('openhands.resolver.send_pull_request.initialize_repo')
|
974 |
-
@patch('openhands.resolver.send_pull_request.apply_patch')
|
975 |
-
@patch('openhands.resolver.send_pull_request.send_pull_request')
|
976 |
-
@patch('openhands.resolver.send_pull_request.make_commit')
|
977 |
-
def test_process_single_issue_unsuccessful(
|
978 |
-
mock_make_commit,
|
979 |
-
mock_send_pull_request,
|
980 |
-
mock_apply_patch,
|
981 |
-
mock_initialize_repo,
|
982 |
-
mock_output_dir,
|
983 |
-
mock_llm_config,
|
984 |
-
):
|
985 |
-
# Initialize test data
|
986 |
-
token = 'test_token'
|
987 |
-
username = 'test_user'
|
988 |
-
pr_type = 'draft'
|
989 |
-
|
990 |
-
resolver_output = ResolverOutput(
|
991 |
-
issue=Issue(
|
992 |
-
owner='test-owner',
|
993 |
-
repo='test-repo',
|
994 |
-
number=1,
|
995 |
-
title='Issue 1',
|
996 |
-
body='Body 1',
|
997 |
-
),
|
998 |
-
issue_type='issue',
|
999 |
-
instruction='Test instruction 1',
|
1000 |
-
base_commit='def456',
|
1001 |
-
git_patch='Test patch 1',
|
1002 |
-
history=[],
|
1003 |
-
metrics={},
|
1004 |
-
success=False,
|
1005 |
-
comment_success=None,
|
1006 |
-
result_explanation='',
|
1007 |
-
error='Test error',
|
1008 |
-
)
|
1009 |
-
|
1010 |
-
# Call the function
|
1011 |
-
process_single_issue(
|
1012 |
-
mock_output_dir,
|
1013 |
-
resolver_output,
|
1014 |
-
token,
|
1015 |
-
username,
|
1016 |
-
ProviderType.GITHUB,
|
1017 |
-
pr_type,
|
1018 |
-
mock_llm_config,
|
1019 |
-
None,
|
1020 |
-
False,
|
1021 |
-
None,
|
1022 |
-
)
|
1023 |
-
|
1024 |
-
# Assert that none of the mocked functions were called
|
1025 |
-
mock_initialize_repo.assert_not_called()
|
1026 |
-
mock_apply_patch.assert_not_called()
|
1027 |
-
mock_make_commit.assert_not_called()
|
1028 |
-
mock_send_pull_request.assert_not_called()
|
1029 |
-
|
1030 |
-
|
1031 |
-
@patch('httpx.get')
|
1032 |
-
@patch('subprocess.run')
|
1033 |
-
def test_send_pull_request_branch_naming(
|
1034 |
-
mock_run, mock_get, mock_issue, mock_output_dir, mock_llm_config
|
1035 |
-
):
|
1036 |
-
repo_path = os.path.join(mock_output_dir, 'repo')
|
1037 |
-
|
1038 |
-
# Mock API responses
|
1039 |
-
mock_get.side_effect = [
|
1040 |
-
MagicMock(status_code=200), # First branch exists
|
1041 |
-
MagicMock(status_code=200), # Second branch exists
|
1042 |
-
MagicMock(status_code=404), # Third branch doesn't exist
|
1043 |
-
MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
|
1044 |
-
]
|
1045 |
-
|
1046 |
-
# Mock subprocess.run calls
|
1047 |
-
mock_run.side_effect = [
|
1048 |
-
MagicMock(returncode=0), # git checkout -b
|
1049 |
-
MagicMock(returncode=0), # git push
|
1050 |
-
]
|
1051 |
-
|
1052 |
-
# Call the function
|
1053 |
-
result = send_pull_request(
|
1054 |
-
issue=mock_issue,
|
1055 |
-
token='test-token',
|
1056 |
-
username='test-user',
|
1057 |
-
platform=ProviderType.GITHUB,
|
1058 |
-
patch_dir=repo_path,
|
1059 |
-
pr_type='branch',
|
1060 |
-
)
|
1061 |
-
|
1062 |
-
# Assert API calls
|
1063 |
-
assert mock_get.call_count == 4
|
1064 |
-
|
1065 |
-
# Check branch creation and push
|
1066 |
-
assert mock_run.call_count == 2
|
1067 |
-
checkout_call, push_call = mock_run.call_args_list
|
1068 |
-
|
1069 |
-
assert checkout_call == call(
|
1070 |
-
['git', '-C', repo_path, 'checkout', '-b', 'openhands-fix-issue-42-try3'],
|
1071 |
-
capture_output=True,
|
1072 |
-
text=True,
|
1073 |
-
)
|
1074 |
-
assert push_call == call(
|
1075 |
-
[
|
1076 |
-
'git',
|
1077 |
-
'-C',
|
1078 |
-
repo_path,
|
1079 |
-
'push',
|
1080 |
-
'https://test-user:[email protected]/test-owner/test-repo.git',
|
1081 |
-
'openhands-fix-issue-42-try3',
|
1082 |
-
],
|
1083 |
-
capture_output=True,
|
1084 |
-
text=True,
|
1085 |
-
)
|
1086 |
-
|
1087 |
-
# Check the result
|
1088 |
-
assert (
|
1089 |
-
result
|
1090 |
-
== 'https://github.com/test-owner/test-repo/compare/openhands-fix-issue-42-try3?expand=1'
|
1091 |
-
)
|
1092 |
-
|
1093 |
-
|
1094 |
-
@patch('openhands.resolver.send_pull_request.argparse.ArgumentParser')
|
1095 |
-
@patch('openhands.resolver.send_pull_request.process_single_issue')
|
1096 |
-
@patch('openhands.resolver.send_pull_request.load_single_resolver_output')
|
1097 |
-
@patch('openhands.resolver.send_pull_request.identify_token')
|
1098 |
-
@patch('os.path.exists')
|
1099 |
-
@patch('os.getenv')
|
1100 |
-
def test_main(
|
1101 |
-
mock_getenv,
|
1102 |
-
mock_path_exists,
|
1103 |
-
mock_identify_token,
|
1104 |
-
mock_load_single_resolver_output,
|
1105 |
-
mock_process_single_issue,
|
1106 |
-
mock_parser,
|
1107 |
-
):
|
1108 |
-
# Setup mock parser
|
1109 |
-
mock_args = MagicMock()
|
1110 |
-
mock_args.token = None
|
1111 |
-
mock_args.username = 'mock_username'
|
1112 |
-
mock_args.output_dir = '/mock/output'
|
1113 |
-
mock_args.pr_type = 'draft'
|
1114 |
-
mock_args.issue_number = '42'
|
1115 |
-
mock_args.fork_owner = None
|
1116 |
-
mock_args.send_on_failure = False
|
1117 |
-
mock_args.llm_model = 'mock_model'
|
1118 |
-
mock_args.llm_base_url = 'mock_url'
|
1119 |
-
mock_args.llm_api_key = 'mock_key'
|
1120 |
-
mock_args.target_branch = None
|
1121 |
-
mock_args.reviewer = None
|
1122 |
-
mock_args.pr_title = None
|
1123 |
-
mock_args.selected_repo = None
|
1124 |
-
mock_parser.return_value.parse_args.return_value = mock_args
|
1125 |
-
|
1126 |
-
# Setup environment variables
|
1127 |
-
mock_getenv.side_effect = (
|
1128 |
-
lambda key, default=None: 'mock_token' if key == 'GITHUB_TOKEN' else default
|
1129 |
-
)
|
1130 |
-
|
1131 |
-
# Setup path exists
|
1132 |
-
mock_path_exists.return_value = True
|
1133 |
-
|
1134 |
-
# Setup mock resolver output
|
1135 |
-
mock_resolver_output = MagicMock()
|
1136 |
-
mock_load_single_resolver_output.return_value = mock_resolver_output
|
1137 |
-
|
1138 |
-
mock_identify_token.return_value = ProviderType.GITHUB
|
1139 |
-
|
1140 |
-
# Run main function
|
1141 |
-
main()
|
1142 |
-
|
1143 |
-
mock_identify_token.assert_called_with('mock_token', mock_args.base_domain)
|
1144 |
-
|
1145 |
-
llm_config = LLMConfig(
|
1146 |
-
model=mock_args.llm_model,
|
1147 |
-
base_url=mock_args.llm_base_url,
|
1148 |
-
api_key=mock_args.llm_api_key,
|
1149 |
-
)
|
1150 |
-
|
1151 |
-
# Use any_call instead of assert_called_with for more flexible matching
|
1152 |
-
assert mock_process_single_issue.call_args == call(
|
1153 |
-
'/mock/output',
|
1154 |
-
mock_resolver_output,
|
1155 |
-
'mock_token',
|
1156 |
-
'mock_username',
|
1157 |
-
ProviderType.GITHUB,
|
1158 |
-
'draft',
|
1159 |
-
llm_config,
|
1160 |
-
None,
|
1161 |
-
False,
|
1162 |
-
mock_args.target_branch,
|
1163 |
-
mock_args.reviewer,
|
1164 |
-
mock_args.pr_title,
|
1165 |
-
ANY,
|
1166 |
-
)
|
1167 |
-
|
1168 |
-
# Other assertions
|
1169 |
-
mock_parser.assert_called_once()
|
1170 |
-
mock_getenv.assert_any_call('GITHUB_TOKEN')
|
1171 |
-
mock_path_exists.assert_called_with('/mock/output')
|
1172 |
-
mock_load_single_resolver_output.assert_called_with('/mock/output/output.jsonl', 42)
|
1173 |
-
|
1174 |
-
# Test for invalid issue number
|
1175 |
-
mock_args.issue_number = 'invalid'
|
1176 |
-
with pytest.raises(ValueError):
|
1177 |
-
main()
|
1178 |
-
|
1179 |
-
# Test for invalid token
|
1180 |
-
mock_args.issue_number = '42' # Reset to valid issue number
|
1181 |
-
mock_getenv.side_effect = (
|
1182 |
-
lambda key, default=None: None
|
1183 |
-
) # Return None for all env vars
|
1184 |
-
with pytest.raises(ValueError, match='token is not set'):
|
1185 |
-
main()
|
1186 |
-
|
1187 |
-
|
1188 |
-
@patch('subprocess.run')
|
1189 |
-
def test_make_commit_escapes_issue_title(mock_subprocess_run):
|
1190 |
-
# Setup
|
1191 |
-
repo_dir = '/path/to/repo'
|
1192 |
-
issue = Issue(
|
1193 |
-
owner='test-owner',
|
1194 |
-
repo='test-repo',
|
1195 |
-
number=42,
|
1196 |
-
title='Issue with "quotes" and $pecial characters',
|
1197 |
-
body='Test body',
|
1198 |
-
)
|
1199 |
-
|
1200 |
-
# Mock subprocess.run to return success for all calls
|
1201 |
-
mock_subprocess_run.return_value = MagicMock(
|
1202 |
-
returncode=0, stdout='sample output', stderr=''
|
1203 |
-
)
|
1204 |
-
|
1205 |
-
# Call the function
|
1206 |
-
issue_type = 'issue'
|
1207 |
-
make_commit(repo_dir, issue, issue_type)
|
1208 |
-
|
1209 |
-
# Assert that subprocess.run was called with the correct arguments
|
1210 |
-
calls = mock_subprocess_run.call_args_list
|
1211 |
-
assert len(calls) == 4 # git config check, git add, git commit
|
1212 |
-
|
1213 |
-
# Check the git commit call
|
1214 |
-
git_commit_call = calls[3][0][0]
|
1215 |
-
expected_commit_message = (
|
1216 |
-
'Fix issue #42: Issue with "quotes" and $pecial characters'
|
1217 |
-
)
|
1218 |
-
assert [
|
1219 |
-
'git',
|
1220 |
-
'-C',
|
1221 |
-
'/path/to/repo',
|
1222 |
-
'commit',
|
1223 |
-
'-m',
|
1224 |
-
expected_commit_message,
|
1225 |
-
] == git_commit_call
|
1226 |
-
|
1227 |
-
|
1228 |
-
@patch('subprocess.run')
|
1229 |
-
def test_make_commit_no_changes(mock_subprocess_run):
|
1230 |
-
# Setup
|
1231 |
-
repo_dir = '/path/to/repo'
|
1232 |
-
issue = Issue(
|
1233 |
-
owner='test-owner',
|
1234 |
-
repo='test-repo',
|
1235 |
-
number=42,
|
1236 |
-
title='Issue with no changes',
|
1237 |
-
body='Test body',
|
1238 |
-
)
|
1239 |
-
|
1240 |
-
# Mock subprocess.run to simulate no changes in the repo
|
1241 |
-
mock_subprocess_run.side_effect = [
|
1242 |
-
MagicMock(returncode=0),
|
1243 |
-
MagicMock(returncode=0),
|
1244 |
-
MagicMock(returncode=1, stdout=''), # git status --porcelain (no changes)
|
1245 |
-
]
|
1246 |
-
|
1247 |
-
with pytest.raises(
|
1248 |
-
RuntimeError, match='ERROR: Openhands failed to make code changes.'
|
1249 |
-
):
|
1250 |
-
make_commit(repo_dir, issue, 'issue')
|
1251 |
-
|
1252 |
-
# Check that subprocess.run was called for checking git status and add, but not commit
|
1253 |
-
assert mock_subprocess_run.call_count == 3
|
1254 |
-
git_status_call = mock_subprocess_run.call_args_list[2][0][0]
|
1255 |
-
assert f'git -C {repo_dir} status --porcelain' in git_status_call
|
1256 |
-
|
1257 |
-
|
1258 |
-
def test_apply_patch_rename_directory(mock_output_dir):
|
1259 |
-
# Create a sample directory structure
|
1260 |
-
old_dir = os.path.join(mock_output_dir, 'prompts', 'resolve')
|
1261 |
-
os.makedirs(old_dir)
|
1262 |
-
|
1263 |
-
# Create test files
|
1264 |
-
test_files = [
|
1265 |
-
'issue-success-check.jinja',
|
1266 |
-
'pr-feedback-check.jinja',
|
1267 |
-
'pr-thread-check.jinja',
|
1268 |
-
]
|
1269 |
-
for filename in test_files:
|
1270 |
-
file_path = os.path.join(old_dir, filename)
|
1271 |
-
with open(file_path, 'w') as f:
|
1272 |
-
f.write(f'Content of {filename}')
|
1273 |
-
|
1274 |
-
# Create a patch that renames the directory
|
1275 |
-
patch_content = """diff --git a/prompts/resolve/issue-success-check.jinja b/prompts/guess_success/issue-success-check.jinja
|
1276 |
-
similarity index 100%
|
1277 |
-
rename from prompts/resolve/issue-success-check.jinja
|
1278 |
-
rename to prompts/guess_success/issue-success-check.jinja
|
1279 |
-
diff --git a/prompts/resolve/pr-feedback-check.jinja b/prompts/guess_success/pr-feedback-check.jinja
|
1280 |
-
similarity index 100%
|
1281 |
-
rename from prompts/resolve/pr-feedback-check.jinja
|
1282 |
-
rename to prompts/guess_success/pr-feedback-check.jinja
|
1283 |
-
diff --git a/prompts/resolve/pr-thread-check.jinja b/prompts/guess_success/pr-thread-check.jinja
|
1284 |
-
similarity index 100%
|
1285 |
-
rename from prompts/resolve/pr-thread-check.jinja
|
1286 |
-
rename to prompts/guess_success/pr-thread-check.jinja"""
|
1287 |
-
|
1288 |
-
# Apply the patch
|
1289 |
-
apply_patch(mock_output_dir, patch_content)
|
1290 |
-
|
1291 |
-
# Check if files were moved correctly
|
1292 |
-
new_dir = os.path.join(mock_output_dir, 'prompts', 'guess_success')
|
1293 |
-
assert not os.path.exists(old_dir), 'Old directory still exists'
|
1294 |
-
assert os.path.exists(new_dir), 'New directory was not created'
|
1295 |
-
|
1296 |
-
# Check if all files were moved and content preserved
|
1297 |
-
for filename in test_files:
|
1298 |
-
old_path = os.path.join(old_dir, filename)
|
1299 |
-
new_path = os.path.join(new_dir, filename)
|
1300 |
-
assert not os.path.exists(old_path), f'Old file {filename} still exists'
|
1301 |
-
assert os.path.exists(new_path), f'New file {filename} was not created'
|
1302 |
-
with open(new_path, 'r') as f:
|
1303 |
-
content = f.read()
|
1304 |
-
assert content == f'Content of {filename}', f'Content mismatch for {filename}'
|
|
|
|
import json
from unittest.mock import MagicMock, patch

from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm import LLM
from openhands.resolver.interfaces.gitlab import GitlabIssueHandler, GitlabPRHandler
from openhands.resolver.interfaces.issue import Issue
from openhands.resolver.interfaces.issue_definitions import (
    ServiceContextIssue,
    ServiceContextPR,
)


def test_guess_success_multiline_explanation():
    # Mock data
    issue = Issue(
        owner='test',
        repo='test',
        number=1,
        title='Test Issue',
        body='Test body',
        thread_comments=None,
        review_comments=None,
    )
    history = [MessageAction(content='Test message')]
    llm_config = LLMConfig(model='test', api_key='test')

    # Create a mock response with multi-line explanation
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The PR successfully addressed the issue by:
- Fixed bug A
- Added test B
- Updated documentation C

Automatic fix generated by OpenHands 🙌"""
            )
        )
    ]

    # Use patch to mock the LLM completion call
    with patch.object(LLM, 'completion', return_value=mock_response) as mock_completion:
        # Create a handler instance
        handler = ServiceContextIssue(
            GitlabIssueHandler('test', 'test', 'test'), llm_config
        )

        # Call guess_success
        success, _, explanation = handler.guess_success(issue, history)

        # Verify the results
        assert success is True
        assert 'The PR successfully addressed the issue by:' in explanation
        assert 'Fixed bug A' in explanation
        assert 'Added test B' in explanation
        assert 'Updated documentation C' in explanation
        assert 'Automatic fix generated by OpenHands' in explanation

        # Verify that LLM completion was called exactly once
        mock_completion.assert_called_once()


def test_pr_handler_guess_success_with_thread_comments():
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(GitlabPRHandler('test', 'test', 'test'), llm_config)

    # Create a mock issue with thread comments but no review comments
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=['First comment', 'Second comment'],
        closing_issues=['Issue description'],
        review_comments=None,
        thread_ids=None,
        head_branch='test-branch',
    )

    # Create mock history
    history = [MessageAction(content='Fixed the issue by implementing X and Y')]

    # Create mock LLM config
    llm_config = LLMConfig(model='test-model', api_key='test-key')

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    # Test the guess_success method
    with patch.object(LLM, 'completion', return_value=mock_response):
        success, success_list, explanation = handler.guess_success(issue, history)

        # Verify the results
        assert success is True
        assert success_list == [True]
        assert 'successfully address' in explanation
        assert len(json.loads(explanation)) == 1


def test_pr_handler_guess_success_only_review_comments():
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create a mock issue with only review comments
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=None,
        closing_issues=['Issue description'],
        review_comments=['Please fix the formatting', 'Add more tests'],
        thread_ids=None,
        head_branch='test-branch',
    )

    # Create mock history
    history = [MessageAction(content='Fixed the formatting and added more tests')]

    # Create mock LLM config
    llm_config = LLMConfig(model='test-model', api_key='test-key')

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The changes successfully address the review comments."""
            )
        )
    ]

    # Test the guess_success method
    with patch.object(LLM, 'completion', return_value=mock_response):
        success, success_list, explanation = handler.guess_success(issue, history)

        # Verify the results
        assert success is True
        assert success_list == [True]
        assert (
            '["The changes successfully address the review comments."]' in explanation
        )


def test_pr_handler_guess_success_no_comments():
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(GitlabPRHandler('test', 'test', 'test'), llm_config)

    # Create a mock issue with no comments
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=None,
        closing_issues=['Issue description'],
        review_comments=None,
        thread_ids=None,
        head_branch='test-branch',
    )

    # Create mock history
    history = [MessageAction(content='Fixed the issue')]

    # Create mock LLM config
    llm_config = LLMConfig(model='test-model', api_key='test-key')

    # Test that it returns appropriate message when no comments are present
    success, success_list, explanation = handler.guess_success(issue, history)
    assert success is False
    assert success_list is None
    assert explanation == 'No feedback was found to process'
tests/unit/resolver/gitlab/test_gitlab_issue_handler.py DELETED @@ -1,683 +0,0 @@
from unittest.mock import MagicMock, patch

from openhands.core.config import LLMConfig
from openhands.resolver.interfaces.gitlab import GitlabIssueHandler, GitlabPRHandler
from openhands.resolver.interfaces.issue import ReviewThread
from openhands.resolver.interfaces.issue_definitions import (
    ServiceContextIssue,
    ServiceContextPR,
)


def test_get_converted_issues_initializes_review_comments():
    # Mock the necessary dependencies
    with patch('httpx.get') as mock_get:
        # Mock the response for issues
        mock_issues_response = MagicMock()
        mock_issues_response.json.return_value = [
            {'iid': 1, 'title': 'Test Issue', 'description': 'Test Body'}
        ]
        # Mock the response for comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = []

        # Set up the mock to return different responses for different calls
        # First call is for issues, second call is for comments
        mock_get.side_effect = [
            mock_issues_response,
            mock_comments_response,
            mock_comments_response,
        ]  # Need two comment responses because we make two API calls

        # Create an instance of IssueHandler
        llm_config = LLMConfig(model='test', api_key='test')
        handler = ServiceContextIssue(
            GitlabIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
        )

        # Get converted issues
        issues = handler.get_converted_issues(issue_numbers=[1])

        # Verify that we got exactly one issue
        assert len(issues) == 1

        # Verify that review_comments is initialized as None
        assert issues[0].review_comments is None

        # Verify other fields are set correctly
        assert issues[0].number == 1
        assert issues[0].title == 'Test Issue'
        assert issues[0].body == 'Test Body'
        assert issues[0].owner == 'test-owner'
        assert issues[0].repo == 'test-repo'


def test_get_converted_issues_handles_empty_body():
    # Mock the necessary dependencies
    with patch('httpx.get') as mock_get:
        # Mock the response for issues
        mock_issues_response = MagicMock()
        mock_issues_response.json.return_value = [
            {'iid': 1, 'title': 'Test Issue', 'description': None}
        ]
        # Mock the response for comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = []
        # Set up the mock to return different responses
        mock_get.side_effect = [
            mock_issues_response,
            mock_comments_response,
            mock_comments_response,
        ]

        # Create an instance of IssueHandler
        llm_config = LLMConfig(model='test', api_key='test')
        handler = ServiceContextIssue(
            GitlabIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
        )

        # Get converted issues
        issues = handler.get_converted_issues(issue_numbers=[1])

        # Verify that we got exactly one issue
        assert len(issues) == 1

        # Verify that body is empty string when None
        assert issues[0].body == ''

        # Verify other fields are set correctly
        assert issues[0].number == 1
        assert issues[0].title == 'Test Issue'
        assert issues[0].owner == 'test-owner'
        assert issues[0].repo == 'test-repo'

        # Verify that review_comments is initialized as None
        assert issues[0].review_comments is None


def test_pr_handler_get_converted_issues_with_comments():
    # Mock the necessary dependencies
    with patch('httpx.get') as mock_get:
        # Mock the response for PRs
        mock_prs_response = MagicMock()
        mock_prs_response.json.return_value = [
            {
                'iid': 1,
                'title': 'Test PR',
                'description': 'Test Body fixes #1',
                'source_branch': 'test-branch',
            }
        ]

        # Mock the response for PR comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
            {'body': 'First comment', 'resolvable': True, 'system': False},
            {'body': 'Second comment', 'resolvable': True, 'system': False},
        ]

        # Mock the response for PR metadata (GraphQL)
        mock_graphql_response = MagicMock()
        mock_graphql_response.json.return_value = {
            'data': {
                'project': {
                    'mergeRequest': {
                        'discussions': {'edges': []},
                    }
                }
            }
        }

        # Set up the mock to return different responses
        # We need to return empty responses for subsequent pages
        mock_empty_response = MagicMock()
        mock_empty_response.json.return_value = []

        # Mock the response for fetching the external issue referenced in PR body
        mock_external_issue_response = MagicMock()
        mock_external_issue_response.json.return_value = {
            'description': 'This is additional context from an externally referenced issue.'
        }

        mock_get.side_effect = [
            mock_prs_response,  # First call for PRs
            mock_empty_response,  # Second call for PRs (empty page)
            mock_empty_response,  # Third call for related issues
            mock_comments_response,  # Fourth call for PR comments
            mock_empty_response,  # Fifth call for PR comments (empty page)
            mock_external_issue_response,  # Mock response for the external issue reference #1
        ]

        # Mock the post request for GraphQL
        with patch('httpx.post') as mock_post:
            mock_post.return_value = mock_graphql_response

            # Create an instance of PRHandler
            llm_config = LLMConfig(model='test', api_key='test')
            handler = ServiceContextPR(
                GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
            )

            # Get converted issues
            prs = handler.get_converted_issues(issue_numbers=[1])

            # Verify that we got exactly one PR
            assert len(prs) == 1

            # Verify that thread_comments are set correctly
            assert prs[0].thread_comments == ['First comment', 'Second comment']

            # Verify other fields are set correctly
            assert prs[0].number == 1
            assert prs[0].title == 'Test PR'
            assert prs[0].body == 'Test Body fixes #1'
            assert prs[0].owner == 'test-owner'
            assert prs[0].repo == 'test-repo'
            assert prs[0].head_branch == 'test-branch'
            assert prs[0].closing_issues == [
                'This is additional context from an externally referenced issue.'
            ]


def test_get_issue_comments_with_specific_comment_id():
    # Mock the necessary dependencies
    with patch('httpx.get') as mock_get:
        # Mock the response for comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
            {'id': 123, 'body': 'First comment', 'resolvable': True, 'system': False},
            {'id': 456, 'body': 'Second comment', 'resolvable': True, 'system': False},
        ]

        mock_get.return_value = mock_comments_response

        # Create an instance of IssueHandler
        llm_config = LLMConfig(model='test', api_key='test')
        handler = ServiceContextIssue(
            GitlabIssueHandler('test-owner', 'test-repo', 'test-token'), llm_config
        )

        # Get comments with a specific comment_id
        specific_comment = handler.get_issue_comments(issue_number=1, comment_id=123)

        # Verify only the specific comment is returned
        assert specific_comment == ['First comment']


def test_pr_handler_get_converted_issues_with_specific_thread_comment():
    # Define the specific comment_id to filter
    specific_comment_id = 123

    # Mock GraphQL response for review threads
    with patch('httpx.get') as mock_get:
        # Mock the response for PRs
        mock_prs_response = MagicMock()
        mock_prs_response.json.return_value = [
            {
                'iid': 1,
                'title': 'Test PR',
                'description': 'Test Body',
                'source_branch': 'test-branch',
            }
        ]

        # Mock the response for PR comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
            {'body': 'First comment', 'id': 123, 'resolvable': True, 'system': False},
            {'body': 'Second comment', 'id': 124, 'resolvable': True, 'system': False},
        ]

        # Mock the response for PR metadata (GraphQL)
        mock_graphql_response = MagicMock()
        mock_graphql_response.json.return_value = {
            'data': {
                'project': {
                    'mergeRequest': {
                        'discussions': {
                            'edges': [
                                {
                                    'node': {
                                        'id': 'review-thread-1',
                                        'resolved': False,
                                        'resolvable': True,
                                        'notes': {
                                            'nodes': [
                                                {
                                                    'id': 'GID/121',
                                                    'body': 'Specific review comment',
                                                    'position': {
                                                        'filePath': 'file1.txt',
                                                    },
                                                },
                                                {
                                                    'id': 'GID/456',
                                                    'body': 'Another review comment',
                                                    'position': {
                                                        'filePath': 'file2.txt',
                                                    },
                                                },
                                            ]
                                        },
                                    }
                                }
                            ]
                        },
                    }
                }
            }
        }

        # Set up the mock to return different responses
        # We need to return empty responses for subsequent pages
        mock_empty_response = MagicMock()
        mock_empty_response.json.return_value = []

        mock_get.side_effect = [
            mock_prs_response,  # First call for PRs
            mock_empty_response,  # Second call for PRs (empty page)
            mock_empty_response,  # Third call for related issues
            mock_comments_response,  # Fourth call for PR comments
            mock_empty_response,  # Fifth call for PR comments (empty page)
        ]

        # Mock the post request for GraphQL
        with patch('httpx.post') as mock_post:
            mock_post.return_value = mock_graphql_response

            # Create an instance of PRHandler
            llm_config = LLMConfig(model='test', api_key='test')
            handler = ServiceContextPR(
                GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
            )

            # Get converted issues
            prs = handler.get_converted_issues(
                issue_numbers=[1], comment_id=specific_comment_id
            )

            # Verify that we got exactly one PR
            assert len(prs) == 1

            # Verify that thread_comments are set correctly
            assert prs[0].thread_comments == ['First comment']
            assert prs[0].review_comments is None
            assert prs[0].review_threads == []

            # Verify other fields are set correctly
            assert prs[0].number == 1
            assert prs[0].title == 'Test PR'
            assert prs[0].body == 'Test Body'
            assert prs[0].owner == 'test-owner'
            assert prs[0].repo == 'test-repo'
            assert prs[0].head_branch == 'test-branch'


def test_pr_handler_get_converted_issues_with_specific_review_thread_comment():
    # Define the specific comment_id to filter
    specific_comment_id = 123

    # Mock GraphQL response for review threads
    with patch('httpx.get') as mock_get:
        # Mock the response for PRs
        mock_prs_response = MagicMock()
        mock_prs_response.json.return_value = [
            {
                'iid': 1,
                'title': 'Test PR',
                'description': 'Test Body',
                'source_branch': 'test-branch',
            }
        ]

        # Mock the response for PR comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
            {
                'description': 'First comment',
                'id': 120,
                'resolvable': True,
                'system': False,
            },
            {
                'description': 'Second comment',
                'id': 124,
                'resolvable': True,
                'system': False,
            },
        ]

        # Mock the response for PR metadata (GraphQL)
        mock_graphql_response = MagicMock()
        mock_graphql_response.json.return_value = {
            'data': {
                'project': {
                    'mergeRequest': {
                        'discussions': {
                            'edges': [
                                {
                                    'node': {
                                        'id': 'review-thread-1',
                                        'resolved': False,
                                        'resolvable': True,
                                        'notes': {
                                            'nodes': [
                                                {
                                                    'id': f'GID/{specific_comment_id}',
                                                    'body': 'Specific review comment',
                                                    'position': {
                                                        'filePath': 'file1.txt',
                                                    },
                                                },
                                                {
                                                    'id': 'GID/456',
                                                    'body': 'Another review comment',
                                                    'position': {
                                                        'filePath': 'file1.txt',
                                                    },
                                                },
                                            ]
                                        },
                                    }
                                }
                            ]
                        },
                    }
                }
            }
        }

        # Set up the mock to return different responses
        # We need to return empty responses for subsequent pages
        mock_empty_response = MagicMock()
        mock_empty_response.json.return_value = []

        mock_get.side_effect = [
            mock_prs_response,  # First call for PRs
            mock_empty_response,  # Second call for PRs (empty page)
            mock_empty_response,  # Third call for related issues
            mock_comments_response,  # Fourth call for PR comments
            mock_empty_response,  # Fifth call for PR comments (empty page)
        ]

        # Mock the post request for GraphQL
        with patch('httpx.post') as mock_post:
            mock_post.return_value = mock_graphql_response

            # Create an instance of PRHandler
            llm_config = LLMConfig(model='test', api_key='test')
            handler = ServiceContextPR(
                GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
            )

            # Get converted issues
            prs = handler.get_converted_issues(
                issue_numbers=[1], comment_id=specific_comment_id
            )

            # Verify that we got exactly one PR
            assert len(prs) == 1

            # Verify that thread_comments are set correctly
            assert prs[0].thread_comments is None
            assert prs[0].review_comments is None
            assert len(prs[0].review_threads) == 1
            assert isinstance(prs[0].review_threads[0], ReviewThread)
            assert (
                prs[0].review_threads[0].comment
                == 'Specific review comment\n---\nlatest feedback:\nAnother review comment\n'
            )
            assert prs[0].review_threads[0].files == ['file1.txt']

            # Verify other fields are set correctly
            assert prs[0].number == 1
            assert prs[0].title == 'Test PR'
            assert prs[0].body == 'Test Body'
            assert prs[0].owner == 'test-owner'
            assert prs[0].repo == 'test-repo'
            assert prs[0].head_branch == 'test-branch'


def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
    # Define the specific comment_id to filter
    specific_comment_id = 123

    # Mock GraphQL response for review threads
    with patch('httpx.get') as mock_get:
        # Mock the response for PRs
        mock_prs_response = MagicMock()
        mock_prs_response.json.return_value = [
            {
                'iid': 1,
                'title': 'Test PR fixes #3',
                'description': 'Test Body',
                'source_branch': 'test-branch',
            }
        ]

        # Mock the response for PR comments
        mock_comments_response = MagicMock()
        mock_comments_response.json.return_value = [
            {
                'description': 'First comment',
                'id': 120,
                'resolvable': True,
                'system': False,
            },
            {
                'description': 'Second comment',
|
469 |
-
'id': 124,
|
470 |
-
'resolvable': True,
|
471 |
-
'system': False,
|
472 |
-
},
|
473 |
-
]
|
474 |
-
|
475 |
-
# Mock the response for PR metadata (GraphQL)
|
476 |
-
mock_graphql_response = MagicMock()
|
477 |
-
mock_graphql_response.json.return_value = {
|
478 |
-
'data': {
|
479 |
-
'project': {
|
480 |
-
'mergeRequest': {
|
481 |
-
'discussions': {
|
482 |
-
'edges': [
|
483 |
-
{
|
484 |
-
'node': {
|
485 |
-
'id': 'review-thread-1',
|
486 |
-
'resolved': False,
|
487 |
-
'resolvable': True,
|
488 |
-
'notes': {
|
489 |
-
'nodes': [
|
490 |
-
{
|
491 |
-
'id': f'GID/{specific_comment_id}',
|
492 |
-
'body': 'Specific review comment that references #6',
|
493 |
-
'position': {
|
494 |
-
'filePath': 'file1.txt',
|
495 |
-
},
|
496 |
-
},
|
497 |
-
{
|
498 |
-
'id': 'GID/456',
|
499 |
-
'body': 'Another review comment referencing #7',
|
500 |
-
'position': {
|
501 |
-
'filePath': 'file2.txt',
|
502 |
-
},
|
503 |
-
},
|
504 |
-
]
|
505 |
-
},
|
506 |
-
}
|
507 |
-
}
|
508 |
-
]
|
509 |
-
},
|
510 |
-
}
|
511 |
-
}
|
512 |
-
}
|
513 |
-
}
|
514 |
-
|
515 |
-
# Set up the mock to return different responses
|
516 |
-
# We need to return empty responses for subsequent pages
|
517 |
-
mock_empty_response = MagicMock()
|
518 |
-
mock_empty_response.json.return_value = []
|
519 |
-
|
520 |
-
# Mock the response for fetching the external issue referenced in PR body
|
521 |
-
mock_external_issue_response_in_body = MagicMock()
|
522 |
-
mock_external_issue_response_in_body.json.return_value = {
|
523 |
-
'description': 'External context #1.'
|
524 |
-
}
|
525 |
-
|
526 |
-
# Mock the response for fetching the external issue referenced in review thread
|
527 |
-
mock_external_issue_response_review_thread = MagicMock()
|
528 |
-
mock_external_issue_response_review_thread.json.return_value = {
|
529 |
-
'description': 'External context #2.'
|
530 |
-
}
|
531 |
-
|
532 |
-
mock_get.side_effect = [
|
533 |
-
mock_prs_response, # First call for PRs
|
534 |
-
mock_empty_response, # Second call for PRs (empty page)
|
535 |
-
mock_empty_response, # Third call for related issues
|
536 |
-
mock_comments_response, # Fourth call for PR comments
|
537 |
-
mock_empty_response, # Fifth call for PR comments (empty page)
|
538 |
-
mock_external_issue_response_in_body,
|
539 |
-
mock_external_issue_response_review_thread,
|
540 |
-
]
|
541 |
-
|
542 |
-
# Mock the post request for GraphQL
|
543 |
-
with patch('httpx.post') as mock_post:
|
544 |
-
mock_post.return_value = mock_graphql_response
|
545 |
-
|
546 |
-
# Create an instance of PRHandler
|
547 |
-
llm_config = LLMConfig(model='test', api_key='test')
|
548 |
-
handler = ServiceContextPR(
|
549 |
-
GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
|
550 |
-
)
|
551 |
-
|
552 |
-
# Get converted issues
|
553 |
-
prs = handler.get_converted_issues(
|
554 |
-
issue_numbers=[1], comment_id=specific_comment_id
|
555 |
-
)
|
556 |
-
|
557 |
-
# Verify that we got exactly one PR
|
558 |
-
assert len(prs) == 1
|
559 |
-
|
560 |
-
# Verify that thread_comments are set correctly
|
561 |
-
assert prs[0].thread_comments is None
|
562 |
-
assert prs[0].review_comments is None
|
563 |
-
assert len(prs[0].review_threads) == 1
|
564 |
-
assert isinstance(prs[0].review_threads[0], ReviewThread)
|
565 |
-
assert (
|
566 |
-
prs[0].review_threads[0].comment
|
567 |
-
== 'Specific review comment that references #6\n---\nlatest feedback:\nAnother review comment referencing #7\n'
|
568 |
-
)
|
569 |
-
assert prs[0].closing_issues == [
|
570 |
-
'External context #1.',
|
571 |
-
'External context #2.',
|
572 |
-
] # Only includes references inside comment ID and body PR
|
573 |
-
|
574 |
-
# Verify other fields are set correctly
|
575 |
-
assert prs[0].number == 1
|
576 |
-
assert prs[0].title == 'Test PR fixes #3'
|
577 |
-
assert prs[0].body == 'Test Body'
|
578 |
-
assert prs[0].owner == 'test-owner'
|
579 |
-
assert prs[0].repo == 'test-repo'
|
580 |
-
assert prs[0].head_branch == 'test-branch'
|
581 |
-
|
582 |
-
|
583 |
-
def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
|
584 |
-
# Mock the necessary dependencies
|
585 |
-
with patch('httpx.get') as mock_get:
|
586 |
-
# Mock the response for PRs
|
587 |
-
mock_prs_response = MagicMock()
|
588 |
-
mock_prs_response.json.return_value = [
|
589 |
-
{
|
590 |
-
'iid': 1,
|
591 |
-
'title': 'Test PR',
|
592 |
-
'description': 'Test Body fixes #1',
|
593 |
-
'source_branch': 'test-branch',
|
594 |
-
}
|
595 |
-
]
|
596 |
-
|
597 |
-
# Mock the response for PR comments
|
598 |
-
mock_comments_response = MagicMock()
|
599 |
-
mock_comments_response.json.return_value = [
|
600 |
-
{
|
601 |
-
'body': 'First comment addressing #1',
|
602 |
-
'resolvable': True,
|
603 |
-
'system': False,
|
604 |
-
},
|
605 |
-
{
|
606 |
-
'body': 'Second comment addressing #2',
|
607 |
-
'resolvable': True,
|
608 |
-
'system': False,
|
609 |
-
},
|
610 |
-
]
|
611 |
-
|
612 |
-
# Mock the response for PR metadata (GraphQL)
|
613 |
-
mock_graphql_response = MagicMock()
|
614 |
-
mock_graphql_response.json.return_value = {
|
615 |
-
'data': {
|
616 |
-
'project': {
|
617 |
-
'mergeRequest': {
|
618 |
-
'discussions': {'edges': []},
|
619 |
-
}
|
620 |
-
}
|
621 |
-
}
|
622 |
-
}
|
623 |
-
|
624 |
-
# Set up the mock to return different responses
|
625 |
-
# We need to return empty responses for subsequent pages
|
626 |
-
mock_empty_response = MagicMock()
|
627 |
-
mock_empty_response.json.return_value = []
|
628 |
-
|
629 |
-
# Mock the response for fetching the external issue referenced in PR body
|
630 |
-
mock_external_issue_response_in_body = MagicMock()
|
631 |
-
mock_external_issue_response_in_body.json.return_value = {
|
632 |
-
'description': 'External context #1.'
|
633 |
-
}
|
634 |
-
|
635 |
-
# Mock the response for fetching the external issue referenced in review thread
|
636 |
-
mock_external_issue_response_in_comment = MagicMock()
|
637 |
-
mock_external_issue_response_in_comment.json.return_value = {
|
638 |
-
'description': 'External context #2.'
|
639 |
-
}
|
640 |
-
|
641 |
-
mock_get.side_effect = [
|
642 |
-
mock_prs_response, # First call for PRs
|
643 |
-
mock_empty_response, # Second call for PRs (empty page)
|
644 |
-
mock_empty_response, # Third call for related issues
|
645 |
-
mock_comments_response, # Fourth call for PR comments
|
646 |
-
mock_empty_response, # Fifth call for PR comments (empty page)
|
647 |
-
mock_external_issue_response_in_body, # Mock response for the external issue reference #1
|
648 |
-
mock_external_issue_response_in_comment,
|
649 |
-
]
|
650 |
-
|
651 |
-
# Mock the post request for GraphQL
|
652 |
-
with patch('httpx.post') as mock_post:
|
653 |
-
mock_post.return_value = mock_graphql_response
|
654 |
-
|
655 |
-
# Create an instance of PRHandler
|
656 |
-
llm_config = LLMConfig(model='test', api_key='test')
|
657 |
-
handler = ServiceContextPR(
|
658 |
-
GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
|
659 |
-
)
|
660 |
-
|
661 |
-
# Get converted issues
|
662 |
-
prs = handler.get_converted_issues(issue_numbers=[1])
|
663 |
-
|
664 |
-
# Verify that we got exactly one PR
|
665 |
-
assert len(prs) == 1
|
666 |
-
|
667 |
-
# Verify that thread_comments are set correctly
|
668 |
-
assert prs[0].thread_comments == [
|
669 |
-
'First comment addressing #1',
|
670 |
-
'Second comment addressing #2',
|
671 |
-
]
|
672 |
-
|
673 |
-
# Verify other fields are set correctly
|
674 |
-
assert prs[0].number == 1
|
675 |
-
assert prs[0].title == 'Test PR'
|
676 |
-
assert prs[0].body == 'Test Body fixes #1'
|
677 |
-
assert prs[0].owner == 'test-owner'
|
678 |
-
assert prs[0].repo == 'test-repo'
|
679 |
-
assert prs[0].head_branch == 'test-branch'
|
680 |
-
assert prs[0].closing_issues == [
|
681 |
-
'External context #1.',
|
682 |
-
'External context #2.',
|
683 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
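A pattern worth noting from the tests above: every `mock_get.side_effect` list terminates each paged endpoint with an empty response, because the handler keeps requesting pages until one comes back empty. A minimal, self-contained sketch of that contract (the `fetch_all_pages` helper and the URL are hypothetical stand-ins for the handler's internal pagination loop; assumes `httpx` is installed):

from unittest.mock import MagicMock, patch

import httpx


def fetch_all_pages(url):
    """Collect JSON items from a paged endpoint until an empty page is returned."""
    items, page = [], 1
    while True:
        batch = httpx.get(url, params={'page': page}).json()
        if not batch:
            return items
        items.extend(batch)
        page += 1


page_one = MagicMock()
page_one.json.return_value = [{'iid': 1}]
empty_page = MagicMock()
empty_page.json.return_value = []

with patch('httpx.get') as mock_get:
    # side_effect hands out one response per call, in order, so the
    # empty page is what terminates the pagination loop.
    mock_get.side_effect = [page_one, empty_page]
    assert fetch_all_pages('https://gitlab.example/api/v4/mrs') == [{'iid': 1}]

This is why a test that forgets the trailing empty responses exhausts `side_effect` and fails with a `StopIteration` rather than a clean assertion error.
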
tests/unit/resolver/gitlab/test_gitlab_issue_handler_error_handling.py
DELETED
@@ -1,283 +0,0 @@
from unittest.mock import MagicMock, patch

import httpx
import pytest
from litellm.exceptions import RateLimitError

from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm.llm import LLM
from openhands.resolver.interfaces.gitlab import GitlabIssueHandler, GitlabPRHandler
from openhands.resolver.interfaces.issue import Issue
from openhands.resolver.interfaces.issue_definitions import (
    ServiceContextIssue,
    ServiceContextPR,
)


@pytest.fixture(autouse=True)
def mock_logger(monkeypatch):
    # suppress logging of completion data to file
    mock_logger = MagicMock()
    monkeypatch.setattr('openhands.llm.debug_mixin.llm_prompt_logger', mock_logger)
    monkeypatch.setattr('openhands.llm.debug_mixin.llm_response_logger', mock_logger)
    return mock_logger


@pytest.fixture
def default_config():
    return LLMConfig(
        model='gpt-4o',
        api_key='test_key',
        num_retries=2,
        retry_min_wait=1,
        retry_max_wait=2,
    )


def test_handle_nonexistent_issue_reference():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Mock the requests.get to simulate a 404 error
    mock_response = MagicMock()
    mock_response.raise_for_status.side_effect = httpx.HTTPError(
        '404 Client Error: Not Found'
    )

    with patch('httpx.get', return_value=mock_response):
        # Call the method with a non-existent issue reference
        result = handler._strategy.get_context_from_external_issues_references(
            closing_issues=[],
            closing_issue_numbers=[],
            issue_body='This references #999999',  # Non-existent issue
            review_comments=[],
            review_threads=[],
            thread_comments=None,
        )

        # The method should return an empty list since the referenced issue couldn't be fetched
        assert result == []


def test_handle_rate_limit_error():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Mock the requests.get to simulate a rate limit error
    mock_response = MagicMock()
    mock_response.raise_for_status.side_effect = httpx.HTTPError(
        '403 Client Error: Rate Limit Exceeded'
    )

    with patch('httpx.get', return_value=mock_response):
        # Call the method with an issue reference
        result = handler._strategy.get_context_from_external_issues_references(
            closing_issues=[],
            closing_issue_numbers=[],
            issue_body='This references #123',
            review_comments=[],
            review_threads=[],
            thread_comments=None,
        )

        # The method should return an empty list since the request was rate limited
        assert result == []


def test_handle_network_error():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Mock the requests.get to simulate a network error
    with patch('httpx.get', side_effect=httpx.NetworkError('Network Error')):
        # Call the method with an issue reference
        result = handler._strategy.get_context_from_external_issues_references(
            closing_issues=[],
            closing_issue_numbers=[],
            issue_body='This references #123',
            review_comments=[],
            review_threads=[],
            thread_comments=None,
        )

        # The method should return an empty list since the network request failed
        assert result == []


def test_successful_issue_reference():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Mock a successful response
    mock_response = MagicMock()
    mock_response.raise_for_status.return_value = None
    mock_response.json.return_value = {
        'description': 'This is the referenced issue body'
    }

    with patch('httpx.get', return_value=mock_response):
        # Call the method with an issue reference
        result = handler._strategy.get_context_from_external_issues_references(
            closing_issues=[],
            closing_issue_numbers=[],
            issue_body='This references #123',
            review_comments=[],
            review_threads=[],
            thread_comments=None,
        )

        # The method should return a list with the referenced issue body
        assert result == ['This is the referenced issue body']


class MockLLMResponse:
    """Mock LLM Response class to mimic the actual LLM response structure."""

    class Choice:
        class Message:
            def __init__(self, content):
                self.content = content

        def __init__(self, content):
            self.message = self.Message(content)

    def __init__(self, content):
        self.choices = [self.Choice(content)]


class DotDict(dict):
    """
    A dictionary that supports dot notation access.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        for key, value in self.items():
            if isinstance(value, dict):
                self[key] = DotDict(value)
            elif isinstance(value, list):
                self[key] = [
                    DotDict(item) if isinstance(item, dict) else item for item in value
                ]

    def __getattr__(self, key):
        if key in self:
            return self[key]
        else:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{key}'"
            )

    def __setattr__(self, key, value):
        self[key] = value

    def __delattr__(self, key):
        if key in self:
            del self[key]
        else:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{key}'"
            )


@patch('openhands.llm.llm.litellm_completion')
def test_guess_success_rate_limit_wait_time(mock_litellm_completion, default_config):
    """Test that the retry mechanism in guess_success respects wait time between retries."""

    with patch('time.sleep') as mock_sleep:
        # Simulate a rate limit error followed by a successful response
        mock_litellm_completion.side_effect = [
            RateLimitError(
                'Rate limit exceeded', llm_provider='test_provider', model='test_model'
            ),
            DotDict(
                {
                    'choices': [
                        {
                            'message': {
                                'content': '--- success\ntrue\n--- explanation\nRetry successful'
                            }
                        }
                    ]
                }
            ),
        ]

        llm = LLM(config=default_config)
        handler = ServiceContextIssue(
            GitlabIssueHandler('test-owner', 'test-repo', 'test-token'), default_config
        )
        handler.llm = llm

        # Mock issue and history
        issue = Issue(
            owner='test-owner',
            repo='test-repo',
            number=1,
            title='Test Issue',
            body='This is a test issue.',
            thread_comments=['Please improve error handling'],
        )
        history = [MessageAction(content='Fixed error handling.')]

        # Call guess_success
        success, _, explanation = handler.guess_success(issue, history)

        # Assertions
        assert success is True
        assert explanation == 'Retry successful'
        assert mock_litellm_completion.call_count == 2  # Two attempts made
        mock_sleep.assert_called_once()  # Sleep called once between retries

        # Validate wait time
        wait_time = mock_sleep.call_args[0][0]
        assert (
            default_config.retry_min_wait <= wait_time <= default_config.retry_max_wait
        ), (
            f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'
        )


@patch('openhands.llm.llm.litellm_completion')
def test_guess_success_exhausts_retries(mock_completion, default_config):
    """Test the retry mechanism in guess_success exhausts retries and raises an error."""
    # Simulate persistent rate limit errors by always raising RateLimitError
    mock_completion.side_effect = RateLimitError(
        'Rate limit exceeded', llm_provider='test_provider', model='test_model'
    )

    # Initialize LLM and handler
    llm = LLM(config=default_config)
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), default_config
    )
    handler.llm = llm

    # Mock issue and history
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test Issue',
        body='This is a test issue.',
        thread_comments=['Please improve error handling'],
    )
    history = [MessageAction(content='Fixed error handling.')]

    # Call guess_success and expect it to raise an error after retries
    with pytest.raises(RateLimitError):
        handler.guess_success(issue, history)

    # Assertions
    assert (
        mock_completion.call_count == default_config.num_retries
    )  # Initial call + retries

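The two retry tests above patch `time.sleep` and then assert on both the attempt count and the recorded wait. The same technique works against any backoff loop; here is a self-contained sketch in which the `call_with_retry` helper is hypothetical, standing in for the LLM retry logic:

import random
from unittest.mock import MagicMock, patch


def call_with_retry(fn, num_retries=2, min_wait=1, max_wait=2):
    """Retry fn, sleeping a bounded random interval between attempts."""
    import time

    for attempt in range(num_retries):
        try:
            return fn()
        except RuntimeError:
            if attempt == num_retries - 1:
                raise
            time.sleep(random.uniform(min_wait, max_wait))


# One failure, then success: the mock hands out results in order.
flaky = MagicMock(side_effect=[RuntimeError('rate limited'), 'ok'])

with patch('time.sleep') as mock_sleep:
    assert call_with_retry(flaky) == 'ok'
    assert flaky.call_count == 2     # one failed attempt plus one success
    mock_sleep.assert_called_once()  # exactly one backoff pause between them
    wait = mock_sleep.call_args[0][0]
    assert 1 <= wait <= 2            # wait stays inside the configured bounds

Patching the sleep keeps the test instant while still letting it verify the backoff interval that would have been used.
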
tests/unit/resolver/gitlab/test_gitlab_pr_handler_guess_success.py
DELETED
@@ -1,672 +0,0 @@
import json
from unittest.mock import MagicMock, patch

import pytest

from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm.llm import LLM
from openhands.resolver.interfaces.gitlab import GitlabPRHandler
from openhands.resolver.interfaces.issue import Issue, ReviewThread
from openhands.resolver.interfaces.issue_definitions import ServiceContextPR


@pytest.fixture
def pr_handler():
    llm_config = LLMConfig(model='test-model')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )
    return handler


@pytest.fixture
def mock_llm_success_response():
    return MagicMock(
        choices=[
            MagicMock(
                message=MagicMock(
                    content="""--- success
true

--- explanation
The changes look good"""
                )
            )
        ]
    )


def test_guess_success_review_threads_litellm_call():
    """Test that the completion() call for review threads contains the expected content."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create a mock issue with review threads
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=None,
        closing_issues=['Issue 1 description', 'Issue 2 description'],
        review_comments=None,
        review_threads=[
            ReviewThread(
                comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
                files=['/src/file1.py', '/src/file2.py'],
            ),
            ReviewThread(
                comment='Add more tests\n---\nlatest feedback:\nAdd test cases',
                files=['/tests/test_file.py'],
            ),
        ],
        thread_ids=['1', '2'],
        head_branch='test-branch',
    )

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Fixed formatting in file1.py and file2.py
2. Added docstrings to all functions
3. Added test cases in test_file.py"""
        )
    ]

    # Create mock LLM config
    llm_config = LLMConfig(model='test-model', api_key='test-key')

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    # Test the guess_success method
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(issue, history)

        # Verify the completion() calls
        assert mock_completion.call_count == 2  # One call per review thread

        # Check first call
        first_call = mock_completion.call_args_list[0]
        first_prompt = first_call[1]['messages'][0]['content']
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in first_prompt
        )
        assert (
            'Feedback:\nPlease fix the formatting\n---\nlatest feedback:\nAdd docstrings'
            in first_prompt
        )
        assert (
            'Files locations:\n'
            + json.dumps(['/src/file1.py', '/src/file2.py'], indent=4)
            in first_prompt
        )
        assert 'Last message from AI agent:\n' + history[0].content in first_prompt

        # Check second call
        second_call = mock_completion.call_args_list[1]
        second_prompt = second_call[1]['messages'][0]['content']
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in second_prompt
        )
        assert (
            'Feedback:\nAdd more tests\n---\nlatest feedback:\nAdd test cases'
            in second_prompt
        )
        assert (
            'Files locations:\n' + json.dumps(['/tests/test_file.py'], indent=4)
            in second_prompt
        )
        assert 'Last message from AI agent:\n' + history[0].content in second_prompt

        assert len(json.loads(explanation)) == 2


def test_guess_success_thread_comments_litellm_call():
    """Test that the completion() call for thread comments contains the expected content."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create a mock issue with thread comments
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=[
            'Please improve error handling',
            'Add input validation',
            'latest feedback:\nHandle edge cases',
        ],
        closing_issues=['Issue 1 description', 'Issue 2 description'],
        review_comments=None,
        thread_ids=None,
        head_branch='test-branch',
    )

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Added try/catch blocks for error handling
2. Added input validation checks
3. Added handling for edge cases"""
        )
    ]

    # Create mock LLM config
    llm_config = LLMConfig(model='test-model', api_key='test-key')

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    # Test the guess_success method
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(issue, history)

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in prompt
        )
        assert 'PR Thread Comments:\n' + '\n---\n'.join(issue.thread_comments) in prompt
        assert 'Last message from AI agent:\n' + history[0].content in prompt

        assert len(json.loads(explanation)) == 1


def test_check_feedback_with_llm():
    """Test the _check_feedback_with_llm helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Test cases for different LLM responses
    test_cases = [
        {
            'response': '--- success\ntrue\n--- explanation\nChanges look good',
            'expected': (True, 'Changes look good'),
        },
        {
            'response': '--- success\nfalse\n--- explanation\nNot all issues fixed',
            'expected': (False, 'Not all issues fixed'),
        },
        {
            'response': 'Invalid response format',
            'expected': (
                False,
                'Failed to decode answer from LLM response: Invalid response format',
            ),
        },
        {
            'response': '--- success\ntrue\n--- explanation\nMultiline\nexplanation\nhere',
            'expected': (True, 'Multiline\nexplanation\nhere'),
        },
    ]

    for case in test_cases:
        # Mock the LLM response
        mock_response = MagicMock()
        mock_response.choices = [MagicMock(message=MagicMock(content=case['response']))]

        # Test the function
        with patch.object(LLM, 'completion', return_value=mock_response):
            success, explanation = handler._check_feedback_with_llm('test prompt')
            assert (success, explanation) == case['expected']


def test_check_review_thread_with_git_patch():
    """Test that git patch from complete_runtime is included in the prompt."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    review_thread = ReviewThread(
        comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
        files=['/src/file1.py', '/src/file2.py'],
    )
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have fixed the formatting and added docstrings'
    git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""Added docstring."""\n'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_thread(
            review_thread, issues_context, last_message, git_patch
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'Feedback:\n' + review_thread.comment in prompt
        assert (
            'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
        )
        assert 'Last message from AI agent:\n' + last_message in prompt
        assert 'Changes made (git patch):\n' + git_patch in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_review_thread():
    """Test the _check_review_thread helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    review_thread = ReviewThread(
        comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
        files=['/src/file1.py', '/src/file2.py'],
    )
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have fixed the formatting and added docstrings'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_thread(
            review_thread, issues_context, last_message
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'Feedback:\n' + review_thread.comment in prompt
        assert (
            'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
        )
        assert 'Last message from AI agent:\n' + last_message in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_thread_comments_with_git_patch():
    """Test that git patch from complete_runtime is included in the prompt."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    thread_comments = [
        'Please improve error handling',
        'Add input validation',
        'latest feedback:\nHandle edge cases',
    ]
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have added error handling and input validation'
    git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+try:\n+ validate_input()\n+except ValueError:\n+ handle_error()\n'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_thread_comments(
            thread_comments, issues_context, last_message, git_patch
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
        assert 'Last message from AI agent:\n' + last_message in prompt
        assert 'Changes made (git patch):\n' + git_patch in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_thread_comments():
    """Test the _check_thread_comments helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    thread_comments = [
        'Please improve error handling',
        'Add input validation',
        'latest feedback:\nHandle edge cases',
    ]
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have added error handling and input validation'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_thread_comments(
            thread_comments, issues_context, last_message
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
        assert 'Last message from AI agent:\n' + last_message in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_review_comments_with_git_patch():
    """Test that git patch from complete_runtime is included in the prompt."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    review_comments = [
        'Please fix the code style',
        'Add more test cases',
        'latest feedback:\nImprove documentation',
    ]
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have fixed the code style and added tests'
    git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""This module does X."""\n+def func():\n+ """Do Y."""\n'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_comments(
            review_comments, issues_context, last_message, git_patch
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
        assert 'Last message from AI agent:\n' + last_message in prompt
        assert 'Changes made (git patch):\n' + git_patch in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_check_review_comments():
    """Test the _check_review_comments helper function."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create test data
    review_comments = [
        'Please improve code readability',
        'Add comments to complex functions',
        'Follow PEP 8 style guide',
    ]
    issues_context = json.dumps(
        ['Issue 1 description', 'Issue 2 description'], indent=4
    )
    last_message = 'I have improved code readability and added comments'

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_comments(
            review_comments, issues_context, last_message
        )

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert 'Issue descriptions:\n' + issues_context in prompt
        assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
        assert 'Last message from AI agent:\n' + last_message in prompt

        # Check result
        assert success is True
        assert explanation == 'Changes look good'


def test_guess_success_review_comments_litellm_call():
    """Test that the completion() call for review comments contains the expected content."""
    # Create a PR handler instance
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(
        GitlabPRHandler('test-owner', 'test-repo', 'test-token'), llm_config
    )

    # Create a mock issue with review comments
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=None,
        closing_issues=['Issue 1 description', 'Issue 2 description'],
        review_comments=[
            'Please improve code readability',
            'Add comments to complex functions',
            'Follow PEP 8 style guide',
        ],
        thread_ids=None,
        head_branch='test-branch',
    )

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Improved code readability by breaking down complex functions
2. Added detailed comments to all complex functions
3. Fixed code style to follow PEP 8"""
        )
    ]

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true

--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(issue, history)

        # Verify the completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]['messages'][0]['content']

        # Check prompt content
        assert (
            'Issue descriptions:\n'
            + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
            in prompt
        )
        assert 'PR Review Comments:\n' + '\n---\n'.join(issue.review_comments) in prompt
        assert 'Last message from AI agent:\n' + history[0].content in prompt

        assert len(json.loads(explanation)) == 1

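All of the mocks in this file emit answers in a `--- success` / `--- explanation` plain-text format, and one test case even pins down the failure message for malformed input. A plausible reconstruction of such a parser (an illustrative reimplementation, not necessarily OpenHands' actual `_check_feedback_with_llm` internals) would be:

import re


def parse_feedback_answer(response: str) -> tuple[bool, str]:
    """Extract (success, explanation) from an LLM reply, failing closed."""
    pattern = r'--- success\n*(true|false)\n*--- explanation\n((?:.|\n)*)'
    match = re.search(pattern, response)
    if not match:
        # Fail closed on anything that doesn't match the expected format.
        return False, f'Failed to decode answer from LLM response: {response}'
    return match.group(1) == 'true', match.group(2).strip()


assert parse_feedback_answer(
    '--- success\ntrue\n--- explanation\nChanges look good'
) == (True, 'Changes look good')
assert parse_feedback_answer('Invalid response format')[0] is False

Failing closed means an unparseable reply is treated as "not successful" rather than crashing, which matches the third test case in test_check_feedback_with_llm above.
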
tests/unit/resolver/gitlab/test_gitlab_pr_title_escaping.py
DELETED
@@ -1,166 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import subprocess
import tempfile

from openhands.core.logger import openhands_logger as logger
from openhands.integrations.service_types import ProviderType
from openhands.resolver.interfaces.issue import Issue
from openhands.resolver.send_pull_request import make_commit, send_pull_request


def test_commit_message_with_quotes():
    # Create a temporary directory and initialize git repo
    with tempfile.TemporaryDirectory() as temp_dir:
        subprocess.run(['git', 'init', temp_dir], check=True)

        # Create a test file and add it to git
        test_file = os.path.join(temp_dir, 'test.txt')
        with open(test_file, 'w') as f:
            f.write('test content')

        subprocess.run(['git', '-C', temp_dir, 'add', 'test.txt'], check=True)

        # Create a test issue with problematic title
        issue = Issue(
            owner='test-owner',
            repo='test-repo',
            number=123,
            title="Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>",
            body='Test body',
            labels=[],
            assignees=[],
            state='open',
            created_at='2024-01-01T00:00:00Z',
            updated_at='2024-01-01T00:00:00Z',
            closed_at=None,
            head_branch=None,
            thread_ids=None,
        )

        # Make the commit
        make_commit(temp_dir, issue, 'issue')

        # Get the commit message
        result = subprocess.run(
            ['git', '-C', temp_dir, 'log', '-1', '--pretty=%B'],
            capture_output=True,
            text=True,
            check=True,
        )
        commit_msg = result.stdout.strip()

        # The commit message should contain the quotes without excessive escaping
        expected = "Fix issue #123: Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>"
        assert commit_msg == expected, f'Expected: {expected}\nGot: {commit_msg}'


def test_pr_title_with_quotes(monkeypatch):
    # Mock httpx.post to avoid actual API calls
    class MockResponse:
        def __init__(self, status_code=201):
            self.status_code = status_code
            self.text = ''

        def json(self):
            return {'html_url': 'https://github.com/test/test/pull/1'}

        def raise_for_status(self):
            pass

    def mock_post(*args, **kwargs):
        # Verify that the PR title is not over-escaped
        data = kwargs.get('json', {})
        title = data.get('title', '')
        expected = "Fix issue #123: Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>"
        assert title == expected, (
            f'PR title was incorrectly escaped.\nExpected: {expected}\nGot: {title}'
        )
        return MockResponse()

    class MockGetResponse:
        def __init__(self, status_code=200):
            self.status_code = status_code
            self.text = ''

        def json(self):
            return {'default_branch': 'main'}

        def raise_for_status(self):
            pass

    monkeypatch.setattr('httpx.post', mock_post)
    monkeypatch.setattr('httpx.get', lambda *args, **kwargs: MockGetResponse())
    monkeypatch.setattr(
        'openhands.resolver.interfaces.github.GithubIssueHandler.branch_exists',
        lambda *args, **kwargs: False,
    )

    # Mock subprocess.run to avoid actual git commands
    original_run = subprocess.run

    def mock_run(*args, **kwargs):
        logger.info(f'Running command: {args[0] if args else kwargs.get("args", [])}')
        if isinstance(args[0], list) and args[0][0] == 'git':
            if 'push' in args[0]:
                return subprocess.CompletedProcess(
                    args[0], returncode=0, stdout='', stderr=''
                )
            return original_run(*args, **kwargs)
        return original_run(*args, **kwargs)

    monkeypatch.setattr('subprocess.run', mock_run)

    # Create a temporary directory and initialize git repo
    with tempfile.TemporaryDirectory() as temp_dir:
        logger.info('Initializing git repo...')
        subprocess.run(['git', 'init', temp_dir], check=True)

        # Configure the git user so the test commit can be created
        subprocess.run(
            ['git', '-C', temp_dir, 'config', 'user.name', 'Test User'], check=True
        )
        subprocess.run(
            ['git', '-C', temp_dir, 'config', 'user.email', '[email protected]'],
            check=True,
        )

        # Create a test file and add it to git
        test_file = os.path.join(temp_dir, 'test.txt')
        with open(test_file, 'w') as f:
            f.write('test content')

        logger.info('Adding and committing test file...')
        subprocess.run(['git', '-C', temp_dir, 'add', 'test.txt'], check=True)
        subprocess.run(
            ['git', '-C', temp_dir, 'commit', '-m', 'Initial commit'], check=True
        )

        # Create a test issue with problematic title
        logger.info('Creating test issue...')
        issue = Issue(
            owner='test-owner',
            repo='test-repo',
            number=123,
            title="Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>",
            body='Test body',
            labels=[],
            assignees=[],
            state='open',
            created_at='2024-01-01T00:00:00Z',
            updated_at='2024-01-01T00:00:00Z',
            closed_at=None,
            head_branch=None,
            thread_ids=None,
        )

        # Try to send a PR - this will fail if the title is incorrectly escaped
        logger.info('Sending PR...')

        send_pull_request(
            issue=issue,
            token='dummy-token',
            username='test-user',
            platform=ProviderType.GITHUB,
            patch_dir=temp_dir,
            pr_type='ready',
        )
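Both tests above pin down the same behavior: quote characters in an issue title must survive the commit and PR round-trip without extra shell escaping. A minimal standalone sketch of why argument lists avoid the problem (requires only the standard library and a `git` binary on PATH; the title and issue number are illustrative):

```python
# Minimal sketch: passing the commit message as one list element bypasses the
# shell entirely, so quotes in an issue title need no escaping. The over-escaped
# titles these tests guard against only appear when the message is spliced
# into a single shell command string instead.
import subprocess
import tempfile

title = "Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>"

with tempfile.TemporaryDirectory() as repo:
    subprocess.run(['git', 'init', repo], check=True)
    subprocess.run(['git', '-C', repo, 'config', 'user.name', 'Test User'], check=True)
    subprocess.run(
        ['git', '-C', repo, 'config', 'user.email', 'test@example.com'], check=True
    )
    subprocess.run(
        ['git', '-C', repo, 'commit', '--allow-empty', '-m', f'Fix issue #123: {title}'],
        check=True,
    )
    message = subprocess.run(
        ['git', '-C', repo, 'log', '-1', '--pretty=%B'],
        capture_output=True,
        text=True,
        check=True,
    ).stdout.strip()
    assert message == f'Fix issue #123: {title}'  # quotes survive untouched
```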
tests/unit/resolver/gitlab/test_gitlab_resolve_issues.py
DELETED
@@ -1,1000 +0,0 @@
import os
import tempfile
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from openhands.core.config import LLMConfig
from openhands.events.action import CmdRunAction
from openhands.events.observation import (
    CmdOutputMetadata,
    CmdOutputObservation,
    NullObservation,
)
from openhands.integrations.service_types import ProviderType
from openhands.llm.llm import LLM
from openhands.resolver.interfaces.gitlab import GitlabIssueHandler, GitlabPRHandler
from openhands.resolver.interfaces.issue import Issue, ReviewThread
from openhands.resolver.interfaces.issue_definitions import (
    ServiceContextIssue,
    ServiceContextPR,
)
from openhands.resolver.issue_resolver import (
    IssueResolver,
)
from openhands.resolver.resolver_output import ResolverOutput


@pytest.fixture
def default_mock_args():
    """Fixture that provides a default mock args object with common values.

    Tests can override specific attributes as needed.
    """
    mock_args = MagicMock()
    mock_args.selected_repo = 'test-owner/test-repo'
    mock_args.token = 'test-token'
    mock_args.username = 'test-user'
    mock_args.max_iterations = 5
    mock_args.output_dir = '/tmp'
    mock_args.llm_model = 'test'
    mock_args.llm_api_key = 'test'
    mock_args.llm_base_url = None
    mock_args.base_domain = None
    mock_args.runtime_container_image = None
    mock_args.is_experimental = False
    mock_args.issue_number = None
    mock_args.comment_id = None
    mock_args.repo_instruction_file = None
    mock_args.issue_type = 'issue'
    mock_args.prompt_file = None
    return mock_args


@pytest.fixture
def mock_gitlab_token():
    """Fixture that patches the identify_token function to return the GitLab provider type.

    This eliminates the need for repeated patching in each test function.
    """
    with patch(
        'openhands.resolver.issue_resolver.identify_token',
        return_value=ProviderType.GITLAB,
    ) as patched:
        yield patched


@pytest.fixture
def mock_output_dir():
    with tempfile.TemporaryDirectory() as temp_dir:
        repo_path = os.path.join(temp_dir, 'repo')
        # Initialize a git repo in "repo" and add a commit with "README.md"
        os.makedirs(repo_path)
        os.system(f'git init {repo_path}')
        readme_path = os.path.join(repo_path, 'README.md')
        with open(readme_path, 'w') as f:
            f.write('hello world')
        os.system(f'git -C {repo_path} add README.md')
        os.system(f"git -C {repo_path} commit -m 'Initial commit'")
        yield temp_dir


@pytest.fixture
def mock_subprocess():
    with patch('subprocess.check_output') as mock_check_output:
        yield mock_check_output


@pytest.fixture
def mock_os():
    with patch('os.system') as mock_system, patch('os.path.join') as mock_join:
        yield mock_system, mock_join


@pytest.fixture
def mock_user_instructions_template():
    return 'Issue: {{ body }}\n\nPlease fix this issue.'


@pytest.fixture
def mock_conversation_instructions_template():
    return 'Instructions: {{ repo_instruction }}'


@pytest.fixture
def mock_followup_prompt_template():
    return 'Issue context: {{ issues }}\n\nReview comments: {{ review_comments }}\n\nReview threads: {{ review_threads }}\n\nFiles: {{ files }}\n\nThread comments: {{ thread_context }}\n\nPlease fix this issue.'


def create_cmd_output(exit_code: int, content: str, command: str):
    return CmdOutputObservation(
        content=content,
        command=command,
        metadata=CmdOutputMetadata(exit_code=exit_code),
    )


def test_initialize_runtime(default_mock_args, mock_gitlab_token):
    mock_runtime = MagicMock()

    if os.getenv('GITLAB_CI') == 'true':
        mock_runtime.run_action.side_effect = [
            create_cmd_output(exit_code=0, content='', command='cd /workspace'),
            create_cmd_output(
                exit_code=0, content='', command='sudo chown -R 1001:0 /workspace/*'
            ),
            create_cmd_output(
                exit_code=0, content='', command='git config --global core.pager ""'
            ),
        ]
    else:
        mock_runtime.run_action.side_effect = [
            create_cmd_output(exit_code=0, content='', command='cd /workspace'),
            create_cmd_output(
                exit_code=0, content='', command='git config --global core.pager ""'
            ),
        ]

    # Create resolver with mocked token identification
    resolver = IssueResolver(default_mock_args)

    resolver.initialize_runtime(mock_runtime)

    if os.getenv('GITLAB_CI') == 'true':
        assert mock_runtime.run_action.call_count == 3
    else:
        assert mock_runtime.run_action.call_count == 2

    mock_runtime.run_action.assert_any_call(CmdRunAction(command='cd /workspace'))
    if os.getenv('GITLAB_CI') == 'true':
        mock_runtime.run_action.assert_any_call(
            CmdRunAction(command='sudo chown -R 1001:0 /workspace/*')
        )
    mock_runtime.run_action.assert_any_call(
        CmdRunAction(command='git config --global core.pager ""')
    )


@pytest.mark.asyncio
async def test_resolve_issue_no_issues_found(default_mock_args, mock_gitlab_token):
    """Test the resolve_issue method when no issues are found."""
    # Mock dependencies
    mock_handler = MagicMock()
    mock_handler.get_converted_issues.return_value = []  # Return empty list

    # Customize the mock args for this test
    default_mock_args.issue_number = 5432

    # Create a resolver instance with mocked token identification
    resolver = IssueResolver(default_mock_args)

    # Mock the issue handler
    resolver.issue_handler = mock_handler

    # Test that the correct exception is raised
    with pytest.raises(ValueError) as exc_info:
        await resolver.resolve_issue()

    # Verify the error message
    assert 'No issues found for issue number 5432' in str(exc_info.value)
    assert 'test-owner/test-repo' in str(exc_info.value)

    mock_handler.get_converted_issues.assert_called_once_with(
        issue_numbers=[5432], comment_id=None
    )


def test_download_issues_from_gitlab():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextIssue(
        GitlabIssueHandler('owner', 'repo', 'token'), llm_config
    )

    mock_issues_response = MagicMock()
    mock_issues_response.json.side_effect = [
        [
            {'iid': 1, 'title': 'Issue 1', 'description': 'This is an issue'},
            {
                'iid': 2,
                'title': 'PR 1',
                'description': 'This is a pull request',
                'pull_request': {},
            },
            {'iid': 3, 'title': 'Issue 2', 'description': 'This is another issue'},
        ],
        None,
    ]
    mock_issues_response.raise_for_status = MagicMock()

    mock_comments_response = MagicMock()
    mock_comments_response.json.return_value = []
    mock_comments_response.raise_for_status = MagicMock()

    def get_mock_response(url, *args, **kwargs):
        if '/notes' in url:
            return mock_comments_response
        return mock_issues_response

    with patch('httpx.get', side_effect=get_mock_response):
        issues = handler.get_converted_issues(issue_numbers=[1, 3])

    assert len(issues) == 2
    assert handler.issue_type == 'issue'
    assert all(isinstance(issue, Issue) for issue in issues)
    assert [issue.number for issue in issues] == [1, 3]
    assert [issue.title for issue in issues] == ['Issue 1', 'Issue 2']
    assert [issue.review_comments for issue in issues] == [None, None]
    assert [issue.closing_issues for issue in issues] == [None, None]
    assert [issue.thread_ids for issue in issues] == [None, None]


def test_download_pr_from_gitlab():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextPR(GitlabPRHandler('owner', 'repo', 'token'), llm_config)
    mock_pr_response = MagicMock()
    mock_pr_response.json.side_effect = [
        [
            {
                'iid': 1,
                'title': 'PR 1',
                'description': 'This is a pull request',
                'source_branch': 'b1',
            },
            {
                'iid': 2,
                'title': 'My PR',
                'description': 'This is another pull request',
                'source_branch': 'b2',
            },
            {
                'iid': 3,
                'title': 'PR 3',
                'description': 'Final PR',
                'source_branch': 'b3',
            },
        ],
        None,
    ]
    mock_pr_response.raise_for_status = MagicMock()

    # Mock for related issues response
    mock_related_issues_response = MagicMock()
    mock_related_issues_response.json.return_value = [
        {'description': 'Issue 1 body', 'iid': 1},
        {'description': 'Issue 2 body', 'iid': 2},
    ]
    mock_related_issues_response.raise_for_status = MagicMock()

    # Mock for PR comments response
    mock_comments_response = MagicMock()
    mock_comments_response.json.return_value = []  # No PR comments
    mock_comments_response.raise_for_status = MagicMock()

    # Mock for GraphQL request (for download_pr_metadata)
    mock_graphql_response = MagicMock()
    mock_graphql_response.json.side_effect = lambda: {
        'data': {
            'project': {
                'mergeRequest': {
                    'discussions': {
                        'edges': [
                            {
                                'node': {
                                    'id': '1',
                                    'resolved': False,
                                    'resolvable': True,
                                    'notes': {
                                        'nodes': [
                                            {
                                                'body': 'Unresolved comment 1',
                                                'position': {
                                                    'filePath': '/frontend/header.tsx',
                                                },
                                            },
                                            {
                                                'body': 'Follow up thread',
                                            },
                                        ]
                                    },
                                }
                            },
                            {
                                'node': {
                                    'id': '2',
                                    'resolved': True,
                                    'resolvable': True,
                                    'notes': {
                                        'nodes': [
                                            {
                                                'body': 'Resolved comment 1',
                                                'position': {
                                                    'filePath': '/some/file.py',
                                                },
                                            },
                                        ]
                                    },
                                }
                            },
                            {
                                'node': {
                                    'id': '3',
                                    'resolved': False,
                                    'resolvable': True,
                                    'notes': {
                                        'nodes': [
                                            {
                                                'body': 'Unresolved comment 3',
                                                'position': {
                                                    'filePath': '/another/file.py',
                                                },
                                            },
                                        ]
                                    },
                                }
                            },
                        ]
                    },
                }
            }
        }
    }

    mock_graphql_response.raise_for_status = MagicMock()

    def get_mock_response(url, *args, **kwargs):
        if '/notes' in url:
            return mock_comments_response
        if '/related_issues' in url:
            return mock_related_issues_response
        return mock_pr_response

    with patch('httpx.get', side_effect=get_mock_response):
        with patch('httpx.post', return_value=mock_graphql_response):
            issues = handler.get_converted_issues(issue_numbers=[1, 2, 3])

    assert len(issues) == 3
    assert handler.issue_type == 'pr'
    assert all(isinstance(issue, Issue) for issue in issues)
    assert [issue.number for issue in issues] == [1, 2, 3]
    assert [issue.title for issue in issues] == ['PR 1', 'My PR', 'PR 3']
    assert [issue.head_branch for issue in issues] == ['b1', 'b2', 'b3']

    assert len(issues[0].review_threads) == 2  # Only unresolved threads
    assert (
        issues[0].review_threads[0].comment
        == 'Unresolved comment 1\n---\nlatest feedback:\nFollow up thread\n'
    )
    assert issues[0].review_threads[0].files == ['/frontend/header.tsx']
    assert (
        issues[0].review_threads[1].comment
        == 'latest feedback:\nUnresolved comment 3\n'
    )
    assert issues[0].review_threads[1].files == ['/another/file.py']
    assert issues[0].closing_issues == ['Issue 1 body', 'Issue 2 body']
    assert issues[0].thread_ids == ['1', '3']


@pytest.mark.asyncio
async def test_complete_runtime(default_mock_args, mock_gitlab_token):
    mock_runtime = MagicMock()
    mock_runtime.run_action.side_effect = [
        create_cmd_output(exit_code=0, content='', command='cd /workspace'),
        create_cmd_output(
            exit_code=0, content='', command='git config --global core.pager ""'
        ),
        create_cmd_output(
            exit_code=0,
            content='',
            command='git config --global --add safe.directory /workspace',
        ),
        create_cmd_output(exit_code=0, content='', command='git add -A'),
        create_cmd_output(
            exit_code=0,
            content='git diff content',
            command='git diff --no-color --cached base_commit_hash',
        ),
    ]

    # Create a resolver instance with mocked token identification
    resolver = IssueResolver(default_mock_args)

    result = await resolver.complete_runtime(mock_runtime, 'base_commit_hash')

    assert result == {'git_patch': 'git diff content'}
    assert mock_runtime.run_action.call_count == 5


@pytest.mark.asyncio
@pytest.mark.parametrize(
    'test_case',
    [
        {
            'name': 'successful_run',
            'run_controller_return': MagicMock(
                history=[NullObservation(content='')],
                metrics=MagicMock(
                    get=MagicMock(return_value={'test_result': 'passed'})
                ),
                last_error=None,
            ),
            'run_controller_raises': None,
            'expected_success': True,
            'expected_error': None,
            'expected_explanation': 'Issue resolved successfully',
            'is_pr': False,
            'comment_success': None,
        },
        {
            'name': 'value_error',
            'run_controller_raises': ValueError('Test value error'),
            'expected_success': False,
            'expected_error': 'Agent failed to run or crashed',
            'expected_explanation': 'Agent failed to run',
            'is_pr': False,
            'comment_success': None,
        },
        {
            'name': 'runtime_error',
            'run_controller_raises': RuntimeError('Test runtime error'),
            'expected_success': False,
            'expected_error': 'Agent failed to run or crashed',
            'expected_explanation': 'Agent failed to run',
            'is_pr': False,
            'comment_success': None,
        },
        {
            'name': 'json_decode_error',
            'run_controller_return': MagicMock(
                history=[NullObservation(content='')],
                metrics=MagicMock(
                    get=MagicMock(return_value={'test_result': 'passed'})
                ),
                last_error=None,
            ),
            'run_controller_raises': None,
            'expected_success': True,
            'expected_error': None,
            'expected_explanation': 'Non-JSON explanation',
            'is_pr': True,
            'comment_success': [True, False],
        },
    ],
)
async def test_process_issue(
    default_mock_args,
    mock_gitlab_token,
    mock_output_dir,
    mock_user_instructions_template,
    test_case,
):
    """Test the process_issue method with different scenarios."""
    # Set up test data
    issue = Issue(
        owner='test_owner',
        repo='test_repo',
        number=1,
        title='Test Issue',
        body='This is a test issue',
    )
    base_commit = 'abcdef1234567890'

    # Customize the mock args for this test
    default_mock_args.output_dir = mock_output_dir
    default_mock_args.issue_type = 'pr' if test_case.get('is_pr', False) else 'issue'

    # Create a resolver instance with mocked token identification
    resolver = IssueResolver(default_mock_args)
    resolver.user_instructions_prompt_template = mock_user_instructions_template

    # Mock the handler with LLM config
    llm_config = LLMConfig(model='test', api_key='test')
    handler_instance = MagicMock()
    handler_instance.guess_success.return_value = (
        test_case['expected_success'],
        test_case.get('comment_success', None),
        test_case['expected_explanation'],
    )
    handler_instance.get_instruction.return_value = (
        'Test instruction',
        'Test conversation instructions',
        [],
    )
    handler_instance.issue_type = 'pr' if test_case.get('is_pr', False) else 'issue'
    handler_instance.llm = LLM(llm_config)

    # Create mock runtime and mock run_controller
    mock_runtime = MagicMock()
    mock_runtime.connect = AsyncMock()
    mock_create_runtime = MagicMock(return_value=mock_runtime)

    # Configure run_controller mock based on test case
    mock_run_controller = AsyncMock()
    if test_case.get('run_controller_raises'):
        mock_run_controller.side_effect = test_case['run_controller_raises']
    else:
        mock_run_controller.return_value = test_case['run_controller_return']

    # Patch the necessary functions and methods
    with (
        patch('openhands.resolver.issue_resolver.create_runtime', mock_create_runtime),
        patch('openhands.resolver.issue_resolver.run_controller', mock_run_controller),
        patch.object(
            resolver, 'complete_runtime', return_value={'git_patch': 'test patch'}
        ),
        patch.object(resolver, 'initialize_runtime') as mock_initialize_runtime,
        patch(
            'openhands.resolver.issue_resolver.SandboxConfig', return_value=MagicMock()
        ),
        patch(
            'openhands.resolver.issue_resolver.OpenHandsConfig',
            return_value=MagicMock(),
        ),
    ):
        # Call the process_issue method
        result = await resolver.process_issue(issue, base_commit, handler_instance)

        mock_create_runtime.assert_called_once()
        mock_runtime.connect.assert_called_once()
        mock_initialize_runtime.assert_called_once()
        mock_run_controller.assert_called_once()
        resolver.complete_runtime.assert_awaited_once_with(mock_runtime, base_commit)

        # Assert the result matches our expectations
        assert isinstance(result, ResolverOutput)
        assert result.issue == issue
        assert result.base_commit == base_commit
        assert result.git_patch == 'test patch'
        assert result.success == test_case['expected_success']
        assert result.result_explanation == test_case['expected_explanation']
        assert result.error == test_case['expected_error']

        if test_case['expected_success']:
            handler_instance.guess_success.assert_called_once()
        else:
            handler_instance.guess_success.assert_not_called()


def test_get_instruction(
    mock_user_instructions_template,
    mock_conversation_instructions_template,
    mock_followup_prompt_template,
):
    issue = Issue(
        owner='test_owner',
        repo='test_repo',
        number=123,
        title='Test Issue',
        body='This is a test issue refer to image ',
    )
    mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
    issue_handler = ServiceContextIssue(
        GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
    )
    instruction, conversation_instructions, images_urls = issue_handler.get_instruction(
        issue,
        mock_user_instructions_template,
        mock_conversation_instructions_template,
        None,
    )
    expected_instruction = 'Issue: Test Issue\n\nThis is a test issue refer to image \n\nPlease fix this issue.'

    assert images_urls == ['https://sampleimage.com/image1.png']
    assert issue_handler.issue_type == 'issue'
    assert instruction == expected_instruction
    assert conversation_instructions is not None

    issue = Issue(
        owner='test_owner',
        repo='test_repo',
        number=123,
        title='Test Issue',
        body='This is a test issue',
        closing_issues=['Issue 1 fix the type'],
        review_threads=[
            ReviewThread(
                comment="There is still a typo 'pthon' instead of 'python'", files=[]
            )
        ],
        thread_comments=[
            "I've left review comments, please address them",
            'This is a valid concern.',
        ],
    )

    pr_handler = ServiceContextPR(
        GitlabPRHandler('owner', 'repo', 'token'), mock_llm_config
    )
    instruction, conversation_instructions, images_urls = pr_handler.get_instruction(
        issue,
        mock_followup_prompt_template,
        mock_conversation_instructions_template,
        None,
    )
    expected_instruction = "Issue context: [\n \"Issue 1 fix the type\"\n]\n\nReview comments: None\n\nReview threads: [\n \"There is still a typo 'pthon' instead of 'python'\"\n]\n\nFiles: []\n\nThread comments: I've left review comments, please address them\n---\nThis is a valid concern.\n\nPlease fix this issue."

    assert images_urls == []
    assert pr_handler.issue_type == 'pr'
    # Compare content ignoring exact formatting
    assert "There is still a typo 'pthon' instead of 'python'" in instruction
    assert "I've left review comments, please address them" in instruction
    assert 'This is a valid concern' in instruction
    assert conversation_instructions is not None


def test_file_instruction():
    issue = Issue(
        owner='test_owner',
        repo='test_repo',
        number=123,
        title='Test Issue',
        body='This is a test issue ',
    )
    # load prompt from openhands/resolver/prompts/resolve/basic.jinja
    with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
        prompt = f.read()

    with open(
        'openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r'
    ) as f:
        conversation_instructions_template = f.read()

    # Test without thread comments
    mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
    issue_handler = ServiceContextIssue(
        GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
    )
    instruction, conversation_instructions, images_urls = issue_handler.get_instruction(
        issue, prompt, conversation_instructions_template, None
    )
    expected_instruction = """Please fix the following issue for the repository in /workspace.
An environment has been set up for you to start working. You may assume all necessary tools are installed.

# Problem Statement
Test Issue

This is a test issue """

    expected_conversation_instructions = """IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.
You SHOULD INCLUDE PROPER INDENTATION in your edit commands.

When you think you have fixed the issue through code changes, please finish the interaction."""

    assert instruction == expected_instruction
    assert conversation_instructions == expected_conversation_instructions
    assert images_urls == ['https://sampleimage.com/sample.png']


def test_file_instruction_with_repo_instruction():
    issue = Issue(
        owner='test_owner',
        repo='test_repo',
        number=123,
        title='Test Issue',
        body='This is a test issue',
    )
    # load prompt from openhands/resolver/prompts/resolve/basic.jinja
    with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
        prompt = f.read()

    with open(
        'openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r'
    ) as f:
        conversation_instructions_prompt = f.read()

    # load repo instruction from openhands/resolver/prompts/repo_instructions/all-hands-ai___openhands-resolver.txt
    with open(
        'openhands/resolver/prompts/repo_instructions/all-hands-ai___openhands-resolver.txt',
        'r',
    ) as f:
        repo_instruction = f.read()

    mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')
    issue_handler = ServiceContextIssue(
        GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
    )
    instruction, conversation_instructions, image_urls = issue_handler.get_instruction(
        issue, prompt, conversation_instructions_prompt, repo_instruction
    )

    expected_instruction = """Please fix the following issue for the repository in /workspace.
An environment has been set up for you to start working. You may assume all necessary tools are installed.

# Problem Statement
Test Issue

This is a test issue"""

    expected_conversation_instructions = """IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.
You SHOULD INCLUDE PROPER INDENTATION in your edit commands.

Some basic information about this repository:
This is a Python repo for openhands-resolver, a library that attempts to resolve github issues with the AI agent OpenHands.

- Setup: `poetry install --with test --with dev`
- Testing: `poetry run pytest tests/test_*.py`


When you think you have fixed the issue through code changes, please finish the interaction."""

    assert instruction == expected_instruction
    assert conversation_instructions == expected_conversation_instructions
    assert conversation_instructions is not None
    assert issue_handler.issue_type == 'issue'
    assert image_urls == []


def test_guess_success():
    mock_issue = Issue(
        owner='test_owner',
        repo='test_repo',
        number=1,
        title='Test Issue',
        body='This is a test issue',
    )
    mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
    mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')

    mock_completion_response = MagicMock()
    mock_completion_response.choices = [
        MagicMock(
            message=MagicMock(
                content='--- success\ntrue\n--- explanation\nIssue resolved successfully'
            )
        )
    ]
    issue_handler = ServiceContextIssue(
        GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
    )

    with patch.object(
        LLM, 'completion', MagicMock(return_value=mock_completion_response)
    ):
        success, comment_success, explanation = issue_handler.guess_success(
            mock_issue, mock_history
        )
        assert issue_handler.issue_type == 'issue'
        assert comment_success is None
        assert success
        assert explanation == 'Issue resolved successfully'


def test_guess_success_with_thread_comments():
    mock_issue = Issue(
        owner='test_owner',
        repo='test_repo',
        number=1,
        title='Test Issue',
        body='This is a test issue',
        thread_comments=[
            'First comment',
            'Second comment',
            'latest feedback:\nPlease add tests',
        ],
    )
    mock_history = [MagicMock(message='I have added tests for this case')]
    mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')

    mock_completion_response = MagicMock()
    mock_completion_response.choices = [
        MagicMock(
            message=MagicMock(
                content='--- success\ntrue\n--- explanation\nTests have been added to verify thread comments handling'
            )
        )
    ]
    issue_handler = ServiceContextIssue(
        GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
    )

    with patch.object(
        LLM, 'completion', MagicMock(return_value=mock_completion_response)
    ):
        success, comment_success, explanation = issue_handler.guess_success(
            mock_issue, mock_history
        )
        assert issue_handler.issue_type == 'issue'
        assert comment_success is None
        assert success
        assert 'Tests have been added' in explanation


def test_instruction_with_thread_comments():
    # Create an issue with thread comments
    issue = Issue(
        owner='test_owner',
        repo='test_repo',
        number=123,
        title='Test Issue',
        body='This is a test issue',
        thread_comments=[
            'First comment',
            'Second comment',
            'latest feedback:\nPlease add tests',
        ],
    )

    # Load the basic prompt template
    with open('openhands/resolver/prompts/resolve/basic.jinja', 'r') as f:
        prompt = f.read()

    with open(
        'openhands/resolver/prompts/resolve/basic-conversation-instructions.jinja', 'r'
    ) as f:
        conversation_instructions_template = f.read()

    llm_config = LLMConfig(model='test', api_key='test')
    issue_handler = ServiceContextIssue(
        GitlabIssueHandler('owner', 'repo', 'token'), llm_config
    )
    instruction, conversation_instructions, images_urls = issue_handler.get_instruction(
        issue, prompt, conversation_instructions_template, None
    )

    # Verify that thread comments are included in the instruction
    assert 'First comment' in instruction
    assert 'Second comment' in instruction
    assert 'Please add tests' in instruction
    assert 'Issue Thread Comments:' in instruction
    assert images_urls == []


def test_guess_success_failure():
    mock_issue = Issue(
        owner='test_owner',
        repo='test_repo',
        number=1,
        title='Test Issue',
        body='This is a test issue',
        thread_comments=[
            'First comment',
            'Second comment',
            'latest feedback:\nPlease add tests',
        ],
    )
    mock_history = [MagicMock(message='I have added tests for this case')]
    mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')

    mock_completion_response = MagicMock()
    mock_completion_response.choices = [
        MagicMock(
            message=MagicMock(
                content='--- success\ntrue\n--- explanation\nTests have been added to verify thread comments handling'
            )
        )
    ]
    issue_handler = ServiceContextIssue(
        GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
    )

    with patch.object(
        LLM, 'completion', MagicMock(return_value=mock_completion_response)
    ):
        success, comment_success, explanation = issue_handler.guess_success(
            mock_issue, mock_history
        )
        assert issue_handler.issue_type == 'issue'
        assert comment_success is None
        assert success
        assert 'Tests have been added' in explanation


def test_guess_success_negative_case():
    mock_issue = Issue(
        owner='test_owner',
        repo='test_repo',
        number=1,
        title='Test Issue',
        body='This is a test issue',
    )
    mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
    mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')

    mock_completion_response = MagicMock()
    mock_completion_response.choices = [
        MagicMock(
            message=MagicMock(
                content='--- success\nfalse\n--- explanation\nIssue not resolved'
            )
        )
    ]
    issue_handler = ServiceContextIssue(
        GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
    )

    with patch.object(
        LLM, 'completion', MagicMock(return_value=mock_completion_response)
    ):
        success, comment_success, explanation = issue_handler.guess_success(
            mock_issue, mock_history
        )
        assert issue_handler.issue_type == 'issue'
        assert comment_success is None
        assert not success
        assert explanation == 'Issue not resolved'


def test_guess_success_invalid_output():
    mock_issue = Issue(
        owner='test_owner',
        repo='test_repo',
        number=1,
        title='Test Issue',
        body='This is a test issue',
    )
    mock_history = [create_cmd_output(exit_code=0, content='', command='cd /workspace')]
    mock_llm_config = LLMConfig(model='test_model', api_key='test_api_key')

    mock_completion_response = MagicMock()
    mock_completion_response.choices = [
        MagicMock(message=MagicMock(content='This is not a valid output'))
    ]
    issue_handler = ServiceContextIssue(
        GitlabIssueHandler('owner', 'repo', 'token'), mock_llm_config
    )

    with patch.object(
        LLM, 'completion', MagicMock(return_value=mock_completion_response)
    ):
        success, comment_success, explanation = issue_handler.guess_success(
            mock_issue, mock_history
        )
        assert issue_handler.issue_type == 'issue'
        assert comment_success is None
        assert not success
        assert (
            explanation
            == 'Failed to decode answer from LLM response: This is not a valid output'
        )


def test_download_issue_with_specific_comment():
    llm_config = LLMConfig(model='test', api_key='test')
    handler = ServiceContextIssue(
        GitlabIssueHandler('owner', 'repo', 'token'), llm_config
    )

    # Define the specific comment_id to filter
    specific_comment_id = 101

    # Mock issue and comment responses
    mock_issue_response = MagicMock()
    mock_issue_response.json.side_effect = [
        [
            {'iid': 1, 'title': 'Issue 1', 'description': 'This is an issue'},
        ],
        None,
    ]
    mock_issue_response.raise_for_status = MagicMock()

    mock_comments_response = MagicMock()
    mock_comments_response.json.return_value = [
        {
            'id': specific_comment_id,
            'body': 'Specific comment body',
        },
        {
            'id': 102,
            'body': 'Another comment body',
        },
    ]
    mock_comments_response.raise_for_status = MagicMock()

    def get_mock_response(url, *args, **kwargs):
        if '/notes' in url:
            return mock_comments_response

        return mock_issue_response

    with patch('httpx.get', side_effect=get_mock_response):
        issues = handler.get_converted_issues(
            issue_numbers=[1], comment_id=specific_comment_id
        )

    assert len(issues) == 1
    assert issues[0].number == 1
    assert issues[0].title == 'Issue 1'
    assert issues[0].thread_comments == ['Specific comment body']


if __name__ == '__main__':
    pytest.main()
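A pattern worth noting in the file above: a single `side_effect` router fakes several HTTP endpoints at once by dispatching on a URL substring. A minimal runnable sketch of the idea (assumes `httpx` is installed; the URLs are illustrative, not real GitLab endpoints):

```python
# Minimal sketch of the URL-routing mock used throughout the deleted tests:
# one side_effect callable inspects the requested URL and hands back the
# matching canned response, so several endpoints are faked with one patch.
from unittest.mock import MagicMock, patch

import httpx

issues_response = MagicMock()
issues_response.json.return_value = [{'iid': 1, 'title': 'Issue 1'}]

notes_response = MagicMock()
notes_response.json.return_value = []  # endpoint with no comments


def route(url, *args, **kwargs):
    # Dispatch on a URL substring, just like get_mock_response above.
    return notes_response if '/notes' in url else issues_response


with patch('httpx.get', side_effect=route):
    assert httpx.get('https://gitlab.example/issues/1/notes') is notes_response
    assert httpx.get('https://gitlab.example/issues/1') is issues_response
```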
tests/unit/resolver/gitlab/test_gitlab_send_pull_request.py
DELETED
@@ -1,1206 +0,0 @@
import os
import tempfile
from unittest.mock import ANY, MagicMock, call, patch
from urllib.parse import quote

import pytest

from openhands.core.config import LLMConfig
from openhands.integrations.service_types import ProviderType
from openhands.resolver.interfaces.gitlab import GitlabIssueHandler
from openhands.resolver.interfaces.issue import ReviewThread
from openhands.resolver.resolver_output import Issue, ResolverOutput
from openhands.resolver.send_pull_request import (
    apply_patch,
    initialize_repo,
    load_single_resolver_output,
    main,
    make_commit,
    process_single_issue,
    send_pull_request,
    update_existing_pull_request,
)


@pytest.fixture
def mock_output_dir():
    with tempfile.TemporaryDirectory() as temp_dir:
        repo_path = os.path.join(temp_dir, 'repo')
        # Initialize a git repo in "repo" and add a commit with "README.md"
        os.makedirs(repo_path)
        os.system(f'git init {repo_path}')
        readme_path = os.path.join(repo_path, 'README.md')
        with open(readme_path, 'w') as f:
            f.write('hello world')
        os.system(f'git -C {repo_path} add README.md')
        os.system(f"git -C {repo_path} commit -m 'Initial commit'")
        yield temp_dir


@pytest.fixture
def mock_issue():
    return Issue(
        number=42,
        title='Test Issue',
        owner='test-owner',
        repo='test-repo',
        body='Test body',
    )


@pytest.fixture
def mock_llm_config():
    return LLMConfig()


def test_load_single_resolver_output():
    mock_output_jsonl = 'tests/unit/resolver/mock_output/output.jsonl'

    # Test loading an existing issue
    resolver_output = load_single_resolver_output(mock_output_jsonl, 5)
    assert isinstance(resolver_output, ResolverOutput)
    assert resolver_output.issue.number == 5
    assert resolver_output.issue.title == 'Add MIT license'
    assert resolver_output.issue.owner == 'neubig'
    assert resolver_output.issue.repo == 'pr-viewer'

    # Test loading a non-existent issue
    with pytest.raises(ValueError):
        load_single_resolver_output(mock_output_jsonl, 999)


def test_apply_patch(mock_output_dir):
    # Create a sample file in the mock repo
    sample_file = os.path.join(mock_output_dir, 'sample.txt')
    with open(sample_file, 'w') as f:
        f.write('Original content')

    # Create a sample patch
    patch_content = """
diff --git a/sample.txt b/sample.txt
index 9daeafb..b02def2 100644
--- a/sample.txt
+++ b/sample.txt
@@ -1 +1,2 @@
-Original content
+Updated content
+New line
"""

    # Apply the patch
    apply_patch(mock_output_dir, patch_content)

    # Check if the file was updated correctly
    with open(sample_file, 'r') as f:
        updated_content = f.read()

    assert updated_content.strip() == 'Updated content\nNew line'.strip()


def test_apply_patch_preserves_line_endings(mock_output_dir):
    # Create sample files with different line endings
    unix_file = os.path.join(mock_output_dir, 'unix_style.txt')
    dos_file = os.path.join(mock_output_dir, 'dos_style.txt')

    with open(unix_file, 'w', newline='\n') as f:
        f.write('Line 1\nLine 2\nLine 3')

    with open(dos_file, 'w', newline='\r\n') as f:
        f.write('Line 1\r\nLine 2\r\nLine 3')

    # Create patches for both files
    unix_patch = """
diff --git a/unix_style.txt b/unix_style.txt
index 9daeafb..b02def2 100644
--- a/unix_style.txt
+++ b/unix_style.txt
@@ -1,3 +1,3 @@
 Line 1
-Line 2
+Updated Line 2
 Line 3
"""

    dos_patch = """
diff --git a/dos_style.txt b/dos_style.txt
index 9daeafb..b02def2 100644
--- a/dos_style.txt
+++ b/dos_style.txt
@@ -1,3 +1,3 @@
 Line 1
-Line 2
+Updated Line 2
 Line 3
"""

    # Apply patches
    apply_patch(mock_output_dir, unix_patch)
    apply_patch(mock_output_dir, dos_patch)

    # Check if line endings are preserved
    with open(unix_file, 'rb') as f:
        unix_content = f.read()
    with open(dos_file, 'rb') as f:
        dos_content = f.read()

    assert b'\r\n' not in unix_content, (
        'Unix-style line endings were changed to DOS-style'
    )
    assert b'\r\n' in dos_content, 'DOS-style line endings were changed to Unix-style'

    # Check if content was updated correctly
    assert unix_content.decode('utf-8').split('\n')[1] == 'Updated Line 2'
    assert dos_content.decode('utf-8').split('\r\n')[1] == 'Updated Line 2'


def test_apply_patch_create_new_file(mock_output_dir):
    # Create a patch that adds a new file
    patch_content = """
diff --git a/new_file.txt b/new_file.txt
new file mode 100644
index 0000000..3b18e51
--- /dev/null
+++ b/new_file.txt
@@ -0,0 +1 @@
+hello world
"""

    # Apply the patch
    apply_patch(mock_output_dir, patch_content)

    # Check if the new file was created
    new_file_path = os.path.join(mock_output_dir, 'new_file.txt')
    assert os.path.exists(new_file_path), 'New file was not created'

    # Check if the file content is correct
    with open(new_file_path, 'r') as f:
        content = f.read().strip()
    assert content == 'hello world', 'File content is incorrect'


def test_apply_patch_rename_file(mock_output_dir):
    # Create a sample file in the mock repo
    old_file = os.path.join(mock_output_dir, 'old_name.txt')
    with open(old_file, 'w') as f:
        f.write('This file will be renamed')

    # Create a patch that renames the file
    patch_content = """diff --git a/old_name.txt b/new_name.txt
similarity index 100%
rename from old_name.txt
rename to new_name.txt"""

    # Apply the patch
    apply_patch(mock_output_dir, patch_content)

    # Check if the file was renamed
    new_file = os.path.join(mock_output_dir, 'new_name.txt')
    assert not os.path.exists(old_file), 'Old file still exists'
    assert os.path.exists(new_file), 'New file was not created'

    # Check if the content is preserved
    with open(new_file, 'r') as f:
        content = f.read()
    assert content == 'This file will be renamed'


def test_apply_patch_delete_file(mock_output_dir):
    # Create a sample file in the mock repo
    sample_file = os.path.join(mock_output_dir, 'to_be_deleted.txt')
    with open(sample_file, 'w') as f:
        f.write('This file will be deleted')

    # Create a patch that deletes the file
    patch_content = """
diff --git a/to_be_deleted.txt b/to_be_deleted.txt
deleted file mode 100644
index 9daeafb..0000000
--- a/to_be_deleted.txt
+++ /dev/null
@@ -1 +0,0 @@
-This file will be deleted
"""

    # Apply the patch
    apply_patch(mock_output_dir, patch_content)

    # Check if the file was deleted
    assert not os.path.exists(sample_file), 'File was not deleted'


def test_initialize_repo(mock_output_dir):
    issue_type = 'issue'
    # Copy the repo to patches
    ISSUE_NUMBER = 3
    initialize_repo(mock_output_dir, ISSUE_NUMBER, issue_type)
    patches_dir = os.path.join(mock_output_dir, 'patches', f'issue_{ISSUE_NUMBER}')

    # Check if files were copied correctly
    assert os.path.exists(os.path.join(patches_dir, 'README.md'))

    # Check file contents
    with open(os.path.join(patches_dir, 'README.md'), 'r') as f:
        assert f.read() == 'hello world'


@patch('openhands.resolver.interfaces.gitlab.GitlabIssueHandler.reply_to_comment')
@patch('httpx.post')
@patch('subprocess.run')
@patch('openhands.resolver.send_pull_request.LLM')
def test_update_existing_pull_request(
    mock_llm_class,
    mock_subprocess_run,
    mock_requests_post,
    mock_reply_to_comment,
):
    # Arrange: Set up test data
    issue = Issue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='This is a test PR',
        thread_ids=['comment1', 'comment2'],
        head_branch='test-branch',
    )
    token = 'test-token'
    username = 'test-user'
    patch_dir = '/path/to/patch'
    additional_message = '["Fixed bug in function A", "Updated documentation for B"]'

    # Mock the subprocess.run call for git push
    mock_subprocess_run.return_value = MagicMock(returncode=0)

    # Mock the requests.post call for adding a PR comment
    mock_requests_post.return_value.status_code = 201

    # Mock LLM instance and completion call
    mock_llm_instance = MagicMock()
    mock_completion_response = MagicMock()
    mock_completion_response.choices = [
        MagicMock(message=MagicMock(content='This is an issue resolution.'))
    ]
    mock_llm_instance.completion.return_value = mock_completion_response
    mock_llm_class.return_value = mock_llm_instance

    llm_config = LLMConfig()

    # Act: Call the function without comment_message to test auto-generation
    result = update_existing_pull_request(
        issue,
        token,
        username,
        ProviderType.GITLAB,
        patch_dir,
        llm_config,
        comment_message=None,
        additional_message=additional_message,
    )

    # Assert: Check if the git push command was executed
    push_command = (
        f'git -C {patch_dir} push '
        f'https://{username}:{token}@gitlab.com/'
        f'{issue.owner}/{issue.repo}.git {issue.head_branch}'
    )
    mock_subprocess_run.assert_called_once_with(
        push_command, shell=True, capture_output=True, text=True
    )

    # Assert: Check if the auto-generated comment was posted to the PR
    comment_url = f'https://gitlab.com/api/v4/projects/{quote(f"{issue.owner}/{issue.repo}", safe="")}/issues/{issue.number}/notes'
    expected_comment = 'This is an issue resolution.'
    mock_requests_post.assert_called_once_with(
        comment_url,
        headers={
            'Authorization': f'Bearer {token}',
            'Accept': 'application/json',
        },
        json={'body': expected_comment},
    )

    # Assert: Check if the reply_to_comment function was called for each thread ID
    mock_reply_to_comment.assert_has_calls(
        [
            call(issue.number, 'comment1', 'Fixed bug in function A'),
            call(issue.number, 'comment2', 'Updated documentation for B'),
        ]
    )

    # Assert: Check the returned PR URL
    assert (
        result
        == f'https://gitlab.com/{issue.owner}/{issue.repo}/-/merge_requests/{issue.number}'
    )


@pytest.mark.parametrize(
    'pr_type,target_branch,pr_title',
    [
        ('branch', None, None),
        ('draft', None, None),
        ('ready', None, None),
        ('branch', 'feature', None),
        ('draft', 'develop', None),
        ('ready', 'staging', None),
        ('ready', None, 'Custom PR Title'),
        ('draft', 'develop', 'Another Custom Title'),
    ],
)
@patch('subprocess.run')
@patch('httpx.post')
@patch('httpx.get')
def test_send_pull_request(
    mock_get,
    mock_post,
    mock_run,
    mock_issue,
    mock_llm_config,
    mock_output_dir,
    pr_type,
    target_branch,
    pr_title,
):
    repo_path = os.path.join(mock_output_dir, 'repo')

    # Mock API responses based on whether target_branch is specified
    if target_branch:
        mock_get.side_effect = [
            MagicMock(status_code=404),  # Branch doesn't exist
            MagicMock(status_code=200),  # Target branch exists
            MagicMock(json=lambda: {'default_branch': 'main'}),  # Get default branch
        ]
    else:
        mock_get.side_effect = [
            MagicMock(status_code=404),  # Branch doesn't exist
            MagicMock(json=lambda: {'default_branch': 'main'}),  # Get default branch
            MagicMock(json=lambda: {'default_branch': 'main'}),  # Get default branch
        ]

    mock_post.return_value.json.return_value = {
        'web_url': 'https://gitlab.com/test-owner/test-repo/-/merge_requests/1',
    }

    # Mock subprocess.run calls
    mock_run.side_effect = [
        MagicMock(returncode=0),  # git checkout -b
        MagicMock(returncode=0),  # git push
    ]

    # Call the function
    result = send_pull_request(
        issue=mock_issue,
        token='test-token',
        username='test-user',
        platform=ProviderType.GITLAB,
        patch_dir=repo_path,
        pr_type=pr_type,
        target_branch=target_branch,
        pr_title=pr_title,
    )

    # Assert API calls
    expected_get_calls = 2
    if pr_type == 'branch':
        expected_get_calls = 3

    assert mock_get.call_count == expected_get_calls

    # Check branch creation and push
    assert mock_run.call_count == 2
    checkout_call, push_call = mock_run.call_args_list

    assert checkout_call == call(
        ['git', '-C', repo_path, 'checkout', '-b', 'openhands-fix-issue-42'],
        capture_output=True,
        text=True,
    )
    assert push_call == call(
        [
            'git',
|
421 |
-
'-C',
|
422 |
-
repo_path,
|
423 |
-
'push',
|
424 |
-
'https://test-user:[email protected]/test-owner/test-repo.git',
|
425 |
-
'openhands-fix-issue-42',
|
426 |
-
],
|
427 |
-
capture_output=True,
|
428 |
-
text=True,
|
429 |
-
)
|
430 |
-
|
431 |
-
# Check PR creation based on pr_type
|
432 |
-
if pr_type == 'branch':
|
433 |
-
assert (
|
434 |
-
result
|
435 |
-
== 'https://gitlab.com/test-owner/test-repo/-/compare/main...openhands-fix-issue-42'
|
436 |
-
)
|
437 |
-
mock_post.assert_not_called()
|
438 |
-
else:
|
439 |
-
assert result == 'https://gitlab.com/test-owner/test-repo/-/merge_requests/1'
|
440 |
-
mock_post.assert_called_once()
|
441 |
-
post_data = mock_post.call_args[1]['json']
|
442 |
-
expected_title = pr_title if pr_title else 'Fix issue #42: Test Issue'
|
443 |
-
assert post_data['title'] == expected_title
|
444 |
-
assert post_data['description'].startswith('This pull request fixes #42.')
|
445 |
-
assert post_data['source_branch'] == 'openhands-fix-issue-42'
|
446 |
-
assert post_data['target_branch'] == (
|
447 |
-
target_branch if target_branch else 'main'
|
448 |
-
)
|
449 |
-
assert post_data['draft'] == (pr_type == 'draft')
|
450 |
-
|
451 |
-
|
452 |
-
@patch('subprocess.run')
|
453 |
-
@patch('httpx.post')
|
454 |
-
@patch('httpx.put')
|
455 |
-
@patch('httpx.get')
|
456 |
-
def test_send_pull_request_with_reviewer(
|
457 |
-
mock_get,
|
458 |
-
mock_put,
|
459 |
-
mock_post,
|
460 |
-
mock_run,
|
461 |
-
mock_issue,
|
462 |
-
mock_output_dir,
|
463 |
-
mock_llm_config,
|
464 |
-
):
|
465 |
-
repo_path = os.path.join(mock_output_dir, 'repo')
|
466 |
-
reviewer = 'test-reviewer'
|
467 |
-
|
468 |
-
# Mock API responses
|
469 |
-
mock_get.side_effect = [
|
470 |
-
MagicMock(status_code=404), # Branch doesn't exist
|
471 |
-
MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
|
472 |
-
MagicMock(json=lambda: [{'id': 123}]), # Get user data
|
473 |
-
]
|
474 |
-
|
475 |
-
# Mock PR creation response
|
476 |
-
mock_post.side_effect = [
|
477 |
-
MagicMock(
|
478 |
-
status_code=200,
|
479 |
-
json=lambda: {
|
480 |
-
'web_url': 'https://gitlab.com/test-owner/test-repo/-/merge_requests/1',
|
481 |
-
'iid': 1,
|
482 |
-
},
|
483 |
-
), # PR creation
|
484 |
-
]
|
485 |
-
|
486 |
-
# Mock request reviwers response
|
487 |
-
mock_put.side_effect = [
|
488 |
-
MagicMock(status_code=200), # Reviewer request
|
489 |
-
]
|
490 |
-
|
491 |
-
# Mock subprocess.run calls
|
492 |
-
mock_run.side_effect = [
|
493 |
-
MagicMock(returncode=0), # git checkout -b
|
494 |
-
MagicMock(returncode=0), # git push
|
495 |
-
]
|
496 |
-
|
497 |
-
# Call the function with reviewer
|
498 |
-
result = send_pull_request(
|
499 |
-
issue=mock_issue,
|
500 |
-
token='test-token',
|
501 |
-
username='test-user',
|
502 |
-
platform=ProviderType.GITLAB,
|
503 |
-
patch_dir=repo_path,
|
504 |
-
pr_type='ready',
|
505 |
-
reviewer=reviewer,
|
506 |
-
)
|
507 |
-
|
508 |
-
# Assert API calls
|
509 |
-
assert mock_get.call_count == 3
|
510 |
-
assert mock_post.call_count == 1
|
511 |
-
assert mock_put.call_count == 1
|
512 |
-
|
513 |
-
# Check PR creation
|
514 |
-
pr_create_call = mock_post.call_args_list[0]
|
515 |
-
assert pr_create_call[1]['json']['title'] == 'Fix issue #42: Test Issue'
|
516 |
-
|
517 |
-
# Check reviewer request
|
518 |
-
reviewer_request_call = mock_put.call_args_list[0]
|
519 |
-
assert (
|
520 |
-
reviewer_request_call[0][0]
|
521 |
-
== 'https://gitlab.com/api/v4/projects/test-owner%2Ftest-repo/merge_requests/1'
|
522 |
-
)
|
523 |
-
assert reviewer_request_call[1]['json'] == {'reviewer_ids': [123]}
|
524 |
-
|
525 |
-
# Check the result URL
|
526 |
-
assert result == 'https://gitlab.com/test-owner/test-repo/-/merge_requests/1'
|
527 |
-
|
528 |
-
|
529 |
-
@patch('httpx.get')
|
530 |
-
def test_send_pull_request_invalid_target_branch(
|
531 |
-
mock_get, mock_issue, mock_output_dir, mock_llm_config
|
532 |
-
):
|
533 |
-
"""Test that an error is raised when specifying a non-existent target branch"""
|
534 |
-
repo_path = os.path.join(mock_output_dir, 'repo')
|
535 |
-
|
536 |
-
# Mock API response for non-existent branch
|
537 |
-
mock_get.side_effect = [
|
538 |
-
MagicMock(status_code=404), # Branch doesn't exist
|
539 |
-
MagicMock(status_code=404), # Target branch doesn't exist
|
540 |
-
]
|
541 |
-
|
542 |
-
# Test that ValueError is raised when target branch doesn't exist
|
543 |
-
with pytest.raises(
|
544 |
-
ValueError, match='Target branch nonexistent-branch does not exist'
|
545 |
-
):
|
546 |
-
send_pull_request(
|
547 |
-
issue=mock_issue,
|
548 |
-
token='test-token',
|
549 |
-
username='test-user',
|
550 |
-
platform=ProviderType.GITLAB,
|
551 |
-
patch_dir=repo_path,
|
552 |
-
pr_type='ready',
|
553 |
-
target_branch='nonexistent-branch',
|
554 |
-
)
|
555 |
-
|
556 |
-
# Verify API calls
|
557 |
-
assert mock_get.call_count == 2
|
558 |
-
|
559 |
-
|
560 |
-
@patch('subprocess.run')
|
561 |
-
@patch('httpx.post')
|
562 |
-
@patch('httpx.get')
|
563 |
-
def test_send_pull_request_git_push_failure(
|
564 |
-
mock_get, mock_post, mock_run, mock_issue, mock_output_dir, mock_llm_config
|
565 |
-
):
|
566 |
-
repo_path = os.path.join(mock_output_dir, 'repo')
|
567 |
-
|
568 |
-
# Mock API responses
|
569 |
-
mock_get.return_value = MagicMock(json=lambda: {'default_branch': 'main'})
|
570 |
-
|
571 |
-
# Mock the subprocess.run calls
|
572 |
-
mock_run.side_effect = [
|
573 |
-
MagicMock(returncode=0), # git checkout -b
|
574 |
-
MagicMock(returncode=1, stderr='Error: failed to push some refs'), # git push
|
575 |
-
]
|
576 |
-
|
577 |
-
# Test that RuntimeError is raised when git push fails
|
578 |
-
with pytest.raises(
|
579 |
-
RuntimeError, match='Failed to push changes to the remote repository'
|
580 |
-
):
|
581 |
-
send_pull_request(
|
582 |
-
issue=mock_issue,
|
583 |
-
token='test-token',
|
584 |
-
username='test-user',
|
585 |
-
platform=ProviderType.GITLAB,
|
586 |
-
patch_dir=repo_path,
|
587 |
-
pr_type='ready',
|
588 |
-
)
|
589 |
-
|
590 |
-
# Assert that subprocess.run was called twice
|
591 |
-
assert mock_run.call_count == 2
|
592 |
-
|
593 |
-
# Check the git checkout -b command
|
594 |
-
checkout_call = mock_run.call_args_list[0]
|
595 |
-
assert checkout_call[0][0] == [
|
596 |
-
'git',
|
597 |
-
'-C',
|
598 |
-
repo_path,
|
599 |
-
'checkout',
|
600 |
-
'-b',
|
601 |
-
'openhands-fix-issue-42',
|
602 |
-
]
|
603 |
-
|
604 |
-
# Check the git push command
|
605 |
-
push_call = mock_run.call_args_list[1]
|
606 |
-
assert push_call[0][0] == [
|
607 |
-
'git',
|
608 |
-
'-C',
|
609 |
-
repo_path,
|
610 |
-
'push',
|
611 |
-
'https://test-user:[email protected]/test-owner/test-repo.git',
|
612 |
-
'openhands-fix-issue-42',
|
613 |
-
]
|
614 |
-
|
615 |
-
# Assert that no pull request was created
|
616 |
-
mock_post.assert_not_called()
|
617 |
-
|
618 |
-
|
619 |
-
@patch('subprocess.run')
|
620 |
-
@patch('httpx.post')
|
621 |
-
@patch('httpx.get')
|
622 |
-
def test_send_pull_request_permission_error(
|
623 |
-
mock_get, mock_post, mock_run, mock_issue, mock_output_dir, mock_llm_config
|
624 |
-
):
|
625 |
-
repo_path = os.path.join(mock_output_dir, 'repo')
|
626 |
-
|
627 |
-
# Mock API responses
|
628 |
-
mock_get.return_value = MagicMock(json=lambda: {'default_branch': 'main'})
|
629 |
-
mock_post.return_value.status_code = 403
|
630 |
-
|
631 |
-
# Mock subprocess.run calls
|
632 |
-
mock_run.side_effect = [
|
633 |
-
MagicMock(returncode=0), # git checkout -b
|
634 |
-
MagicMock(returncode=0), # git push
|
635 |
-
]
|
636 |
-
|
637 |
-
# Test that RuntimeError is raised when PR creation fails due to permissions
|
638 |
-
with pytest.raises(
|
639 |
-
RuntimeError, match='Failed to create pull request due to missing permissions.'
|
640 |
-
):
|
641 |
-
send_pull_request(
|
642 |
-
issue=mock_issue,
|
643 |
-
token='test-token',
|
644 |
-
username='test-user',
|
645 |
-
platform=ProviderType.GITLAB,
|
646 |
-
patch_dir=repo_path,
|
647 |
-
pr_type='ready',
|
648 |
-
)
|
649 |
-
|
650 |
-
# Assert that the branch was created and pushed
|
651 |
-
assert mock_run.call_count == 2
|
652 |
-
mock_post.assert_called_once()
|
653 |
-
|
654 |
-
|
655 |
-
@patch('httpx.post')
|
656 |
-
@patch('httpx.get')
|
657 |
-
def test_reply_to_comment(mock_get, mock_post, mock_issue):
|
658 |
-
# Arrange: set up the test data
|
659 |
-
token = 'test_token'
|
660 |
-
comment_id = 'GID/test_comment_id'
|
661 |
-
reply = 'This is a test reply.'
|
662 |
-
|
663 |
-
# Create an instance of GitlabIssueHandler
|
664 |
-
handler = GitlabIssueHandler(
|
665 |
-
owner='test-owner', repo='test-repo', token=token, username='test-user'
|
666 |
-
)
|
667 |
-
|
668 |
-
mock_get.return_value = MagicMock(
|
669 |
-
json=lambda: {
|
670 |
-
'notes': [
|
671 |
-
{
|
672 |
-
'id': 123,
|
673 |
-
}
|
674 |
-
]
|
675 |
-
}
|
676 |
-
)
|
677 |
-
|
678 |
-
# Mock the response from the GraphQL API
|
679 |
-
mock_response = MagicMock()
|
680 |
-
mock_response.status_code = 200
|
681 |
-
mock_response.json.return_value = {
|
682 |
-
'id': 123,
|
683 |
-
'body': 'Openhands fix success summary\n\n\nThis is a test reply.',
|
684 |
-
'createdAt': '2024-10-01T12:34:56Z',
|
685 |
-
}
|
686 |
-
|
687 |
-
mock_post.return_value = mock_response
|
688 |
-
|
689 |
-
# Act: call the function
|
690 |
-
handler.reply_to_comment(mock_issue.number, comment_id, reply)
|
691 |
-
|
692 |
-
# Assert: check that the POST request was made with the correct parameters
|
693 |
-
data = {
|
694 |
-
'body': 'Openhands fix success summary\n\n\nThis is a test reply.',
|
695 |
-
'note_id': 123,
|
696 |
-
}
|
697 |
-
|
698 |
-
# Check that the correct request was made to the API
|
699 |
-
mock_post.assert_called_once_with(
|
700 |
-
f'https://gitlab.com/api/v4/projects/{quote(f"{mock_issue.owner}/{mock_issue.repo}", safe="")}/merge_requests/{mock_issue.number}/discussions/{comment_id.split("/")[-1]}/notes',
|
701 |
-
headers={
|
702 |
-
'Authorization': f'Bearer {token}',
|
703 |
-
'Accept': 'application/json',
|
704 |
-
},
|
705 |
-
json=data,
|
706 |
-
)
|
707 |
-
|
708 |
-
# Check that the response status was checked (via response.raise_for_status)
|
709 |
-
mock_response.raise_for_status.assert_called_once()
|
710 |
-
|
711 |
-
|
712 |
-
@patch('openhands.resolver.send_pull_request.initialize_repo')
|
713 |
-
@patch('openhands.resolver.send_pull_request.apply_patch')
|
714 |
-
@patch('openhands.resolver.send_pull_request.update_existing_pull_request')
|
715 |
-
@patch('openhands.resolver.send_pull_request.make_commit')
|
716 |
-
def test_process_single_pr_update(
|
717 |
-
mock_make_commit,
|
718 |
-
mock_update_existing_pull_request,
|
719 |
-
mock_apply_patch,
|
720 |
-
mock_initialize_repo,
|
721 |
-
mock_output_dir,
|
722 |
-
mock_llm_config,
|
723 |
-
):
|
724 |
-
# Initialize test data
|
725 |
-
token = 'test_token'
|
726 |
-
username = 'test_user'
|
727 |
-
pr_type = 'draft'
|
728 |
-
|
729 |
-
resolver_output = ResolverOutput(
|
730 |
-
issue=Issue(
|
731 |
-
owner='test-owner',
|
732 |
-
repo='test-repo',
|
733 |
-
number=1,
|
734 |
-
title='Issue 1',
|
735 |
-
body='Body 1',
|
736 |
-
closing_issues=[],
|
737 |
-
review_threads=[
|
738 |
-
ReviewThread(comment='review comment for feedback', files=[])
|
739 |
-
],
|
740 |
-
thread_ids=['1'],
|
741 |
-
head_branch='branch 1',
|
742 |
-
),
|
743 |
-
issue_type='pr',
|
744 |
-
instruction='Test instruction 1',
|
745 |
-
base_commit='def456',
|
746 |
-
git_patch='Test patch 1',
|
747 |
-
history=[],
|
748 |
-
metrics={},
|
749 |
-
success=True,
|
750 |
-
comment_success=None,
|
751 |
-
result_explanation='[Test success 1]',
|
752 |
-
error=None,
|
753 |
-
)
|
754 |
-
|
755 |
-
mock_update_existing_pull_request.return_value = (
|
756 |
-
'https://gitlab.com/test-owner/test-repo/-/merge_requests/1'
|
757 |
-
)
|
758 |
-
mock_initialize_repo.return_value = f'{mock_output_dir}/patches/pr_1'
|
759 |
-
|
760 |
-
process_single_issue(
|
761 |
-
mock_output_dir,
|
762 |
-
resolver_output,
|
763 |
-
token,
|
764 |
-
username,
|
765 |
-
ProviderType.GITLAB,
|
766 |
-
pr_type,
|
767 |
-
mock_llm_config,
|
768 |
-
None,
|
769 |
-
False,
|
770 |
-
None,
|
771 |
-
)
|
772 |
-
|
773 |
-
mock_initialize_repo.assert_called_once_with(mock_output_dir, 1, 'pr', 'branch 1')
|
774 |
-
mock_apply_patch.assert_called_once_with(
|
775 |
-
f'{mock_output_dir}/patches/pr_1', resolver_output.git_patch
|
776 |
-
)
|
777 |
-
mock_make_commit.assert_called_once_with(
|
778 |
-
f'{mock_output_dir}/patches/pr_1', resolver_output.issue, 'pr'
|
779 |
-
)
|
780 |
-
mock_update_existing_pull_request.assert_called_once_with(
|
781 |
-
issue=resolver_output.issue,
|
782 |
-
token=token,
|
783 |
-
username=username,
|
784 |
-
platform=ProviderType.GITLAB,
|
785 |
-
patch_dir=f'{mock_output_dir}/patches/pr_1',
|
786 |
-
additional_message='[Test success 1]',
|
787 |
-
llm_config=mock_llm_config,
|
788 |
-
base_domain='gitlab.com',
|
789 |
-
)
|
790 |
-
|
791 |
-
|
792 |
-
@patch('openhands.resolver.send_pull_request.initialize_repo')
|
793 |
-
@patch('openhands.resolver.send_pull_request.apply_patch')
|
794 |
-
@patch('openhands.resolver.send_pull_request.send_pull_request')
|
795 |
-
@patch('openhands.resolver.send_pull_request.make_commit')
|
796 |
-
def test_process_single_issue(
|
797 |
-
mock_make_commit,
|
798 |
-
mock_send_pull_request,
|
799 |
-
mock_apply_patch,
|
800 |
-
mock_initialize_repo,
|
801 |
-
mock_output_dir,
|
802 |
-
mock_llm_config,
|
803 |
-
):
|
804 |
-
# Initialize test data
|
805 |
-
token = 'test_token'
|
806 |
-
username = 'test_user'
|
807 |
-
pr_type = 'draft'
|
808 |
-
platform = ProviderType.GITLAB
|
809 |
-
|
810 |
-
resolver_output = ResolverOutput(
|
811 |
-
issue=Issue(
|
812 |
-
owner='test-owner',
|
813 |
-
repo='test-repo',
|
814 |
-
number=1,
|
815 |
-
title='Issue 1',
|
816 |
-
body='Body 1',
|
817 |
-
),
|
818 |
-
issue_type='issue',
|
819 |
-
instruction='Test instruction 1',
|
820 |
-
base_commit='def456',
|
821 |
-
git_patch='Test patch 1',
|
822 |
-
history=[],
|
823 |
-
metrics={},
|
824 |
-
success=True,
|
825 |
-
comment_success=None,
|
826 |
-
result_explanation='Test success 1',
|
827 |
-
error=None,
|
828 |
-
)
|
829 |
-
|
830 |
-
# Mock return value
|
831 |
-
mock_send_pull_request.return_value = (
|
832 |
-
'https://gitlab.com/test-owner/test-repo/-/merge_requests/1'
|
833 |
-
)
|
834 |
-
mock_initialize_repo.return_value = f'{mock_output_dir}/patches/issue_1'
|
835 |
-
|
836 |
-
# Call the function
|
837 |
-
process_single_issue(
|
838 |
-
mock_output_dir,
|
839 |
-
resolver_output,
|
840 |
-
token,
|
841 |
-
username,
|
842 |
-
platform,
|
843 |
-
pr_type,
|
844 |
-
mock_llm_config,
|
845 |
-
None,
|
846 |
-
False,
|
847 |
-
None,
|
848 |
-
)
|
849 |
-
|
850 |
-
# Assert that the mocked functions were called with correct arguments
|
851 |
-
mock_initialize_repo.assert_called_once_with(mock_output_dir, 1, 'issue', 'def456')
|
852 |
-
mock_apply_patch.assert_called_once_with(
|
853 |
-
f'{mock_output_dir}/patches/issue_1', resolver_output.git_patch
|
854 |
-
)
|
855 |
-
mock_make_commit.assert_called_once_with(
|
856 |
-
f'{mock_output_dir}/patches/issue_1', resolver_output.issue, 'issue'
|
857 |
-
)
|
858 |
-
mock_send_pull_request.assert_called_once_with(
|
859 |
-
issue=resolver_output.issue,
|
860 |
-
token=token,
|
861 |
-
username=username,
|
862 |
-
platform=platform,
|
863 |
-
patch_dir=f'{mock_output_dir}/patches/issue_1',
|
864 |
-
pr_type=pr_type,
|
865 |
-
fork_owner=None,
|
866 |
-
additional_message=resolver_output.result_explanation,
|
867 |
-
target_branch=None,
|
868 |
-
reviewer=None,
|
869 |
-
pr_title=None,
|
870 |
-
base_domain='gitlab.com',
|
871 |
-
)
|
872 |
-
|
873 |
-
|
874 |
-
@patch('openhands.resolver.send_pull_request.initialize_repo')
|
875 |
-
@patch('openhands.resolver.send_pull_request.apply_patch')
|
876 |
-
@patch('openhands.resolver.send_pull_request.send_pull_request')
|
877 |
-
@patch('openhands.resolver.send_pull_request.make_commit')
|
878 |
-
def test_process_single_issue_unsuccessful(
|
879 |
-
mock_make_commit,
|
880 |
-
mock_send_pull_request,
|
881 |
-
mock_apply_patch,
|
882 |
-
mock_initialize_repo,
|
883 |
-
mock_output_dir,
|
884 |
-
mock_llm_config,
|
885 |
-
):
|
886 |
-
# Initialize test data
|
887 |
-
token = 'test_token'
|
888 |
-
username = 'test_user'
|
889 |
-
pr_type = 'draft'
|
890 |
-
|
891 |
-
resolver_output = ResolverOutput(
|
892 |
-
issue=Issue(
|
893 |
-
owner='test-owner',
|
894 |
-
repo='test-repo',
|
895 |
-
number=1,
|
896 |
-
title='Issue 1',
|
897 |
-
body='Body 1',
|
898 |
-
),
|
899 |
-
issue_type='issue',
|
900 |
-
instruction='Test instruction 1',
|
901 |
-
base_commit='def456',
|
902 |
-
git_patch='Test patch 1',
|
903 |
-
history=[],
|
904 |
-
metrics={},
|
905 |
-
success=False,
|
906 |
-
comment_success=None,
|
907 |
-
result_explanation='',
|
908 |
-
error='Test error',
|
909 |
-
)
|
910 |
-
|
911 |
-
# Call the function
|
912 |
-
process_single_issue(
|
913 |
-
mock_output_dir,
|
914 |
-
resolver_output,
|
915 |
-
token,
|
916 |
-
username,
|
917 |
-
ProviderType.GITLAB,
|
918 |
-
pr_type,
|
919 |
-
mock_llm_config,
|
920 |
-
None,
|
921 |
-
False,
|
922 |
-
None,
|
923 |
-
)
|
924 |
-
|
925 |
-
# Assert that none of the mocked functions were called
|
926 |
-
mock_initialize_repo.assert_not_called()
|
927 |
-
mock_apply_patch.assert_not_called()
|
928 |
-
mock_make_commit.assert_not_called()
|
929 |
-
mock_send_pull_request.assert_not_called()
|
930 |
-
|
931 |
-
|
932 |
-
@patch('httpx.get')
|
933 |
-
@patch('subprocess.run')
|
934 |
-
def test_send_pull_request_branch_naming(
|
935 |
-
mock_run, mock_get, mock_issue, mock_output_dir, mock_llm_config
|
936 |
-
):
|
937 |
-
repo_path = os.path.join(mock_output_dir, 'repo')
|
938 |
-
|
939 |
-
# Mock API responses
|
940 |
-
mock_get.side_effect = [
|
941 |
-
MagicMock(status_code=200), # First branch exists
|
942 |
-
MagicMock(status_code=200), # Second branch exists
|
943 |
-
MagicMock(status_code=404), # Third branch doesn't exist
|
944 |
-
MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
|
945 |
-
MagicMock(json=lambda: {'default_branch': 'main'}), # Get default branch
|
946 |
-
]
|
947 |
-
|
948 |
-
# Mock subprocess.run calls
|
949 |
-
mock_run.side_effect = [
|
950 |
-
MagicMock(returncode=0), # git checkout -b
|
951 |
-
MagicMock(returncode=0), # git push
|
952 |
-
]
|
953 |
-
|
954 |
-
# Call the function
|
955 |
-
result = send_pull_request(
|
956 |
-
issue=mock_issue,
|
957 |
-
token='test-token',
|
958 |
-
username='test-user',
|
959 |
-
platform=ProviderType.GITLAB,
|
960 |
-
patch_dir=repo_path,
|
961 |
-
pr_type='branch',
|
962 |
-
)
|
963 |
-
|
964 |
-
# Assert API calls
|
965 |
-
assert mock_get.call_count == 5
|
966 |
-
|
967 |
-
# Check branch creation and push
|
968 |
-
assert mock_run.call_count == 2
|
969 |
-
checkout_call, push_call = mock_run.call_args_list
|
970 |
-
|
971 |
-
assert checkout_call == call(
|
972 |
-
['git', '-C', repo_path, 'checkout', '-b', 'openhands-fix-issue-42-try3'],
|
973 |
-
capture_output=True,
|
974 |
-
text=True,
|
975 |
-
)
|
976 |
-
assert push_call == call(
|
977 |
-
[
|
978 |
-
'git',
|
979 |
-
'-C',
|
980 |
-
repo_path,
|
981 |
-
'push',
|
982 |
-
'https://test-user:[email protected]/test-owner/test-repo.git',
|
983 |
-
'openhands-fix-issue-42-try3',
|
984 |
-
],
|
985 |
-
capture_output=True,
|
986 |
-
text=True,
|
987 |
-
)
|
988 |
-
|
989 |
-
# Check the result
|
990 |
-
assert (
|
991 |
-
result
|
992 |
-
== 'https://gitlab.com/test-owner/test-repo/-/compare/main...openhands-fix-issue-42-try3'
|
993 |
-
)
|
994 |
-
|
995 |
-
|
996 |
-
@patch('openhands.resolver.send_pull_request.argparse.ArgumentParser')
|
997 |
-
@patch('openhands.resolver.send_pull_request.process_single_issue')
|
998 |
-
@patch('openhands.resolver.send_pull_request.load_single_resolver_output')
|
999 |
-
@patch('openhands.resolver.send_pull_request.identify_token')
|
1000 |
-
@patch('os.path.exists')
|
1001 |
-
@patch('os.getenv')
|
1002 |
-
def test_main(
|
1003 |
-
mock_getenv,
|
1004 |
-
mock_path_exists,
|
1005 |
-
mock_identify_token,
|
1006 |
-
mock_load_single_resolver_output,
|
1007 |
-
mock_process_single_issue,
|
1008 |
-
mock_parser,
|
1009 |
-
):
|
1010 |
-
# Setup mock parser
|
1011 |
-
mock_args = MagicMock()
|
1012 |
-
mock_args.token = None
|
1013 |
-
mock_args.username = 'mock_username'
|
1014 |
-
mock_args.output_dir = '/mock/output'
|
1015 |
-
mock_args.pr_type = 'draft'
|
1016 |
-
mock_args.issue_number = '42'
|
1017 |
-
mock_args.fork_owner = None
|
1018 |
-
mock_args.send_on_failure = False
|
1019 |
-
mock_args.llm_model = 'mock_model'
|
1020 |
-
mock_args.llm_base_url = 'mock_url'
|
1021 |
-
mock_args.llm_api_key = 'mock_key'
|
1022 |
-
mock_args.target_branch = None
|
1023 |
-
mock_args.reviewer = None
|
1024 |
-
mock_args.pr_title = None
|
1025 |
-
mock_args.selected_repo = None
|
1026 |
-
mock_parser.return_value.parse_args.return_value = mock_args
|
1027 |
-
|
1028 |
-
# Setup environment variables
|
1029 |
-
mock_getenv.side_effect = (
|
1030 |
-
lambda key, default=None: 'mock_token' if key == 'GITLAB_TOKEN' else default
|
1031 |
-
)
|
1032 |
-
|
1033 |
-
# Setup path exists
|
1034 |
-
mock_path_exists.return_value = True
|
1035 |
-
|
1036 |
-
# Setup mock resolver output
|
1037 |
-
mock_resolver_output = MagicMock()
|
1038 |
-
mock_load_single_resolver_output.return_value = mock_resolver_output
|
1039 |
-
|
1040 |
-
mock_identify_token.return_value = ProviderType.GITLAB
|
1041 |
-
|
1042 |
-
# Run main function
|
1043 |
-
main()
|
1044 |
-
|
1045 |
-
mock_identify_token.assert_called_with('mock_token', mock_args.base_domain)
|
1046 |
-
|
1047 |
-
llm_config = LLMConfig(
|
1048 |
-
model=mock_args.llm_model,
|
1049 |
-
base_url=mock_args.llm_base_url,
|
1050 |
-
api_key=mock_args.llm_api_key,
|
1051 |
-
)
|
1052 |
-
|
1053 |
-
# Use any_call instead of assert_called_with for more flexible matching
|
1054 |
-
assert mock_process_single_issue.call_args == call(
|
1055 |
-
'/mock/output',
|
1056 |
-
mock_resolver_output,
|
1057 |
-
'mock_token',
|
1058 |
-
'mock_username',
|
1059 |
-
ProviderType.GITLAB,
|
1060 |
-
'draft',
|
1061 |
-
llm_config,
|
1062 |
-
None,
|
1063 |
-
False,
|
1064 |
-
mock_args.target_branch,
|
1065 |
-
mock_args.reviewer,
|
1066 |
-
mock_args.pr_title,
|
1067 |
-
ANY,
|
1068 |
-
)
|
1069 |
-
|
1070 |
-
# Other assertions
|
1071 |
-
mock_parser.assert_called_once()
|
1072 |
-
mock_getenv.assert_any_call('GITLAB_TOKEN')
|
1073 |
-
mock_path_exists.assert_called_with('/mock/output')
|
1074 |
-
mock_load_single_resolver_output.assert_called_with('/mock/output/output.jsonl', 42)
|
1075 |
-
|
1076 |
-
# Test for invalid issue number
|
1077 |
-
mock_args.issue_number = 'invalid'
|
1078 |
-
with pytest.raises(ValueError):
|
1079 |
-
main()
|
1080 |
-
|
1081 |
-
# Test for invalid token
|
1082 |
-
mock_args.issue_number = '42' # Reset to valid issue number
|
1083 |
-
mock_getenv.side_effect = (
|
1084 |
-
lambda key, default=None: None
|
1085 |
-
) # Return None for all env vars
|
1086 |
-
with pytest.raises(ValueError, match='token is not set'):
|
1087 |
-
main()
|
1088 |
-
|
1089 |
-
|
1090 |
-
@patch('subprocess.run')
|
1091 |
-
def test_make_commit_escapes_issue_title(mock_subprocess_run):
|
1092 |
-
# Setup
|
1093 |
-
repo_dir = '/path/to/repo'
|
1094 |
-
issue = Issue(
|
1095 |
-
owner='test-owner',
|
1096 |
-
repo='test-repo',
|
1097 |
-
number=42,
|
1098 |
-
title='Issue with "quotes" and $pecial characters',
|
1099 |
-
body='Test body',
|
1100 |
-
)
|
1101 |
-
|
1102 |
-
# Mock subprocess.run to return success for all calls
|
1103 |
-
mock_subprocess_run.return_value = MagicMock(
|
1104 |
-
returncode=0, stdout='sample output', stderr=''
|
1105 |
-
)
|
1106 |
-
|
1107 |
-
# Call the function
|
1108 |
-
issue_type = 'issue'
|
1109 |
-
make_commit(repo_dir, issue, issue_type)
|
1110 |
-
|
1111 |
-
# Assert that subprocess.run was called with the correct arguments
|
1112 |
-
calls = mock_subprocess_run.call_args_list
|
1113 |
-
assert len(calls) == 4 # git config check, git add, git commit
|
1114 |
-
|
1115 |
-
# Check the git commit call
|
1116 |
-
git_commit_call = calls[3][0][0]
|
1117 |
-
expected_commit_message = (
|
1118 |
-
'Fix issue #42: Issue with "quotes" and $pecial characters'
|
1119 |
-
)
|
1120 |
-
assert [
|
1121 |
-
'git',
|
1122 |
-
'-C',
|
1123 |
-
'/path/to/repo',
|
1124 |
-
'commit',
|
1125 |
-
'-m',
|
1126 |
-
expected_commit_message,
|
1127 |
-
] == git_commit_call
|
1128 |
-
|
1129 |
-
|
1130 |
-
@patch('subprocess.run')
|
1131 |
-
def test_make_commit_no_changes(mock_subprocess_run):
|
1132 |
-
# Setup
|
1133 |
-
repo_dir = '/path/to/repo'
|
1134 |
-
issue = Issue(
|
1135 |
-
owner='test-owner',
|
1136 |
-
repo='test-repo',
|
1137 |
-
number=42,
|
1138 |
-
title='Issue with no changes',
|
1139 |
-
body='Test body',
|
1140 |
-
)
|
1141 |
-
|
1142 |
-
# Mock subprocess.run to simulate no changes in the repo
|
1143 |
-
mock_subprocess_run.side_effect = [
|
1144 |
-
MagicMock(returncode=0),
|
1145 |
-
MagicMock(returncode=0),
|
1146 |
-
MagicMock(returncode=1, stdout=''), # git status --porcelain (no changes)
|
1147 |
-
]
|
1148 |
-
|
1149 |
-
with pytest.raises(
|
1150 |
-
RuntimeError, match='ERROR: Openhands failed to make code changes.'
|
1151 |
-
):
|
1152 |
-
make_commit(repo_dir, issue, 'issue')
|
1153 |
-
|
1154 |
-
# Check that subprocess.run was called for checking git status and add, but not commit
|
1155 |
-
assert mock_subprocess_run.call_count == 3
|
1156 |
-
git_status_call = mock_subprocess_run.call_args_list[2][0][0]
|
1157 |
-
assert f'git -C {repo_dir} status --porcelain' in git_status_call
|
1158 |
-
|
1159 |
-
|
1160 |
-
def test_apply_patch_rename_directory(mock_output_dir):
|
1161 |
-
# Create a sample directory structure
|
1162 |
-
old_dir = os.path.join(mock_output_dir, 'prompts', 'resolve')
|
1163 |
-
os.makedirs(old_dir)
|
1164 |
-
|
1165 |
-
# Create test files
|
1166 |
-
test_files = [
|
1167 |
-
'issue-success-check.jinja',
|
1168 |
-
'pr-feedback-check.jinja',
|
1169 |
-
'pr-thread-check.jinja',
|
1170 |
-
]
|
1171 |
-
for filename in test_files:
|
1172 |
-
file_path = os.path.join(old_dir, filename)
|
1173 |
-
with open(file_path, 'w') as f:
|
1174 |
-
f.write(f'Content of {filename}')
|
1175 |
-
|
1176 |
-
# Create a patch that renames the directory
|
1177 |
-
patch_content = """diff --git a/prompts/resolve/issue-success-check.jinja b/prompts/guess_success/issue-success-check.jinja
|
1178 |
-
similarity index 100%
|
1179 |
-
rename from prompts/resolve/issue-success-check.jinja
|
1180 |
-
rename to prompts/guess_success/issue-success-check.jinja
|
1181 |
-
diff --git a/prompts/resolve/pr-feedback-check.jinja b/prompts/guess_success/pr-feedback-check.jinja
|
1182 |
-
similarity index 100%
|
1183 |
-
rename from prompts/resolve/pr-feedback-check.jinja
|
1184 |
-
rename to prompts/guess_success/pr-feedback-check.jinja
|
1185 |
-
diff --git a/prompts/resolve/pr-thread-check.jinja b/prompts/guess_success/pr-thread-check.jinja
|
1186 |
-
similarity index 100%
|
1187 |
-
rename from prompts/resolve/pr-thread-check.jinja
|
1188 |
-
rename to prompts/guess_success/pr-thread-check.jinja"""
|
1189 |
-
|
1190 |
-
# Apply the patch
|
1191 |
-
apply_patch(mock_output_dir, patch_content)
|
1192 |
-
|
1193 |
-
# Check if files were moved correctly
|
1194 |
-
new_dir = os.path.join(mock_output_dir, 'prompts', 'guess_success')
|
1195 |
-
assert not os.path.exists(old_dir), 'Old directory still exists'
|
1196 |
-
assert os.path.exists(new_dir), 'New directory was not created'
|
1197 |
-
|
1198 |
-
# Check if all files were moved and content preserved
|
1199 |
-
for filename in test_files:
|
1200 |
-
old_path = os.path.join(old_dir, filename)
|
1201 |
-
new_path = os.path.join(new_dir, filename)
|
1202 |
-
assert not os.path.exists(old_path), f'Old file {filename} still exists'
|
1203 |
-
assert os.path.exists(new_path), f'New file {filename} was not created'
|
1204 |
-
with open(new_path, 'r') as f:
|
1205 |
-
content = f.read()
|
1206 |
-
assert content == f'Content of {filename}', f'Content mismatch for {filename}'
tests/unit/resolver/mock_output/output.jsonl
DELETED
The diff for this file is too large to render. See raw diff.
tests/unit/resolver/mock_output/repo/src/App.css
DELETED
@@ -1,42 +0,0 @@
#root {
  max-width: 1280px;
  margin: 0 auto;
  padding: 2rem;
  text-align: center;
}

.logo {
  height: 6em;
  padding: 1.5em;
  will-change: filter;
  transition: filter 300ms;
}
.logo:hover {
  filter: drop-shadow(0 0 2em #646cffaa);
}
.logo.react:hover {
  filter: drop-shadow(0 0 2em #61dafbaa);
}

@keyframes logo-spin {
  from {
    transform: rotate(0deg);
  }
  to {
    transform: rotate(360deg);
  }
}

@media (prefers-reduced-motion: no-preference) {
  a:nth-of-type(2) .logo {
    animation: logo-spin infinite 20s linear;
  }
}

.card {
  padding: 2em;
}

.read-the-docs {
  color: #888;
}
tests/unit/resolver/mock_output/repo/src/App.tsx
DELETED
@@ -1,14 +0,0 @@
import React from 'react'
import './App.css'
import PullRequestViewer from './PullRequestViewer'

function App() {
  return (
    <div className="App">
      <PullRequestViewer />
    </div>
  )
}

export default App
tests/unit/resolver/mock_output/repo/src/PullRequestViewer.test.tsx
DELETED
@@ -1,19 +0,0 @@
import React from 'react';
import { render, screen } from '@testing-library/react';
import PullRequestViewer from './PullRequestViewer';

describe('PullRequestViewer', () => {
  it('renders the component title', () => {
    render(<PullRequestViewer />);
    const titleElement = screen.getByText(/Pull Request Viewer/i);
    expect(titleElement).toBeInTheDocument();
  });

  it('renders the repository select dropdown', () => {
    render(<PullRequestViewer />);
    const selectElement = screen.getByRole('combobox', { name: /select a repository/i });
    expect(selectElement).toBeInTheDocument();
  });
});
tests/unit/resolver/mock_output/repo/src/PullRequestViewer.tsx
DELETED
@@ -1,112 +0,0 @@
import React, { useState, useEffect } from 'react';
import axios from 'axios';
import { Octokit } from '@octokit/rest';
import Select from 'react-select';

const octokit = new Octokit({ auth: import.meta.env.VITE_GITHUB_TOKEN });

interface PullRequest {
  title: string;
  html_url: string;
  user: {
    login: string;
  };
}

interface Repo {
  value: string;
  label: string;
}

const PullRequestViewer: React.FC = () => {
  const [repos, setRepos] = useState<Repo[]>([]);
  const [selectedRepo, setSelectedRepo] = useState<Repo | null>(null);
  const [pullRequests, setPullRequests] = useState<PullRequest[]>([]);

  useEffect(() => {
    const fetchRepos = async () => {
      try {
        const response = await octokit.repos.listForOrg({
          org: 'OpenDevin',
          type: 'all',
        });
        const repoOptions = response.data.map(repo => ({
          value: repo.name,
          label: repo.name,
        }));
        setRepos(repoOptions);
      } catch (error) {
        console.error('Error fetching repos:', error);
      }
    };
    fetchRepos();
  }, []);

  useEffect(() => {
    const fetchPullRequests = async () => {
      if (selectedRepo) {
        try {
          let allPullRequests: PullRequest[] = [];
          let page = 1;
          let hasNextPage = true;

          while (hasNextPage) {
            const response = await octokit.pulls.list({
              owner: 'OpenDevin',
              repo: selectedRepo.value,
              state: 'open',
              per_page: 100,
              page: page,
            });

            allPullRequests = [...allPullRequests, ...response.data];

            if (response.data.length < 100) {
              hasNextPage = false;
            } else {
              page++;
            }
          }

          setPullRequests(allPullRequests);
        } catch (error) {
          console.error('Error fetching pull requests:', error);
        }
      }
    };
    fetchPullRequests();
  }, [selectedRepo]);

  return (
    <div>
      <h1>Pull Request Viewer</h1>
      <Select
        options={repos}
        value={selectedRepo}
        onChange={(option) => setSelectedRepo(option as Repo)}
        placeholder="Select a repository"
        aria-label="Select a repository"
      />
      {pullRequests.length > 0 ? (
        <ul>
          {pullRequests.map((pr) => (
            <li key={pr.html_url}>
              <a href={pr.html_url} target="_blank" rel="noopener noreferrer">
                {pr.title}
              </a>
              {' by '}
              {pr.user.login}
            </li>
          ))}
        </ul>
      ) : (
        <p>No open pull requests found.</p>
      )}
    </div>
  );
};

export default PullRequestViewer;
tests/unit/resolver/test_issue_handler_factory.py
DELETED
@@ -1,77 +0,0 @@
from typing import Type
from unittest.mock import MagicMock

import pytest
from pydantic import SecretStr

from openhands.core.config import LLMConfig
from openhands.integrations.provider import ProviderType
from openhands.resolver.interfaces.github import GithubIssueHandler, GithubPRHandler
from openhands.resolver.interfaces.gitlab import GitlabIssueHandler, GitlabPRHandler
from openhands.resolver.issue_handler_factory import IssueHandlerFactory
from openhands.resolver.interfaces.issue_definitions import (
    ServiceContextIssue,
    ServiceContextPR,
)


@pytest.fixture
def llm_config():
    return LLMConfig(
        model='test-model',
        api_key=SecretStr('test-key'),
    )


@pytest.fixture
def factory_params(llm_config):
    return {
        'owner': 'test-owner',
        'repo': 'test-repo',
        'token': 'test-token',
        'username': 'test-user',
        'base_domain': 'github.com',
        'llm_config': llm_config,
    }


test_cases = [
    # platform, issue_type, expected_context_type, expected_handler_type
    (ProviderType.GITHUB, 'issue', ServiceContextIssue, GithubIssueHandler),
    (ProviderType.GITHUB, 'pr', ServiceContextPR, GithubPRHandler),
    (ProviderType.GITLAB, 'issue', ServiceContextIssue, GitlabIssueHandler),
    (ProviderType.GITLAB, 'pr', ServiceContextPR, GitlabPRHandler),
]


@pytest.mark.parametrize(
    'platform,issue_type,expected_context_type,expected_handler_type',
    test_cases,
)
def test_handler_creation(
    factory_params,
    platform: ProviderType,
    issue_type: str,
    expected_context_type: Type,
    expected_handler_type: Type,
):
    factory = IssueHandlerFactory(
        **factory_params,
        platform=platform,
        issue_type=issue_type,
    )

    handler = factory.create()

    assert isinstance(handler, expected_context_type)
    assert isinstance(handler._strategy, expected_handler_type)


def test_invalid_issue_type(factory_params):
    factory = IssueHandlerFactory(
        **factory_params,
        platform=ProviderType.GITHUB,
        issue_type='invalid',
    )

    with pytest.raises(ValueError, match='Invalid issue type: invalid'):
        factory.create()
tests/unit/resolver/test_issue_references.py
DELETED
@@ -1,56 +0,0 @@
from openhands.resolver.utils import extract_issue_references


def test_extract_issue_references():
    # Test basic issue reference
    assert extract_issue_references('Fixes #123') == [123]

    # Test multiple issue references
    assert extract_issue_references('Fixes #123, #456') == [123, 456]

    # Test issue references in code blocks should be ignored
    assert extract_issue_references("""
Here's a code block:
```python
# This is a comment with #123
def func():
    pass # Another #456
```
But this #789 should be extracted
""") == [789]

    # Test issue references in inline code should be ignored
    assert extract_issue_references(
        'This `#123` should be ignored but #456 should be extracted'
    ) == [456]

    # Test issue references in URLs should be ignored
    assert extract_issue_references(
        'Check http://example.com/#123 but #456 should be extracted'
    ) == [456]

    # Test issue references in markdown links should be extracted
    assert extract_issue_references('[Link to #123](http://example.com) and #456') == [
        123,
        456,
    ]

    # Test issue references with text around them
    assert extract_issue_references('Issue #123 is fixed and #456 is pending') == [
        123,
        456,
    ]
tests/unit/resolver/test_patch_apply.py
DELETED
@@ -1,47 +0,0 @@
from openhands.resolver.patching.apply import apply_diff
from openhands.resolver.patching.patch import diffobj, parse_diff


def test_patch_apply_with_empty_lines():
    # The original file has no indentation and uses \n line endings
    original_content = '# PR Viewer\n\nThis React application allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.\n\n## Setup'

    # The patch has spaces at the start of each line and uses \n line endings
    patch = """diff --git a/README.md b/README.md
index b760a53..5071727 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,3 @@
 # PR Viewer
 
-This React application allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.
+This React application was created by Graham Neubig and OpenHands. It allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization."""

    print('Original content lines:')
    for i, line in enumerate(original_content.splitlines(), 1):
        print(f'{i}: {repr(line)}')

    print('\nPatch lines:')
    for i, line in enumerate(patch.splitlines(), 1):
        print(f'{i}: {repr(line)}')

    changes = parse_diff(patch)
    print('\nParsed changes:')
    for change in changes:
        print(
            f'Change(old={change.old}, new={change.new}, line={repr(change.line)}, hunk={change.hunk})'
        )
    diff = diffobj(header=None, changes=changes, text=patch)

    # Apply the patch
    result = apply_diff(diff, original_content)

    # The patch should be applied successfully
    expected_result = [
        '# PR Viewer',
        '',
        'This React application was created by Graham Neubig and OpenHands. It allows you to view open pull requests from GitHub repositories in a GitHub organization. By default, it uses the All-Hands-AI organization.',
        '',
        '## Setup',
    ]
    assert result == expected_result
tests/unit/resolver/test_resolve_issue.py
DELETED
@@ -1,171 +0,0 @@
from unittest import mock

import pytest

from openhands.core.config import SandboxConfig, OpenHandsConfig
from openhands.events.action import CmdRunAction
from openhands.resolver.issue_resolver import IssueResolver


def assert_sandbox_config(
    config: SandboxConfig,
    base_container_image=SandboxConfig.model_fields['base_container_image'].default,
    runtime_container_image='ghcr.io/all-hands-ai/runtime:mock-nikolaik',  # Default to mock version
    local_runtime_url=SandboxConfig.model_fields['local_runtime_url'].default,
):
    """Helper function to assert the properties of the SandboxConfig object."""
    assert isinstance(config, SandboxConfig)
    assert config.base_container_image == base_container_image
    assert config.runtime_container_image == runtime_container_image
    assert config.enable_auto_lint is False
    assert config.use_host_network is False
    assert config.timeout == 300
    assert config.local_runtime_url == local_runtime_url


def test_setup_sandbox_config_default():
    """Test default configuration when no images provided and not experimental"""
    with mock.patch('openhands.__version__', 'mock'):
        openhands_config = OpenHandsConfig()

        IssueResolver.update_sandbox_config(
            openhands_config=openhands_config,
            base_container_image=None,
            runtime_container_image=None,
            is_experimental=False,
        )

        assert_sandbox_config(
            openhands_config.sandbox,
            runtime_container_image='ghcr.io/all-hands-ai/runtime:mock-nikolaik',
        )


def test_setup_sandbox_config_both_images():
    """Test that providing both container images raises ValueError"""
    with pytest.raises(
        ValueError, match='Cannot provide both runtime and base container images.'
    ):
        openhands_config = OpenHandsConfig()

        IssueResolver.update_sandbox_config(
            openhands_config=openhands_config,
            base_container_image='base-image',
            runtime_container_image='runtime-image',
            is_experimental=False,
        )


def test_setup_sandbox_config_base_only():
    """Test configuration when only base_container_image is provided"""
    base_image = 'custom-base-image'
    openhands_config = OpenHandsConfig()

    IssueResolver.update_sandbox_config(
        openhands_config=openhands_config,
        base_container_image=base_image,
        runtime_container_image=None,
        is_experimental=False,
    )

    assert_sandbox_config(
        openhands_config.sandbox,
        base_container_image=base_image,
        runtime_container_image=None,
    )


def test_setup_sandbox_config_runtime_only():
    """Test configuration when only runtime_container_image is provided"""
    runtime_image = 'custom-runtime-image'
    openhands_config = OpenHandsConfig()

    IssueResolver.update_sandbox_config(
        openhands_config=openhands_config,
        base_container_image=None,
        runtime_container_image=runtime_image,
        is_experimental=False,
    )

    assert_sandbox_config(openhands_config.sandbox, runtime_container_image=runtime_image)


def test_setup_sandbox_config_experimental():
    """Test configuration when experimental mode is enabled"""
    with mock.patch('openhands.__version__', 'mock'):
        openhands_config = OpenHandsConfig()

        IssueResolver.update_sandbox_config(
            openhands_config=openhands_config,
            base_container_image=None,
            runtime_container_image=None,
            is_experimental=True,
        )

        assert_sandbox_config(openhands_config.sandbox, runtime_container_image=None)


@mock.patch('openhands.resolver.issue_resolver.os.getuid', return_value=0)
@mock.patch('openhands.resolver.issue_resolver.get_unique_uid', return_value=1001)
def test_setup_sandbox_config_gitlab_ci(mock_get_unique_uid, mock_getuid):
    """Test GitLab CI specific configuration when running as root"""
    with mock.patch('openhands.__version__', 'mock'):
        with mock.patch.object(IssueResolver, 'GITLAB_CI', True):
            openhands_config = OpenHandsConfig()

            IssueResolver.update_sandbox_config(
                openhands_config=openhands_config,
                base_container_image=None,
                runtime_container_image=None,
                is_experimental=False,
            )

            assert_sandbox_config(openhands_config.sandbox, local_runtime_url='http://localhost')


@mock.patch('openhands.resolver.issue_resolver.os.getuid', return_value=1000)
def test_setup_sandbox_config_gitlab_ci_non_root(mock_getuid):
    """Test GitLab CI configuration when not running as root"""
    with mock.patch('openhands.__version__', 'mock'):
        with mock.patch.object(IssueResolver, 'GITLAB_CI', True):
            openhands_config = OpenHandsConfig()

            IssueResolver.update_sandbox_config(
                openhands_config=openhands_config,
                base_container_image=None,
                runtime_container_image=None,
                is_experimental=False,
            )

            assert_sandbox_config(openhands_config.sandbox, local_runtime_url='http://localhost')


@mock.patch('openhands.events.observation.CmdOutputObservation')
@mock.patch('openhands.runtime.base.Runtime')
def test_initialize_runtime_runs_setup_script_and_git_hooks(
    mock_runtime, mock_cmd_output
):
    """Test that initialize_runtime calls maybe_run_setup_script and maybe_setup_git_hooks"""

    # Create a minimal resolver instance with just the methods we need
    class MinimalResolver:
        def initialize_runtime(self, runtime):
            # This is the method we're testing
            action = CmdRunAction(command='git config --global core.pager ""')
            runtime.run_action(action)

            # Run setup script if it exists
            runtime.maybe_run_setup_script()

            # Setup git hooks if they exist
            runtime.maybe_setup_git_hooks()

    resolver = MinimalResolver()

    # Mock the runtime's run_action method to return a successful CmdOutputObservation
    mock_cmd_output.return_value.exit_code = 0
    mock_runtime.run_action.return_value = mock_cmd_output.return_value

    # Call the method
    resolver.initialize_runtime(mock_runtime)

    # Verify that both methods were called
    mock_runtime.maybe_run_setup_script.assert_called_once()
    mock_runtime.maybe_setup_git_hooks.assert_called_once()
|
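Note: the assert_sandbox_config helper these tests call is defined earlier in the deleted file and does not appear in this hunk. Purely as a reading aid, here is a minimal sketch of the shape its call sites imply, assuming it simply compares keyword arguments against the resulting sandbox config; the field names and defaults are assumptions, not the deleted file's actual helper.

# Hypothetical sketch only -- inferred from the call sites above, not the
# real helper. Field names and the local_runtime_url default are assumed.
def assert_sandbox_config(
    sandbox,
    base_container_image=None,
    runtime_container_image=None,
    local_runtime_url='http://localhost',
):
    assert sandbox.base_container_image == base_container_image
    assert sandbox.runtime_container_image == runtime_container_image
    assert sandbox.local_runtime_url == local_runtime_url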
tests/unit/test_acompletion.py
DELETED
@@ -1,196 +0,0 @@
import asyncio
from contextlib import contextmanager
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from openhands.core.config import load_openhands_config
from openhands.core.exceptions import UserCancelledError
from openhands.llm.async_llm import AsyncLLM
from openhands.llm.llm import LLM
from openhands.llm.streaming_llm import StreamingLLM

config = load_openhands_config()


@pytest.fixture
def test_llm():
    return _get_llm(LLM)


def _get_llm(type_: type[LLM]):
    with _patch_http():
        return type_(config=config.get_llm_config())


@pytest.fixture
def mock_response():
    return [
        {'choices': [{'delta': {'content': 'This is a'}}]},
        {'choices': [{'delta': {'content': ' test'}}]},
        {'choices': [{'delta': {'content': ' message.'}}]},
        {'choices': [{'delta': {'content': ' It is'}}]},
        {'choices': [{'delta': {'content': ' a bit'}}]},
        {'choices': [{'delta': {'content': ' longer'}}]},
        {'choices': [{'delta': {'content': ' than'}}]},
        {'choices': [{'delta': {'content': ' the'}}]},
        {'choices': [{'delta': {'content': ' previous'}}]},
        {'choices': [{'delta': {'content': ' one,'}}]},
        {'choices': [{'delta': {'content': ' but'}}]},
        {'choices': [{'delta': {'content': ' hopefully'}}]},
        {'choices': [{'delta': {'content': ' still'}}]},
        {'choices': [{'delta': {'content': ' short'}}]},
        {'choices': [{'delta': {'content': ' enough.'}}]},
    ]


@contextmanager
def _patch_http():
    with patch('openhands.llm.llm.httpx.get', MagicMock()) as mock_http:
        mock_http.json.return_value = {
            'data': [
                {'model_name': 'some_model'},
                {'model_name': 'another_model'},
            ]
        }
        yield


@pytest.mark.asyncio
async def test_acompletion_non_streaming():
    with patch.object(AsyncLLM, '_call_acompletion') as mock_call_acompletion:
        mock_response = {
            'choices': [{'message': {'content': 'This is a test message.'}}]
        }
        mock_call_acompletion.return_value = mock_response
        test_llm = _get_llm(AsyncLLM)
        response = await test_llm.async_completion(
            messages=[{'role': 'user', 'content': 'Hello!'}],
            stream=False,
            drop_params=True,
        )
        # Assertions for non-streaming completion
        assert response['choices'][0]['message']['content'] != ''


@pytest.mark.asyncio
async def test_acompletion_streaming(mock_response):
    with patch.object(StreamingLLM, '_call_acompletion') as mock_call_acompletion:
        mock_call_acompletion.return_value.__aiter__.return_value = iter(mock_response)
        test_llm = _get_llm(StreamingLLM)
        async for chunk in test_llm.async_streaming_completion(
            messages=[{'role': 'user', 'content': 'Hello!'}], stream=True
        ):
            print(f'Chunk: {chunk["choices"][0]["delta"]["content"]}')
            # Assertions for streaming completion
            assert chunk['choices'][0]['delta']['content'] in [
                r['choices'][0]['delta']['content'] for r in mock_response
            ]


@pytest.mark.asyncio
async def test_completion(test_llm):
    with patch.object(LLM, 'completion') as mock_completion:
        mock_completion.return_value = {
            'choices': [{'message': {'content': 'This is a test message.'}}]
        }
        response = test_llm.completion(messages=[{'role': 'user', 'content': 'Hello!'}])
        assert response['choices'][0]['message']['content'] == 'This is a test message.'


@pytest.mark.asyncio
@pytest.mark.parametrize('cancel_delay', [0.1, 0.3, 0.5, 0.7, 0.9])
async def test_async_completion_with_user_cancellation(cancel_delay):
    cancel_event = asyncio.Event()

    async def mock_on_cancel_requested():
        is_set = cancel_event.is_set()
        print(f'Cancel requested: {is_set}')
        return is_set

    async def mock_acompletion(*args, **kwargs):
        print('Starting mock_acompletion')
        for i in range(20):  # Increased iterations for longer running task
            print(f'mock_acompletion iteration {i}')
            await asyncio.sleep(0.1)
            if await mock_on_cancel_requested():
                print('Cancellation detected in mock_acompletion')
                raise UserCancelledError('LLM request cancelled by user')
        print('Completing mock_acompletion without cancellation')
        return {'choices': [{'message': {'content': 'This is a test message.'}}]}

    with patch.object(
        AsyncLLM, '_call_acompletion', new_callable=AsyncMock
    ) as mock_call_acompletion:
        mock_call_acompletion.side_effect = mock_acompletion
        test_llm = _get_llm(AsyncLLM)

        async def cancel_after_delay():
            print(f'Starting cancel_after_delay with delay {cancel_delay}')
            await asyncio.sleep(cancel_delay)
            print('Setting cancel event')
            cancel_event.set()

        with pytest.raises(UserCancelledError):
            await asyncio.gather(
                test_llm.async_completion(
                    messages=[{'role': 'user', 'content': 'Hello!'}],
                    stream=False,
                ),
                cancel_after_delay(),
            )

    # Ensure the mock was called
    mock_call_acompletion.assert_called_once()


@pytest.mark.asyncio
@pytest.mark.parametrize('cancel_after_chunks', [1, 3, 5, 7, 9])
async def test_async_streaming_completion_with_user_cancellation(cancel_after_chunks):
    cancel_requested = False

    test_messages = [
        'This is ',
        'a test ',
        'message ',
        'with ',
        'multiple ',
        'chunks ',
        'to ',
        'simulate ',
        'a ',
        'longer ',
        'streaming ',
        'response.',
    ]

    async def mock_acompletion(*args, **kwargs):
        for i, content in enumerate(test_messages):
            yield {'choices': [{'delta': {'content': content}}]}
            if i + 1 == cancel_after_chunks:
                nonlocal cancel_requested
                cancel_requested = True
            if cancel_requested:
                raise UserCancelledError('LLM request cancelled by user')
            await asyncio.sleep(0.05)  # Simulate some delay between chunks

    with patch.object(
        AsyncLLM, '_call_acompletion', new_callable=AsyncMock
    ) as mock_call_acompletion:
        mock_call_acompletion.return_value = mock_acompletion()
        test_llm = _get_llm(StreamingLLM)

        received_chunks = []
        with pytest.raises(UserCancelledError):
            async for chunk in test_llm.async_streaming_completion(
                messages=[{'role': 'user', 'content': 'Hello!'}], stream=True
            ):
                received_chunks.append(chunk['choices'][0]['delta']['content'])
                print(f'Chunk: {chunk["choices"][0]["delta"]["content"]}')

        # Assert that we received the expected number of chunks before cancellation
        assert len(received_chunks) == cancel_after_chunks
        assert received_chunks == test_messages[:cancel_after_chunks]

    # Ensure the mock was called
    mock_call_acompletion.assert_called_once()
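The streaming test above relies on MagicMock's support for async magic methods: assigning an iterable to a mock's __aiter__.return_value is enough to drive an async for loop. A standalone illustration of that pattern, using only the standard library (independent of the OpenHands classes being tested here):

import asyncio
from unittest.mock import MagicMock

# MagicMock configures __aiter__/__anext__ (Python 3.8+), so assigning an
# iterable to __aiter__.return_value makes the mock usable in `async for`.
stream = MagicMock()
stream.__aiter__.return_value = iter(['This is ', 'a test.'])

async def consume(aiterable):
    # Collect every chunk the mocked stream yields.
    return [chunk async for chunk in aiterable]

print(asyncio.run(consume(stream)))  # ['This is ', 'a test.']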
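The two cancellation tests race a long-running coroutine against a canceller via asyncio.gather, which propagates the first exception either coroutine raises. The pattern is plain asyncio and can be reproduced without any OpenHands imports; UserCancelled below is a stand-in for the project's UserCancelledError, and the numbers are illustrative:

import asyncio

class UserCancelled(Exception):
    """Stand-in for openhands.core.exceptions.UserCancelledError."""

async def long_request(cancel_event: asyncio.Event):
    # Poll the cancel flag between chunks of work, like mock_acompletion above.
    for _ in range(20):
        await asyncio.sleep(0.1)
        if cancel_event.is_set():
            raise UserCancelled('request cancelled by user')
    return 'done'

async def main():
    cancel_event = asyncio.Event()

    async def cancel_after(delay: float):
        await asyncio.sleep(delay)
        cancel_event.set()

    try:
        # gather surfaces UserCancelled as soon as long_request notices the flag.
        await asyncio.gather(long_request(cancel_event), cancel_after(0.3))
    except UserCancelled:
        print('cancelled as expected')

asyncio.run(main())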