"""Build test and mutation-testing specifications (TestSpec) for TestGenEval instances."""

from __future__ import annotations

from dataclasses import dataclass

from evaluation.benchmarks.testgeneval.constants import (
    COVERAGE_PREFIX,
    KEY_INSTANCE_ID,
    MAP_REPO_VERSION_TO_SPECS,
    TESTS_FAILED,
    TESTS_SUFFIX,
    UPDATE_TOX,
    TestGenEvalInstance,
)
from evaluation.benchmarks.testgeneval.utils import (
    get_test_directives,
)

# Captures the path of each modified file from a unified diff header.
DIFF_MODIFIED_FILE_REGEX = r'--- a/(.*)'


@dataclass
class TestSpec:
    """
    A dataclass that represents a test specification for a single instance of SWE-bench.
    """

    instance_id: str
    id: str
    repo: str
    version: str
    test_cmd: str
    code_file: str
    test_file: str
    baseline_covs: dict
    local_imports: list[str]
    test_script_list: list[str]
    mutation_script_list: list[str]

    @property
    def test_script(self):
        return (
            '\n'.join(['#!/bin/bash', 'set -uo pipefail'] + self.test_script_list)
            + '\n'
        )  # Don't exit early because we need to revert tests at the end

    @property
    def mutation_script(self):
        return (
            '\n'.join(['#!/bin/bash', 'set -uo pipefail'] + self.mutation_script_list)
            + '\n'
        )  # Don't exit early because we need to revert tests at the end


def make_test_setup(specs, env_name, repo_directory, includes_tox=False):
    """
    Build the setup commands shared by the test and mutation scripts:
    activate the conda environment, cd into the repo, and run any
    repo-specific eval/install steps.
    """
    eval_commands = []
    if includes_tox:
        eval_commands.append(UPDATE_TOX)
    eval_commands += [
        'source /opt/miniconda3/bin/activate',
        f'conda activate {env_name}',
        f'cd {repo_directory}',
    ]
    if 'eval_commands' in specs:
        eval_commands += specs['eval_commands']
    eval_commands += [
        f'git config --global --add safe.directory {repo_directory}',  # for nonroot user
        f'cd {repo_directory}',
        # This is just informational, so we have a record
        'git status',
        'git show',
        'source /opt/miniconda3/bin/activate',
        f'conda activate {env_name}',
    ]
    if 'install' in specs:
        eval_commands.append(specs['install'])
    if includes_tox:
        eval_commands.append('add_coverage_tox "tox.ini"')
        eval_commands.append('[ -f ".coveragerc" ] && rm ".coveragerc"')
    return eval_commands


def make_test_script_list(test_cmd, specs, env_name, repo_directory):
    """
    Build the commands that run the tests and dump coverage.json to stdout.
    """
    includes_tox = 'tox' in test_cmd
    eval_commands = make_test_setup(specs, env_name, repo_directory, includes_tox)
    eval_commands += [
        f'{test_cmd} || {{ echo "{TESTS_FAILED}\n{TESTS_SUFFIX}\n" && exit 1; }}',
        f'echo "{TESTS_SUFFIX}"\n',
        'coverage json -o coverage.json',
        f'echo "{COVERAGE_PREFIX}"\n',
        'cat coverage.json',
    ]
    return eval_commands


def make_mutation_script_list(specs, env_name, repo_directory, mutation_timeout):
    """
    Build the commands that run cosmic-ray mutation testing under a timeout
    and report the mutation results.
    """
    eval_commands = make_test_setup(specs, env_name, repo_directory)
    eval_commands += [
        'cosmic-ray init mutation.toml mutation.sqlite',
        f'timeout {mutation_timeout}s cosmic-ray exec mutation.toml mutation.sqlite',
        'cr-report mutation.sqlite',
        'cr-rate mutation.sqlite --estimate --confidence 95.0',
    ]
    return eval_commands


def make_test_spec(
    instance: TestGenEvalInstance, mutation_timeout: int, buffer: int
) -> TestSpec:
    """Construct a TestSpec (test and mutation scripts) from a TestGenEval instance."""
    if isinstance(instance, TestSpec):
        return instance
    instance_id = instance[KEY_INSTANCE_ID]
    id = instance['id']
    repo = instance['repo']
    version = instance['version']
    baseline_covs = instance['baseline_covs']
    code_file = instance['code_file']
    test_file = instance['test_file']
    local_imports = instance['local_imports']

    env_name = 'testbed'
    repo_directory = f'/{env_name}'
    specs = MAP_REPO_VERSION_TO_SPECS[repo][version]

    test_cmd = ' '.join(
        [
            MAP_REPO_VERSION_TO_SPECS[instance['repo']][instance['version']][
                'test_cmd'
            ],
            *get_test_directives(instance),
        ]
    )

    test_script_list = make_test_script_list(test_cmd, specs, env_name, repo_directory)
    # Reserve `buffer` seconds of headroom within the overall mutation timeout.
    mutation_script_list = make_mutation_script_list(
        specs, env_name, repo_directory, mutation_timeout - buffer
    )

    return TestSpec(
        instance_id=instance_id,
        id=id,
        repo=repo,
        test_script_list=test_script_list,
        test_cmd=test_cmd,
        local_imports=local_imports,
        mutation_script_list=mutation_script_list,
        code_file=code_file,
        test_file=test_file,
        baseline_covs=baseline_covs,
        version=version,
    )