Spaces:
Build error
Build error
from __future__ import annotations | |
from dataclasses import dataclass | |
from evaluation.benchmarks.testgeneval.constants import ( | |
COVERAGE_PREFIX, | |
KEY_INSTANCE_ID, | |
MAP_REPO_VERSION_TO_SPECS, | |
TESTS_FAILED, | |
TESTS_SUFFIX, | |
UPDATE_TOX, | |
TestGenEvalInstance, | |
) | |
from evaluation.benchmarks.testgeneval.utils import ( | |
get_test_directives, | |
) | |
# Regex whose single capture group is the path that follows a literal
# `--- a/` prefix (the form used for the old-file header line in a git diff).
DIFF_MODIFIED_FILE_REGEX = r'--- a/(.*)'
@dataclass
class TestSpec:
    """
    A dataclass that represents a test specification for a single TestGenEval instance.

    The ``@dataclass``-generated ``__init__`` accepts every field below as a
    keyword argument, which is how ``make_test_spec`` constructs instances.
    """

    instance_id: str
    id: str
    repo: str
    version: str
    test_cmd: str
    code_file: str
    test_file: str
    baseline_covs: dict
    local_imports: list[str]
    # Shell commands, one per line of the rendered test script.
    test_script_list: list[str]
    # Shell commands, one per line of the rendered mutation script.
    mutation_script_list: list[str]

    @staticmethod
    def _render_bash_script(commands):
        """Join ``commands`` into a newline-terminated bash script.

        The script starts with a ``#!/bin/bash`` shebang followed by
        ``set -uo pipefail``.
        """
        # `-e` is deliberately left out of the `set` line: don't exit early
        # because we need to revert tests at the end.
        return '\n'.join(['#!/bin/bash', 'set -uo pipefail'] + commands) + '\n'

    def test_script(self):
        """Return ``test_script_list`` rendered as a single bash script."""
        return self._render_bash_script(self.test_script_list)

    def mutation_script(self):
        """Return ``mutation_script_list`` rendered as a single bash script."""
        return self._render_bash_script(self.mutation_script_list)
def make_test_setup(specs, env_name, repo_directory, includes_tox=False):
    """
    Build the shell commands that prepare the environment before a run.

    Args:
        specs: Mapping of per-repo settings. The optional ``'eval_commands'``
            entry (a list of commands) and ``'install'`` entry (a single
            command) are spliced into the result when present.
        env_name: Name of the conda environment to activate.
        repo_directory: Path of the repository checkout to ``cd`` into.
        includes_tox: When true, ``UPDATE_TOX`` is emitted first and
            ``add_coverage_tox "tox.ini"`` is emitted after the install step.

    Returns:
        A new list of shell command strings, in execution order.
    """
    activate_env = [
        'source /opt/miniconda3/bin/activate',
        f'conda activate {env_name}',
    ]
    enter_repo = f'cd {repo_directory}'

    commands = [UPDATE_TOX] if includes_tox else []
    commands.extend([*activate_env, enter_repo])

    if 'eval_commands' in specs:
        commands.extend(specs['eval_commands'])

    commands.extend(
        [
            # for nonroot user
            f'git config --global --add safe.directory {repo_directory}',
            enter_repo,
            # This is just informational, so we have a record
            'git status',
            'git show',
            *activate_env,
        ]
    )

    if 'install' in specs:
        commands.append(specs['install'])
    if includes_tox:
        commands.append('add_coverage_tox "tox.ini"')

    # Delete an existing .coveragerc file, if there is one.
    commands.append('[ -f ".coveragerc" ] && rm ".coveragerc"')
    return commands
def make_test_script_list(test_cmd, specs, env_name, repo_directory):
    """
    Build the shell commands that run the tests and print the coverage report.

    The list starts with the output of ``make_test_setup``; tox handling is
    switched on whenever the substring ``'tox'`` occurs in ``test_cmd``.
    After setup the script runs ``test_cmd``. On failure it echoes the
    ``TESTS_FAILED`` and ``TESTS_SUFFIX`` markers and exits with status 1.
    Otherwise it echoes ``TESTS_SUFFIX``, writes ``coverage.json`` via
    ``coverage json``, echoes ``COVERAGE_PREFIX`` and prints the JSON file.
    """
    uses_tox = 'tox' in test_cmd
    script = make_test_setup(specs, env_name, repo_directory, uses_tox)
    script.extend(
        (
            f'{test_cmd} || {{ echo "{TESTS_FAILED}\n{TESTS_SUFFIX}\n" && exit 1; }}',
            f'echo "{TESTS_SUFFIX}"\n',
            'coverage json -o coverage.json',
            f'echo "{COVERAGE_PREFIX}"\n',
            'cat coverage.json',
        )
    )
    return script
def make_mutation_script_list(specs, env_name, repo_directory, mutation_timeout):
    """
    Build the shell commands for a cosmic-ray mutation run.

    The list starts with the output of ``make_test_setup`` (called without
    the tox flag). It then initialises a cosmic-ray session from
    ``mutation.toml`` into ``mutation.sqlite``, executes that session under
    ``timeout {mutation_timeout}s``, and finally invokes ``cr-report`` and
    ``cr-rate`` on the session database.
    """
    script = make_test_setup(specs, env_name, repo_directory)
    script.extend(
        (
            'cosmic-ray init mutation.toml mutation.sqlite',
            f'timeout {mutation_timeout}s cosmic-ray exec mutation.toml mutation.sqlite',
            'cr-report mutation.sqlite',
            'cr-rate mutation.sqlite --estimate --confidence 95.0',
        )
    )
    return script
def make_test_spec(
    instance: TestGenEvalInstance, mutation_timeout: int, buffer: int
) -> TestSpec:
    """
    Create the ``TestSpec`` for one benchmark instance.

    Args:
        instance: Mapping describing the instance. An object that already is
            a ``TestSpec`` is returned unchanged.
        mutation_timeout: Overall time budget for the mutation run.
        buffer: Amount subtracted from ``mutation_timeout``; the difference
            is the timeout handed to ``make_mutation_script_list``.

    Returns:
        A ``TestSpec`` carrying the instance metadata, the assembled test
        command, and the test and mutation script command lists.
    """
    if isinstance(instance, TestSpec):
        return instance

    # Fields copied straight from the instance onto the spec.
    copied = {
        'instance_id': instance[KEY_INSTANCE_ID],
        'id': instance['id'],
        'repo': instance['repo'],
        'version': instance['version'],
        'baseline_covs': instance['baseline_covs'],
        'code_file': instance['code_file'],
        'test_file': instance['test_file'],
        'local_imports': instance['local_imports'],
    }

    env_name = 'testbed'
    repo_directory = f'/{env_name}'
    specs = MAP_REPO_VERSION_TO_SPECS[copied['repo']][copied['version']]

    # Base test command for this repo/version followed by the
    # instance-specific test directives.
    command_parts = [specs['test_cmd'], *get_test_directives(instance)]
    test_cmd = ' '.join(command_parts)

    test_commands = make_test_script_list(test_cmd, specs, env_name, repo_directory)
    mutation_commands = make_mutation_script_list(
        specs, env_name, repo_directory, mutation_timeout - buffer
    )

    return TestSpec(
        test_cmd=test_cmd,
        test_script_list=test_commands,
        mutation_script_list=mutation_commands,
        **copied,
    )