from __future__ import annotations
from dataclasses import dataclass
from evaluation.benchmarks.testgeneval.constants import (
COVERAGE_PREFIX,
KEY_INSTANCE_ID,
MAP_REPO_VERSION_TO_SPECS,
TESTS_FAILED,
TESTS_SUFFIX,
UPDATE_TOX,
TestGenEvalInstance,
)
from evaluation.benchmarks.testgeneval.utils import (
get_test_directives,
)
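# Matches the original-file path in a unified diff header line ("--- a/<path>")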
DIFF_MODIFIED_FILE_REGEX = r'--- a/(.*)'
@dataclass
class TestSpec:
"""
A dataclass that represents a test specification for a single instance of SWE-bench.
"""
instance_id: str
id: str
repo: str
version: str
test_cmd: str
code_file: str
test_file: str
baseline_covs: dict
local_imports: list[str]
test_script_list: list[str]
mutation_script_list: list[str]
@property
    def test_script(self):
        # `set -uo pipefail` intentionally omits `-e`: don't exit early,
        # because we need to revert tests at the end.
        return (
            '\n'.join(['#!/bin/bash', 'set -uo pipefail'] + self.test_script_list)
            + '\n'
        )
@property
    def mutation_script(self):
        # Same flag choice as test_script: no `-e`, so a failing command
        # does not abort the script before tests are reverted.
        return (
            '\n'.join(['#!/bin/bash', 'set -uo pipefail'] + self.mutation_script_list)
            + '\n'
        )
def make_test_setup(specs, env_name, repo_directory, includes_tox=False):
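    """
    Build the setup commands shared by the test and mutation scripts:
    activate the conda env, enter the repo, apply any repo-specific eval
    commands, and run the install step if one is defined.
    """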
eval_commands = []
if includes_tox:
eval_commands.append(UPDATE_TOX)
eval_commands += [
'source /opt/miniconda3/bin/activate',
f'conda activate {env_name}',
f'cd {repo_directory}',
]
if 'eval_commands' in specs:
eval_commands += specs['eval_commands']
eval_commands += [
f'git config --global --add safe.directory {repo_directory}', # for nonroot user
f'cd {repo_directory}',
# This is just informational, so we have a record
'git status',
'git show',
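        # Re-activate the environment in case the repo-specific
        # eval_commands above changed it.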
'source /opt/miniconda3/bin/activate',
f'conda activate {env_name}',
]
if 'install' in specs:
eval_commands.append(specs['install'])
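    # `add_coverage_tox` is presumably a shell helper provided by the
    # evaluation image; it wires coverage collection into tox.ini. Any stale
    # .coveragerc is removed so it cannot override that configuration.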
if includes_tox:
eval_commands.append('add_coverage_tox "tox.ini"')
eval_commands.append('[ -f ".coveragerc" ] && rm ".coveragerc"')
return eval_commands
def make_test_script_list(test_cmd, specs, env_name, repo_directory):
"""
Runs the tests.
"""
includes_tox = 'tox' in test_cmd
eval_commands = make_test_setup(specs, env_name, repo_directory, includes_tox)
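    # TESTS_FAILED, TESTS_SUFFIX, and COVERAGE_PREFIX are sentinel strings
    # echoed into the log so the harness can locate the test outcome and the
    # JSON coverage report in stdout.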
eval_commands += [
f'{test_cmd} || {{ echo "{TESTS_FAILED}\n{TESTS_SUFFIX}\n" && exit 1; }}',
f'echo "{TESTS_SUFFIX}"\n',
'coverage json -o coverage.json',
f'echo "{COVERAGE_PREFIX}"\n',
'cat coverage.json',
]
return eval_commands
def make_mutation_script_list(specs, env_name, repo_directory, mutation_timeout):
"""
Runs the tests.
"""
eval_commands = make_test_setup(specs, env_name, repo_directory)
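    # cosmic-ray workflow: `init` builds the mutation session database,
    # `exec` runs the mutants (bounded by the timeout), `cr-report` prints
    # per-mutant results, and `cr-rate --estimate` gives a 95%-confidence
    # estimate of the surviving-mutant rate.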
eval_commands += [
'cosmic-ray init mutation.toml mutation.sqlite',
f'timeout {mutation_timeout}s cosmic-ray exec mutation.toml mutation.sqlite',
'cr-report mutation.sqlite',
'cr-rate mutation.sqlite --estimate --confidence 95.0',
]
return eval_commands
def make_test_spec(
instance: TestGenEvalInstance, mutation_timeout: int, buffer: int
) -> TestSpec:
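    """
    Build a TestSpec (with rendered test and mutation scripts) from a raw
    TestGenEval instance; pass through if it is already a TestSpec.
    """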
if isinstance(instance, TestSpec):
return instance
instance_id = instance[KEY_INSTANCE_ID]
id = instance['id']
repo = instance['repo']
version = instance['version']
baseline_covs = instance['baseline_covs']
code_file = instance['code_file']
test_file = instance['test_file']
local_imports = instance['local_imports']
env_name = 'testbed'
repo_directory = f'/{env_name}'
specs = MAP_REPO_VERSION_TO_SPECS[repo][version]
    test_cmd = ' '.join(
        [
            specs['test_cmd'],
            *get_test_directives(instance),
        ]
    )
test_script_list = make_test_script_list(test_cmd, specs, env_name, repo_directory)
mutation_script_list = make_mutation_script_list(
specs, env_name, repo_directory, mutation_timeout - buffer
)
return TestSpec(
instance_id=instance_id,
id=id,
repo=repo,
test_script_list=test_script_list,
test_cmd=test_cmd,
local_imports=local_imports,
mutation_script_list=mutation_script_list,
code_file=code_file,
test_file=test_file,
baseline_covs=baseline_covs,
version=version,
)