"""Implements evaluation on JetBrains CI builds repair baselines
Please see https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair
and https://huggingface.co/datasets/JetBrains-Research/lca-ci-builds-repair
TODOs:
- Add more flags
"""
import json
import os
from pathlib import Path
import ruamel.yaml
from evaluation.utils.shared import (
EvalMetadata,
get_default_sandbox_config_for_eval,
make_metadata,
)
from openhands.core.config import (
LLMConfig,
OpenHandsConfig,
get_parser,
load_openhands_config,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync


def get_config(
metadata: EvalMetadata,
) -> OpenHandsConfig:
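    """Build the OpenHands config used to run the evaluation inside a Docker sandbox."""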
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(metadata.llm_config)
agent_config = config.get_agent_config(metadata.agent_class)
agent_config.enable_prompt_extensions = False
return config


config = load_openhands_config()


def load_bench_config():
script_dir = os.path.dirname(
os.path.abspath(__file__)
) # Get the absolute path of the script
config_path = os.path.join(script_dir, 'config.yaml')
yaml = ruamel.yaml.YAML(typ='rt')
with open(config_path, 'r') as file:
return yaml.load(file)


bench_config = load_bench_config()


def run_eval(
runtime: Runtime,
):
"""Run the evaluation and create report"""
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
lca_path = bench_config['LCA_PATH']
lca_ci_path = os.path.join(
lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
)
model_name = bench_config['model_name']
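
    # Create the working directory for the LCA baselines inside the sandbox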
action = CmdRunAction(command=f'mkdir {lca_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
action = CmdRunAction(command=f'cd {lca_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
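
    # Clone the lca-baselines repository into the sandbox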
lca_repo_url = 'https://github.com/juanmichelini/lca-baselines'
action = CmdRunAction(command=f'git clone {lca_repo_url}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
action = CmdRunAction(command=f'cd {lca_ci_path}')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
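
    # Switch to the branch that integrates the benchmark with OpenHands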
action = CmdRunAction(command='git switch open-hands-integration')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert obs.exit_code == 0
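
    # Copy the local benchmark config.yaml into the benchmark directory in the sandbox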
script_dir = os.path.dirname(
os.path.abspath(__file__)
) # Get the absolute path of the script
config_path = os.path.join(script_dir, 'config.yaml')
runtime.copy_to(config_path, lca_ci_path)
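
    # Export the GitHub token (read from config.yaml) that the benchmark uses to access GitHub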
token_gh = bench_config['token_gh']
commandf = f'export TOKEN_GH={token_gh}'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
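
    # Install the benchmark's Python dependencies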
action = CmdRunAction(command='poetry install')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
    # Run the benchmark's evaluation jobs against the predictions in /tmp/output_lca.jsonl
commandf = f'poetry run python run_eval_jobs.py --model-name "{model_name}" --config-path "{lca_ci_path}/config.yaml" --job-ids-file "/tmp/output_lca.jsonl" --result-filename "testfile.jsonl" > /tmp/single_output.txt'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
    logger.info(f'run_eval_jobs.py output: {obs.content}')
# assert obs.exit_code == 0
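
    # Dump the captured stdout of run_eval_jobs.py for debugging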
commandf = 'cat /tmp/single_output.txt'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
    logger.info(f'{commandf} output: {obs.content}')
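
    # Read back the per-job results file written by the benchmark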
testfile_path = os.path.join(bench_config['out_folder'], 'testfile.jsonl')
commandf = f'cat {testfile_path}'
action = CmdRunAction(command=commandf)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
report_str = obs.content
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
return report_str


def process_predictions(predictions_path: str):
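    """Convert the agent's output.jsonl into the *_lca.jsonl file consumed by the benchmark.

    Each line is reduced to its 'test_result' payload; the resulting file is passed
    to run_eval_jobs.py as --job-ids-file.
    """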
output_path = Path(predictions_path)
if output_path.suffix != '.jsonl':
raise ValueError('output_path must end in .jsonl')
output_lca_path = output_path.with_name(output_path.stem + '_lca.jsonl')
with output_path.open() as infile, output_lca_path.open('w') as outfile:
for line in infile:
data = json.loads(line)
json.dump(data.get('test_result'), outfile)
outfile.write('\n')
return str(output_lca_path)


if __name__ == '__main__':
parser = get_parser()
parser.add_argument(
'-s',
'--eval-split',
type=str,
default='test',
choices=['test'],
        help='Data split to evaluate on; must be "test".',
)
parser.add_argument(
'--predictions-path',
type=str,
help='Path to the directory containing the output.jsonl with the predictions.',
)
args, _ = parser.parse_known_args()
data_split = args.eval_split
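    # The evaluation only replays existing predictions, so a placeholder LLM config is enough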
llm_config = LLMConfig(model='dummy_model')
metadata = make_metadata(
llm_config,
f'jetbrains-lca-ci--{data_split}',
args.agent_cls,
args.max_iterations,
args.eval_note,
args.predictions_path,
)
# prepare image
config = get_config(metadata)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
logger.info('Converting output.jsonl into output_lca.jsonl')
predictions_lca_path = process_predictions(
os.path.join(args.predictions_path, 'output.jsonl')
)
runtime.copy_to(predictions_lca_path, '/tmp')
# get results
results_str = run_eval(runtime)
results_path = os.path.join(args.predictions_path, 'results.jsonl')
with open(results_path, 'w') as file:
file.write(results_str)
logger.info(f'Saved results to {results_path}')
# make a summary
resolved_instances = []
unresolved_instances = []
for line in results_str.strip().splitlines():
data = json.loads(line)
conclusion = data.get('conclusion')
if conclusion == 'success':
resolved_instances.append(data)
elif conclusion == 'failure':
unresolved_instances.append(data)
completed_instances = resolved_instances + unresolved_instances
report = {
'success': len(resolved_instances),
'failure': len(unresolved_instances),
'resolved_instances': resolved_instances,
'unresolved_instances': unresolved_instances,
'completed_instances': completed_instances,
}
print(f'Results: {report}')
report_path = os.path.join(args.predictions_path, 'report.jsonl')
with open(report_path, 'w') as out_f:
out_f.write(json.dumps(report) + '\n')
    logger.info(f'Saved report of results in SWE-bench format to {report_path}')