import contextlib
import multiprocessing
import os
import subprocess
import tempfile


def check_correctness(candidate, reference, task_id, completion_id):
    """
    Evaluates the functional correctness of a completion by running the test
    suite provided in the problem against each variant of the candidate.

    :param candidate: mapping with the C++ source for the "base", "sfinae"
        and "concepts" variants.
    :param reference: mapping whose "tests" entry is the C++ test code that is
        appended to every variant.
    :param task_id: identifier of the task, copied into the returned dict.
    :param completion_id: an optional completion ID so we can match
        the results later even if execution finishes asynchronously.
    :return: dict with ``task_id``, ``completion_id`` and, for every variant,
        ``<variant>_run_passed`` (bool) and ``<variant>_run_result`` (str).
    """
    outcomes = {}
    # The manager spawns a server process; the context manager shuts it down
    # so repeated calls do not leak one process each.
    with multiprocessing.Manager() as manager:
        for variant in ("base", "sfinae", "concepts"):
            run_result = manager.list()
            process_case(
                unsafe_execute_cpp,
                candidate[variant],
                reference["tests"],
                run_result,
            )
            # Copy the value out while the manager is still alive; the proxy
            # becomes unusable after shutdown.
            outcomes[variant] = run_result[0]

    report = dict(task_id=task_id, completion_id=completion_id)
    for variant, outcome in outcomes.items():
        report[f"{variant}_run_passed"] = outcome == "passed"
        report[f"{variant}_run_result"] = outcome
    return report


def process_case(target, candidate, reference, result, timeout=60):
    """
    Runs ``target(candidate, reference, result, timeout)`` in a child process.

    The child gets ``timeout`` seconds per subprocess step and the parent
    waits ``timeout + 5`` seconds in total before killing it.  If the child
    left nothing in ``result`` (killed, or died without reporting), the string
    "timed out" is appended so ``result[0]`` always exists afterwards.

    :param target: callable executed in the child process.
    :param candidate: candidate source passed through to ``target``.
    :param reference: test source passed through to ``target``.
    :param result: shared list-like object the child appends its outcome to.
    :param timeout: per-step timeout in seconds (defaults to 60).
    """
    p = multiprocessing.Process(
        target=target,
        args=(candidate, reference, result, timeout),
    )
    p.start()
    p.join(timeout=timeout + 5)
    if p.is_alive():
        p.kill()
        # Reap the killed child so it does not linger as a zombie.
        p.join()

    if not result:
        result.append("timed out")


def _decode_output(completed):
    """Returns stderr (or stdout when stderr is empty) as text, or the raw
    bytes when the output is not valid in the default encoding."""
    raw = completed.stderr if completed.stderr else completed.stdout
    try:
        return raw.decode()
    except UnicodeDecodeError:
        return raw


def unsafe_execute_cpp(candidate, reference, result, timeout):
    """
    Compiles ``candidate + reference`` as C++20 inside a temporary directory
    and runs the resulting binary, appending exactly one outcome to ``result``:
    "passed", "timed out", "failed: compilation error: ..." or "failed: ...".

    The compiler executable is read from the ``GENERICIFY_CLANG`` environment
    variable.  The generated program is executed without any sandboxing.

    :param candidate: C++ source of the candidate solution.
    :param reference: C++ test code appended after the candidate.
    :param result: list-like object that receives the outcome string.
    :param timeout: timeout in seconds, applied separately to the compile
        step and to the run step.
    """
    with create_tempdir():
        # NOTE(review): the include directive below names no header; it looks
        # like an angle-bracket header was lost at some point -- confirm the
        # intended header before relying on this prefix.
        code = "#include \n" + candidate + reference
        with open("test.cpp", "w") as source_file:
            source_file.write(code)

        cpp_compiler = os.getenv("GENERICIFY_CLANG")
        if not cpp_compiler:
            # Without this check subprocess would raise on a None argument
            # and the caller would misreport the crash as a timeout.
            result.append(
                "failed: compilation error: "
                "GENERICIFY_CLANG environment variable is not set"
            )
            return

        try:
            compilation_result = subprocess.run(
                [cpp_compiler, "-std=c++20", "test.cpp"],
                timeout=timeout,
                capture_output=True,
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return
        except OSError as exc:
            # The compiler could not be launched (missing, not executable...).
            result.append(f"failed: compilation error: {exc}")
            return

        if compilation_result.returncode != 0:
            err = _decode_output(compilation_result)
            result.append(f"failed: compilation error: {err}")
            return

        try:
            exec_result = subprocess.run(
                ["./a.out"], timeout=timeout, capture_output=True
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return
        except OSError as exc:
            result.append(f"failed: {exc}")
            return

        if exec_result.returncode == 0:
            result.append("passed")
        else:
            err = _decode_output(exec_result)
            result.append(f"failed: {err}")


@contextlib.contextmanager
def create_tempdir():
    """Creates a temporary directory, makes it the working directory for the
    duration of the ``with`` body and yields its path; the previous working
    directory is restored and the directory removed on exit."""
    with tempfile.TemporaryDirectory() as dirname:
        with chdir(dirname):
            yield dirname


@contextlib.contextmanager
def chdir(root):
    """Temporarily switches the working directory to ``root``.

    When ``root`` is "." nothing is changed.  Otherwise the previous working
    directory is restored on exit, whether or not the body raised.
    """
    if root == ".":
        yield
        return
    cwd = os.getcwd()
    os.chdir(root)
    try:
        yield
    finally:
        os.chdir(cwd)