|
import contextlib |
|
import multiprocessing |
|
import os |
|
import subprocess |
|
import tempfile |
|
|
|
# C++ prelude that is joined in front of the candidate and reference code
# whenever a source file is generated for compilation.
HEADERS: str = """
#include <bits/stdc++.h>
"""
|
|
|
|
|
def check_correctness(candidate, reference, cpp_type, task_id, completion_id):
    """
    Evaluates the functional correctness of a completion by running the test
    suite provided in the problem.

    The candidate is always compiled together with ``reference["tests"]`` and
    executed. For the constrained variants ("sfinae" and "concepts") it is
    additionally compiled against ``reference["invalids"]``, which the
    compiler is expected to reject.

    :param candidate: C++ source text of the completion under evaluation.
    :param reference: mapping with a "tests" entry and, for "sfinae" and
        "concepts", an "invalids" entry; both are joined into the generated
        source as text.
    :param cpp_type: one of "base", "sfinae" or "concepts". "base" and
        "sfinae" are built with -std=c++17, "concepts" with -std=c++20.
    :param task_id: identifier copied verbatim into the returned dict.
    :param completion_id: an optional completion ID so we can match
        the results later even if execution finishes asynchronously.
    :return: dict holding ``task_id`` and ``completion_id`` plus
        ``<cpp_type>_run_passed``, ``<cpp_type>_run_compiled`` and
        ``<cpp_type>_run_result``; the constrained variants also carry
        ``<cpp_type>_constrain_passed`` and ``<cpp_type>_constrain_result``.
    :raises ValueError: if ``cpp_type`` is not one of the known values.
    """
    if cpp_type == "base":
        cppstd, check_constraints = "c++17", False
    elif cpp_type == "sfinae":
        cppstd, check_constraints = "c++17", True
    elif cpp_type == "concepts":
        cppstd, check_constraints = "c++20", True
    else:
        # Reject unknown types before a manager process is started.
        raise ValueError(f"Unknown cpp_type: {cpp_type}")

    result = dict(
        task_id=task_id,
        completion_id=completion_id,
    )

    # The manager runs its own server process; the ``with`` block shuts it
    # down deterministically instead of leaving one behind per call.
    with multiprocessing.Manager() as manager:
        run_outcome = manager.list()
        process_case(
            unsafe_execute_cpp,
            candidate,
            reference["tests"],
            run_outcome,
            cppstd,
        )

        constrain_outcome = None
        if check_constraints:
            constrain_outcome = manager.list()
            process_case(
                invalid_compile_cpp,
                candidate,
                reference["invalids"],
                constrain_outcome,
                cppstd,
            )

        # Copy the plain strings out of the proxies while the manager is
        # still alive; the proxies are unusable after shutdown.
        run_result = run_outcome[0]
        constrain_result = constrain_outcome[0] if check_constraints else None

    run_passed = run_result == "passed"
    # A runtime failure still proves that the program compiled.
    run_compiled = run_passed or run_result.startswith("failed: runtime error:")

    result[f"{cpp_type}_run_passed"] = run_passed
    result[f"{cpp_type}_run_compiled"] = run_compiled
    result[f"{cpp_type}_run_result"] = run_result

    if check_constraints:
        # The negative check only counts when the positive tests compiled;
        # otherwise the rejection may just be an unrelated compile error.
        result[f"{cpp_type}_constrain_passed"] = (
            constrain_result == "passed" and run_compiled
        )
        result[f"{cpp_type}_constrain_result"] = constrain_result

    return result
|
|
|
|
|
def process_case(target, candidate, reference, result, cppstd, timeout=60):
    """
    Runs ``target`` in a separate process and makes sure ``result`` ends up
    non-empty.

    :param target: callable invoked in the child as
        ``target(candidate, reference, result, timeout, cppstd)``.
    :param candidate: forwarded to ``target`` unchanged.
    :param reference: forwarded to ``target`` unchanged.
    :param result: list-like object the child appends its outcome to. It must
        be shared across processes (e.g. a manager list) for the child's
        entry to be visible here.
    :param cppstd: C++ standard name forwarded to ``target``.
    :param timeout: seconds handed to ``target`` for each step; the child
        itself is given ``timeout + 5`` seconds before it is killed.
    :return: None. If the child left ``result`` empty for any reason (killed,
        or exited without reporting), "timed out" is appended.
    """
    worker = multiprocessing.Process(
        target=target,
        args=(candidate, reference, result, timeout, cppstd),
    )

    worker.start()
    # Small grace period on top of the timeout the target enforces itself.
    worker.join(timeout=timeout + 5)
    if worker.is_alive():
        worker.kill()
        # Reap the killed child so it does not linger as a zombie process.
        worker.join()

    if not result:
        result.append("timed out")
|
|
|
|
|
def unsafe_execute_cpp(candidate, reference, result, timeout, cppstd):
    """
    Compiles the candidate together with the reference tests and runs the
    resulting binary inside a temporary working directory.

    The compiler executable is read from the ``GENERICIFY_CLANG`` environment
    variable.

    :param candidate: C++ source text of the completion.
    :param reference: C++ test code appended after the candidate.
    :param result: list-like object that receives the outcome string.
    :param timeout: seconds allowed for the compile step and, separately,
        for the run step.
    :param cppstd: value passed to the compiler as ``-std=<cppstd>``.
    :return: None. One of the following is appended to ``result``:
        "passed", "timed out", "failed: compilation error: <output>" or
        "failed: runtime error: <output>".
    """
    with create_tempdir():
        code = "\n".join([HEADERS, candidate, reference])
        with open("test.cpp", "w") as source_file:
            source_file.write(code)

        cpp_compiler = os.getenv("GENERICIFY_CLANG")
        try:
            compilation_result = subprocess.run(
                [cpp_compiler, f"-std={cppstd}", "test.cpp"],
                timeout=timeout,
                capture_output=True,
            )
        except subprocess.TimeoutExpired:
            # Report the compile timeout explicitly instead of letting the
            # worker die with a traceback.
            result.append("timed out")
            return

        if compilation_result.returncode != 0:
            # Prefer stderr; fall back to stdout when stderr is empty.
            # errors="replace" keeps undecodable compiler output from
            # raising and hiding the real failure.
            raw = compilation_result.stderr or compilation_result.stdout
            err = raw.decode(errors="replace")
            result.append(f"failed: compilation error: {err}")
            return

        try:
            exec_result = subprocess.run(
                ["./a.out"], timeout=timeout, capture_output=True
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        if exec_result.returncode == 0:
            result.append("passed")
        else:
            raw = exec_result.stderr or exec_result.stdout
            err = raw.decode(errors="replace")
            result.append(f"failed: runtime error: {err}")
|
|
|
|
|
def invalid_compile_cpp(candidate, reference, result, timeout, cppstd):
    """
    Checks that the candidate rejects code it is supposed to reject.

    The candidate is compiled together with ``reference`` inside a temporary
    working directory. The check passes only when the compiler exits with
    status 1 and its output contains "note: candidate template ignored".

    The compiler executable is read from the ``GENERICIFY_CLANG`` environment
    variable.

    :param candidate: C++ source text of the completion.
    :param reference: C++ code appended after the candidate that is expected
        not to compile.
    :param result: list-like object that receives the outcome string.
    :param timeout: seconds allowed for the compile step.
    :param cppstd: value passed to the compiler as ``-std=<cppstd>``.
    :return: None. One of the following is appended to ``result``:
        "passed", "timed out", "failed: compilation succeeded: <output>" or
        "failed: improperly constrained: <output>".
    """
    with create_tempdir():
        code = "\n".join([HEADERS, candidate, reference])
        with open("invalid.cpp", "w") as source_file:
            source_file.write(code)

        cpp_compiler = os.getenv("GENERICIFY_CLANG")
        try:
            compilation_result = subprocess.run(
                [cpp_compiler, f"-std={cppstd}", "invalid.cpp"],
                timeout=timeout,
                capture_output=True,
            )
        except subprocess.TimeoutExpired:
            # Report the compile timeout explicitly instead of letting the
            # worker die with a traceback.
            result.append("timed out")
            return

        # Prefer stderr; fall back to stdout when stderr is empty.
        raw = compilation_result.stderr or compilation_result.stdout
        err = raw.decode(errors="replace")

        if compilation_result.returncode != 1:
            # Any exit status other than 1 is reported under this label.
            result.append(f"failed: compilation succeeded: {err}")
        elif "note: candidate template ignored" in err:
            result.append("passed")
        else:
            # f-string so the compiler output is actually included.
            result.append(f"failed: improperly constrained: {err}")
|
|
|
|
|
@contextlib.contextmanager
def create_tempdir():
    """
    Context manager that creates a temporary directory, makes it the current
    working directory for the duration of the block, and yields its path.

    On exit the previous working directory is restored first and the
    temporary directory is removed afterwards.
    """
    with tempfile.TemporaryDirectory() as scratch_dir, chdir(scratch_dir):
        yield scratch_dir
|
|
|
|
|
@contextlib.contextmanager
def chdir(root):
    """
    Context manager that temporarily switches the current working directory.

    :param root: directory to change into. The value "." is treated as
        "stay where we are": nothing is changed and nothing is restored.
    :return: yields None; the previous working directory is restored on exit,
        including when the body raises.
    """
    if root == ".":
        yield
        return

    previous_cwd = os.getcwd()
    os.chdir(root)
    try:
        yield
    finally:
        # Exceptions from the body propagate on their own; only the
        # directory restore has to be guaranteed here.
        os.chdir(previous_cwd)
|
|