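# NOTE: residue from the hosting site's file viewer, kept for provenance only
# (not part of the program): author avatar "red1bluelost's picture";
# commit message "Updates constrain usage."; commit 7b93847;
# viewer links "raw" / "history blame"; file size 7.26 kB.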
import contextlib
import multiprocessing
import os
import subprocess
import tempfile
# C++ prelude placed at the top of every source file this module generates:
# unsafe_execute_cpp and invalid_compile_cpp both join HEADERS, the candidate
# and the reference code (in that order) before compiling.
HEADERS: str = """
#include <bits/stdc++.h>
"""
def check_correctness(candidate, reference, cpp_type, task_id, completion_id):
    """
    Evaluates the functional correctness of a completion by running the test
    suite provided in the problem.

    :param candidate: C++ source of the completion under evaluation.
    :param reference: mapping with a ``"tests"`` entry (code that must compile
        and run) and, for the ``"sfinae"`` and ``"concepts"`` types, an
        ``"invalids"`` entry (code that must be rejected by the compiler).
    :param cpp_type: one of ``"base"``, ``"sfinae"`` or ``"concepts"``.
        ``"concepts"`` is compiled as C++20, the others as C++17.
    :param task_id: identifier copied into the result.
    :param completion_id: an optional completion ID so we can match
        the results later even if execution finishes asynchronously.
    :return: dict with ``task_id``, ``completion_id`` and, prefixed by
        ``cpp_type``, ``_run_passed``, ``_run_compiled`` and ``_run_result``;
        the constrained types additionally get ``_constrain_passed`` and
        ``_constrain_result``.
    :raises ValueError: if ``cpp_type`` is not one of the known types.
    """
    # cpp_type -> (C++ standard, whether the "invalids" check is also run)
    modes = {
        "base": ("c++17", False),
        "sfinae": ("c++17", True),
        "concepts": ("c++20", True),
    }
    # Validate before starting the manager so an unknown type costs nothing.
    if cpp_type not in modes:
        raise ValueError(f"Unknown cpp_type: {cpp_type}")
    cppstd, has_constraints = modes[cpp_type]

    # The context manager shuts the manager's server process down on exit;
    # the outcomes are copied out as plain strings before that happens.
    with multiprocessing.Manager() as manager:
        run_result = _run_case(
            manager, unsafe_execute_cpp, candidate, reference["tests"], cppstd
        )
        constrain_result = None
        if has_constraints:
            constrain_result = _run_case(
                manager,
                invalid_compile_cpp,
                candidate,
                reference["invalids"],
                cppstd,
            )

    run_passed = run_result == "passed"
    # A runtime failure still proves that the program compiled.
    run_compiled = run_passed or run_result.startswith("failed: runtime error:")

    result = dict(
        task_id=task_id,
        completion_id=completion_id,
    )
    result[f"{cpp_type}_run_passed"] = run_passed
    result[f"{cpp_type}_run_compiled"] = run_compiled
    result[f"{cpp_type}_run_result"] = run_result
    if has_constraints:
        # Rejecting the invalid uses only counts if the valid tests compiled.
        result[f"{cpp_type}_constrain_passed"] = (
            constrain_result == "passed" and run_compiled
        )
        result[f"{cpp_type}_constrain_result"] = constrain_result
    return result


def _run_case(manager, target, candidate, reference, cppstd):
    """Run one check through ``process_case`` and return its outcome string."""
    outcome = manager.list()
    process_case(target, candidate, reference, outcome, cppstd)
    # process_case appends "timed out" when the worker recorded nothing,
    # so the first element always exists.
    return outcome[0]
def process_case(target, candidate, reference, result, cppstd, timeout=60):
    """
    Run ``target`` in a separate process and make sure ``result`` gets an entry.

    :param target: callable invoked in the child process as
        ``target(candidate, reference, result, timeout, cppstd)``.
    :param candidate: forwarded to ``target``.
    :param reference: forwarded to ``target``.
    :param result: list-like object the target appends its outcome to; if it
        is still empty once the child has finished or been killed,
        ``"timed out"`` is appended.
    :param cppstd: forwarded to ``target``.
    :param timeout: seconds handed to ``target``; the child itself is given
        five more seconds before it is killed. Defaults to 60.
    """
    worker = multiprocessing.Process(
        target=target,
        args=(candidate, reference, result, timeout, cppstd),
    )
    worker.start()
    # Grace period on top of the target's own timeout handling.
    worker.join(timeout=timeout + 5)
    if worker.is_alive():
        worker.kill()
        # Reap the killed child so it does not linger as a zombie.
        worker.join()

    if not result:
        result.append("timed out")
def unsafe_execute_cpp(candidate, reference, result, timeout, cppstd):
    """
    Compile ``candidate`` together with ``reference`` and run the binary.

    The program is built and executed inside a fresh temporary directory.
    Exactly one outcome string is appended to ``result``:

    * ``"passed"`` - compiled and exited with status 0;
    * ``"failed: compilation error: ..."`` - the compiler returned non-zero,
      or no compiler is configured;
    * ``"failed: runtime error: ..."`` - the binary exited with a non-zero
      status;
    * ``"timed out"`` - compilation or execution exceeded ``timeout``.

    :param candidate: C++ source under evaluation.
    :param reference: C++ test code appended after the candidate.
    :param result: list-like object receiving the outcome string.
    :param timeout: per-step limit in seconds for compiling and for running.
    :param cppstd: value for the compiler's ``-std=`` flag (e.g. ``"c++17"``).
    """
    with create_tempdir():
        code = "\n".join([HEADERS, candidate, reference])
        with open("test.cpp", "w") as source_file:
            source_file.write(code)

        cpp_compiler = os.getenv("GENERICIFY_CLANG")
        if not cpp_compiler:
            # Without this guard subprocess.run would raise on a None argument
            # and the caller would misreport the crash as a timeout.
            result.append(
                "failed: compilation error: "
                "GENERICIFY_CLANG environment variable is not set"
            )
            return

        try:
            compilation_result = subprocess.run(
                [cpp_compiler, f"-std={cppstd}", "test.cpp"],
                timeout=timeout,
                capture_output=True,
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        if compilation_result.returncode != 0:
            # Prefer stderr; fall back to stdout when stderr is empty.
            raw = compilation_result.stderr or compilation_result.stdout
            err = raw.decode(errors="replace")
            result.append(f"failed: compilation error: {err}")
            return

        try:
            exec_result = subprocess.run(
                ["./a.out"], timeout=timeout, capture_output=True
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        if exec_result.returncode == 0:
            result.append("passed")
            return

        raw = exec_result.stderr or exec_result.stdout
        # The tested program may emit arbitrary bytes; never fail on decoding.
        err = raw.decode(errors="replace")
        result.append(f"failed: runtime error: {err}")
def invalid_compile_cpp(candidate, reference, result, timeout, cppstd):
    """
    Check that ``candidate`` combined with ``reference`` is rejected by the
    compiler because a template was constrained away.

    The source is compiled inside a fresh temporary directory and exactly one
    outcome string is appended to ``result``:

    * ``"passed"`` - the compiler exited with status 1 and its output
      contains ``"note: candidate template ignored"``;
    * ``"failed: compilation succeeded: ..."`` - the exit status was not 1;
    * ``"failed: improperly constrained: ..."`` - the compiler exited with
      status 1 but without that note;
    * ``"failed: GENERICIFY_CLANG environment variable is not set"``;
    * ``"timed out"`` - compilation exceeded ``timeout``.

    :param candidate: C++ source under evaluation.
    :param reference: C++ code that is expected not to compile against it.
    :param result: list-like object receiving the outcome string.
    :param timeout: compilation limit in seconds.
    :param cppstd: value for the compiler's ``-std=`` flag (e.g. ``"c++20"``).
    """
    with create_tempdir():
        code = "\n".join([HEADERS, candidate, reference])
        with open("invalid.cpp", "w") as source_file:
            source_file.write(code)

        cpp_compiler = os.getenv("GENERICIFY_CLANG")
        if not cpp_compiler:
            # Without this guard subprocess.run would raise on a None argument
            # and the caller would misreport the crash as a timeout.
            result.append("failed: GENERICIFY_CLANG environment variable is not set")
            return

        try:
            compilation_result = subprocess.run(
                [cpp_compiler, f"-std={cppstd}", "invalid.cpp"],
                timeout=timeout,
                capture_output=True,
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        # Prefer stderr; fall back to stdout when stderr is empty.
        raw = compilation_result.stderr or compilation_result.stdout
        err = raw.decode(errors="replace")

        # NOTE(review): every exit status other than 1 (not just 0) is
        # reported as "compilation succeeded"; kept as in the original.
        if compilation_result.returncode != 1:
            result.append(f"failed: compilation succeeded: {err}")
        elif "note: candidate template ignored" in err:
            result.append("passed")
        else:
            # The f-prefix was missing, so "{err}" used to be emitted literally.
            result.append(f"failed: improperly constrained: {err}")
@contextlib.contextmanager
def create_tempdir():
    """
    Context manager yielding the path of a new temporary directory while it
    is the current working directory.

    On exit the previous working directory is restored first, then the
    temporary directory is removed.
    """
    with tempfile.TemporaryDirectory() as scratch, chdir(scratch):
        yield scratch
@contextlib.contextmanager
def chdir(root):
    """
    Context manager that switches the working directory to ``root`` and
    restores the previous one on exit, even if the body raises.

    :param root: directory to enter; ``"."`` leaves the working directory
        untouched.
    """
    if root == ".":
        yield
        return

    cwd = os.getcwd()
    os.chdir(root)
    try:
        yield
    finally:
        # Runs on normal exit and on any exception, which then propagates
        # unchanged; no explicit re-raise is needed.
        os.chdir(cwd)