red1bluelost committed on
Commit
9fdbbcb
·
1 Parent(s): 6bd9122

Adds constraint tests for cpp.

Browse files
Files changed (3) hide show
  1. evaluate_genericify_cpp.py +3 -0
  2. execute.py +67 -5
  3. tests.py +86 -14
evaluate_genericify_cpp.py CHANGED
@@ -167,8 +167,11 @@ class EvaluateGenericifyCpp(evaluate.Metric):
167
  result.sort()
168
  for pt in [
169
  "base_run_passed",
 
170
  "sfinae_run_passed",
 
171
  "concepts_run_passed",
 
172
  ]:
173
  passed = [r[1][pt] for r in result]
174
  totals[pt].append(len(passed))
 
167
  result.sort()
168
  for pt in [
169
  "base_run_passed",
170
+ "base_run_compiled",
171
  "sfinae_run_passed",
172
+ "sfinae_run_compiled",
173
  "concepts_run_passed",
174
+ "concepts_run_compiled",
175
  ]:
176
  passed = [r[1][pt] for r in result]
177
  totals[pt].append(len(passed))
execute.py CHANGED
@@ -21,6 +21,7 @@ def check_correctness(candidate, reference, task_id, completion_id):
21
  candidate["base"],
22
  reference["tests"],
23
  base_run_result,
 
24
  )
25
  sfinae_run_result = manager.list()
26
  process_case(
@@ -28,6 +29,7 @@ def check_correctness(candidate, reference, task_id, completion_id):
28
  candidate["sfinae"],
29
  reference["tests"],
30
  sfinae_run_result,
 
31
  )
32
  concepts_run_result = manager.list()
33
  process_case(
@@ -35,26 +37,60 @@ def check_correctness(candidate, reference, task_id, completion_id):
35
  candidate["concepts"],
36
  reference["tests"],
37
  concepts_run_result,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  )
39
 
40
  return dict(
41
  task_id=task_id,
42
  completion_id=completion_id,
43
  base_run_passed=base_run_result[0] == "passed",
 
 
 
 
44
  base_run_result=base_run_result[0],
45
  sfinae_run_passed=sfinae_run_result[0] == "passed",
 
 
 
 
46
  sfinae_run_result=sfinae_run_result[0],
47
  concepts_run_passed=concepts_run_result[0] == "passed",
 
 
 
 
48
  concepts_run_result=concepts_run_result[0],
 
 
 
 
49
  )
50
 
51
 
52
- def process_case(target, candidate, reference, result):
53
  timeout = 60
54
 
55
  p = multiprocessing.Process(
56
  target=target,
57
- args=(candidate, reference, result, timeout),
58
  )
59
 
60
  p.start()
@@ -66,14 +102,14 @@ def process_case(target, candidate, reference, result):
66
  result.append("timed out")
67
 
68
 
69
- def unsafe_execute_cpp(candidate, reference, result, timeout):
70
  with create_tempdir():
71
  code = "#include <bits/stdc++.h>\n" + candidate + reference
72
  open(f"test.cpp", "w").write(code)
73
 
74
  cpp_compiler = os.getenv("GENERICIFY_CLANG")
75
  compilation_result = subprocess.run(
76
- [cpp_compiler, "-std=c++20", "test.cpp"],
77
  timeout=timeout,
78
  capture_output=True,
79
  )
@@ -102,12 +138,38 @@ def unsafe_execute_cpp(candidate, reference, result, timeout):
102
  err = exec_result.stdout.decode()
103
  except:
104
  err = exec_result.stdout
105
- result.append(f"failed: {err}")
106
 
107
  except subprocess.TimeoutExpired as e:
108
  result.append("timed out")
109
 
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  @contextlib.contextmanager
112
  def create_tempdir():
113
  with tempfile.TemporaryDirectory() as dirname:
 
21
  candidate["base"],
22
  reference["tests"],
23
  base_run_result,
24
+ "c++17",
25
  )
26
  sfinae_run_result = manager.list()
27
  process_case(
 
29
  candidate["sfinae"],
30
  reference["tests"],
31
  sfinae_run_result,
32
+ "c++17",
33
  )
34
  concepts_run_result = manager.list()
35
  process_case(
 
37
  candidate["concepts"],
38
  reference["tests"],
39
  concepts_run_result,
40
+ "c++20",
41
+ )
42
+
43
+ sfinae_constrain_result = manager.list()
44
+ process_case(
45
+ invalid_compile_cpp,
46
+ candidate["sfinae"],
47
+ reference["invalids"],
48
+ sfinae_constrain_result,
49
+ "c++17",
50
+ )
51
+ concepts_constrain_result = manager.list()
52
+ process_case(
53
+ invalid_compile_cpp,
54
+ candidate["concepts"],
55
+ reference["invalids"],
56
+ concepts_constrain_result,
57
+ "c++20",
58
  )
59
 
60
  return dict(
61
  task_id=task_id,
62
  completion_id=completion_id,
63
  base_run_passed=base_run_result[0] == "passed",
64
+ base_run_compiled=(
65
+ base_run_result[0] == "passed"
66
+ or base_run_result[0].startswith("failed: runtime error:")
67
+ ),
68
  base_run_result=base_run_result[0],
69
  sfinae_run_passed=sfinae_run_result[0] == "passed",
70
+ sfinae_run_compiled=(
71
+ sfinae_run_result[0] == "passed"
72
+ or sfinae_run_result[0].startswith("failed: runtime error:")
73
+ ),
74
  sfinae_run_result=sfinae_run_result[0],
75
  concepts_run_passed=concepts_run_result[0] == "passed",
76
+ concepts_run_compiled=(
77
+ concepts_run_result[0] == "passed"
78
+ or concepts_run_result[0].startswith("failed: runtime error:")
79
+ ),
80
  concepts_run_result=concepts_run_result[0],
81
+ sfinae_constrain_passed=sfinae_constrain_result[0] == "passed",
82
+ sfinae_constrain_result=sfinae_constrain_result[0],
83
+ concepts_constrain_passed=concepts_constrain_result[0] == "passed",
84
+ concepts_constrain_result=concepts_constrain_result[0],
85
  )
86
 
87
 
88
+ def process_case(target, candidate, reference, result, cppstd):
89
  timeout = 60
90
 
91
  p = multiprocessing.Process(
92
  target=target,
93
+ args=(candidate, reference, result, timeout, cppstd),
94
  )
95
 
96
  p.start()
 
102
  result.append("timed out")
103
 
104
 
105
+ def unsafe_execute_cpp(candidate, reference, result, timeout, cppstd):
106
  with create_tempdir():
107
  code = "#include <bits/stdc++.h>\n" + candidate + reference
108
  open(f"test.cpp", "w").write(code)
109
 
110
  cpp_compiler = os.getenv("GENERICIFY_CLANG")
111
  compilation_result = subprocess.run(
112
+ [cpp_compiler, f"-std={cppstd}", "test.cpp"],
113
  timeout=timeout,
114
  capture_output=True,
115
  )
 
138
  err = exec_result.stdout.decode()
139
  except:
140
  err = exec_result.stdout
141
+ result.append(f"failed: runtime error: {err}")
142
 
143
  except subprocess.TimeoutExpired as e:
144
  result.append("timed out")
145
 
146
 
147
def invalid_compile_cpp(candidate, reference, result, timeout, cppstd):
    """Check that a constrained candidate rejects an invalid call at compile time.

    The candidate source and the reference "invalids" snippet are concatenated
    (after ``#include <bits/stdc++.h>``), written to ``invalid.cpp`` and
    compiled with the compiler named by the ``GENERICIFY_CLANG`` environment
    variable. Exactly one status string is appended to ``result``.

    Args:
        candidate: C++ source of the candidate implementation.
        reference: C++ source that is expected NOT to compile with the candidate.
        result: List-like object that receives the status string.
        timeout: Seconds allowed for the compiler invocation.
        cppstd: Value passed to the compiler as ``-std=<cppstd>``.

    Appended statuses:
        ``"passed"`` when the compiler exits with status 1 and its diagnostics
        contain ``"note: candidate template ignored"``;
        ``"failed: improperly constrained: ..."`` when it exits with status 1
        without that note;
        ``"failed: compilation succeeded: ..."`` for any other exit status;
        ``"timed out"`` when the compiler exceeds ``timeout``.
    """
    # NOTE(review): the relative file name presumably relies on
    # create_tempdir() switching the working directory - confirm.
    with create_tempdir():
        code = "#include <bits/stdc++.h>\n" + candidate + reference
        # Use a context manager so the file is flushed and closed before the
        # compiler reads it.
        with open("invalid.cpp", "w") as source_file:
            source_file.write(code)

        cpp_compiler = os.getenv("GENERICIFY_CLANG")
        try:
            compilation_result = subprocess.run(
                [cpp_compiler, f"-std={cppstd}", "invalid.cpp"],
                timeout=timeout,
                capture_output=True,
            )
        except subprocess.TimeoutExpired:
            # Same status string that unsafe_execute_cpp reports on timeout.
            result.append("timed out")
            return

        # Prefer stderr (where diagnostics normally go); fall back to stdout.
        raw_output = compilation_result.stderr or compilation_result.stdout
        # Replace undecodable bytes instead of raising, so a status is always
        # recorded even for unusual compiler output.
        err = raw_output.decode(errors="replace")

        # NOTE(review): every exit status other than 1 (including abnormal
        # compiler termination) is reported as a successful compilation -
        # confirm this is intended.
        if compilation_result.returncode != 1:
            result.append(f"failed: compilation succeeded: {err}")
        elif "note: candidate template ignored" in err:
            result.append("passed")
        else:
            # The original string lacked the f-prefix, so the literal text
            # "{err}" was stored instead of the compiler diagnostics.
            result.append(f"failed: improperly constrained: {err}")
171
+
172
+
173
  @contextlib.contextmanager
174
  def create_tempdir():
175
  with tempfile.TemporaryDirectory() as dirname:
tests.py CHANGED
@@ -1,17 +1,89 @@
1
  test_cases = [
2
  {
3
- "predictions": [0, 0],
4
- "references": [1, 1],
5
- "result": {"metric_score": 0}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  },
7
- {
8
- "predictions": [1, 1],
9
- "references": [1, 1],
10
- "result": {"metric_score": 1}
11
- },
12
- {
13
- "predictions": [1, 0],
14
- "references": [1, 1],
15
- "result": {"metric_score": 0.5}
16
- }
17
- ]
 
1
  test_cases = [
2
  {
3
+ "predictions": [
4
+ [
5
+ {
6
+ "base": r"""
7
+ template <typename T>
8
+ bool has_close_elements(T numbers, float threshold) {
9
+ for (int i = 0; i < numbers.size(); i++)
10
+ for (int j = i + 1; j < numbers.size(); j++)
11
+ if (std::abs(numbers[i] - numbers[j]) < threshold)
12
+ return true;
13
+
14
+ return false;
15
+ }
16
+ """,
17
+ "sfinae": r"""
18
+ template <
19
+ typename T,
20
+ std::enable_if_t<std::is_same_v<typename T::value_type, float>, int> = 0>
21
+ bool has_close_elements(T numbers, float threshold) {
22
+ for (int i = 0; i < numbers.size(); i++)
23
+ for (int j = i + 1; j < numbers.size(); j++)
24
+ if (std::abs(numbers[i] - numbers[j]) < threshold)
25
+ return true;
26
+
27
+ return false;
28
+ }
29
+ """,
30
+ "concepts": r"""
31
+ template <typename T>
32
+ requires std::same_as<typename T::value_type, float>
33
+ bool has_close_elements(T numbers, float threshold) {
34
+ for (int i = 0; i < numbers.size(); i++)
35
+ for (int j = i + 1; j < numbers.size(); j++)
36
+ if (std::abs(numbers[i] - numbers[j]) < threshold)
37
+ return true;
38
+
39
+ return false;
40
+ }
41
+ """,
42
+ }
43
+ ]
44
+ ],
45
+ "references": [
46
+ {
47
+ "tests": r"""
48
+ #define ASSERT(...) \
49
+ do { \
50
+ if (!(__VA_ARGS__)) { \
51
+ std::exit(-1); \
52
+ } \
53
+ } while (false)
54
+
55
+ #define TEST_ON_TYPE(_type_) \
56
+ do { \
57
+ _type_ a = {1.0, 2.0, 3.9, 4.0, 5.0, 2.2}; \
58
+ ASSERT(has_close_elements(a, 0.3) == true); \
59
+ ASSERT(has_close_elements(a, 0.05) == false); \
60
+ ASSERT(has_close_elements(_type_{1.0, 2.0, 5.9, 4.0, 5.0}, 0.95) == true); \
61
+ ASSERT(has_close_elements(_type_{1.0, 2.0, 5.9, 4.0, 5.0}, 0.8) == false); \
62
+ ASSERT(has_close_elements(_type_{1.0, 2.0, 3.0, 4.0, 5.0}, 2.0) == true); \
63
+ ASSERT(has_close_elements(_type_{1.1, 2.2, 3.1, 4.1, 5.1}, 1.0) == true); \
64
+ ASSERT(has_close_elements(_type_{1.1, 2.2, 3.1, 4.1, 5.1}, 0.5) == false); \
65
+ } while (false)
66
+
67
+ int main() {
68
+ TEST_ON_TYPE(std::vector<float>);
69
+ TEST_ON_TYPE(std::deque<float>);
70
+ }
71
+ """,
72
+ "invalids": r"""
73
+ int main() {
74
+ std::string s{};
75
+ has_close_elements(s, 3.4);
76
+ }
77
+ """,
78
+ }
79
+ ],
80
+ "result": {
81
+ "base_run_passed@1": 1.0,
82
+ "base_run_compiled@1": 1.0,
83
+ "sfinae_run_passed@1": 1.0,
84
+ "sfinae_run_compiled@1": 1.0,
85
+ "concepts_run_passed@1": 1.0,
86
+ "concepts_run_compiled@1": 1.0,
87
+ },
88
  },
89
+ ]