Spaces:
Sleeping
Sleeping
Muennighoff
commited on
Commit
•
0e7922f
1
Parent(s):
ea58aa2
Add JS
Browse files- code_eval.py +2 -2
- execute.py +43 -2
code_eval.py
CHANGED
@@ -152,7 +152,7 @@ class CodeEval(evaluate.Metric):
|
|
152 |
license=_LICENSE,
|
153 |
)
|
154 |
|
155 |
-
def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0):
|
156 |
"""Returns the scores"""
|
157 |
|
158 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
@@ -170,7 +170,7 @@ class CodeEval(evaluate.Metric):
|
|
170 |
for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
|
171 |
for candidate in candidates:
|
172 |
test_program = candidate + "\n" + test_case
|
173 |
-
args = (test_program, timeout, task_id, completion_id[task_id])
|
174 |
future = executor.submit(check_correctness, *args)
|
175 |
futures.append(future)
|
176 |
completion_id[task_id] += 1
|
|
|
152 |
license=_LICENSE,
|
153 |
)
|
154 |
|
155 |
+
def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0, language="python"):
|
156 |
"""Returns the scores"""
|
157 |
|
158 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
|
|
170 |
for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
|
171 |
for candidate in candidates:
|
172 |
test_program = candidate + "\n" + test_case
|
173 |
+
args = (test_program, timeout, task_id, completion_id[task_id], language)
|
174 |
future = executor.submit(check_correctness, *args)
|
175 |
futures.append(future)
|
176 |
completion_id[task_id] += 1
|
execute.py
CHANGED
@@ -24,8 +24,12 @@ import platform
|
|
24 |
import signal
|
25 |
import tempfile
|
26 |
|
|
|
|
|
|
|
|
|
27 |
|
28 |
-
def check_correctness(check_program, timeout, task_id, completion_id):
|
29 |
"""
|
30 |
Evaluates the functional correctness of a completion by running the test
|
31 |
suite provided in the problem.
|
@@ -36,7 +40,8 @@ def check_correctness(check_program, timeout, task_id, completion_id):
|
|
36 |
manager = multiprocessing.Manager()
|
37 |
result = manager.list()
|
38 |
|
39 |
-
p = multiprocessing.Process(target=
|
|
|
40 |
p.start()
|
41 |
p.join(timeout=timeout + 1)
|
42 |
if p.is_alive():
|
@@ -85,6 +90,42 @@ def unsafe_execute(check_program, result, timeout):
|
|
85 |
os.rmdir = rmdir
|
86 |
os.chdir = chdir
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
@contextlib.contextmanager
|
90 |
def time_limit(seconds):
|
|
|
24 |
import signal
|
25 |
import tempfile
|
26 |
|
27 |
+
LANGUAGE_TO_FUNC = {
|
28 |
+
"python": unsafe_execute,
|
29 |
+
"javascript": unsafe_execute_js,
|
30 |
+
}
|
31 |
|
32 |
+
def check_correctness(check_program, timeout, task_id, completion_id, language):
|
33 |
"""
|
34 |
Evaluates the functional correctness of a completion by running the test
|
35 |
suite provided in the problem.
|
|
|
40 |
manager = multiprocessing.Manager()
|
41 |
result = manager.list()
|
42 |
|
43 |
+
p = multiprocessing.Process(target=LANGUAGE_TO_FUNC[language], args=(check_program, result, timeout))
|
44 |
+
|
45 |
p.start()
|
46 |
p.join(timeout=timeout + 1)
|
47 |
if p.is_alive():
|
|
|
90 |
os.rmdir = rmdir
|
91 |
os.chdir = chdir
|
92 |
|
93 |
+
def unsafe_execute_js(check_program, result, timeout):
|
94 |
+
|
95 |
+
with create_tempdir():
|
96 |
+
|
97 |
+
open(f"test.js", 'w').write(check_program)
|
98 |
+
|
99 |
+
# These system calls are needed when cleaning up tempdir.
|
100 |
+
import os
|
101 |
+
import shutil
|
102 |
+
|
103 |
+
rmtree = shutil.rmtree
|
104 |
+
rmdir = os.rmdir
|
105 |
+
chdir = os.chdir
|
106 |
+
|
107 |
+
# Run program.
|
108 |
+
try:
|
109 |
+
exec_globals = {}
|
110 |
+
with time_limit(timeout):
|
111 |
+
exec_result = subprocess.run(["node", "test.js"], timeout=timeout, capture_output=True)
|
112 |
+
if exec_result.stderr.decode():
|
113 |
+
err = exec_result.stderr.decode()
|
114 |
+
result.append(f"failed: {err}")
|
115 |
+
elif exec_result.stdout.decode():
|
116 |
+
err = exec_result.stdout.decode()
|
117 |
+
result.append(f"failed: {err}")
|
118 |
+
else:
|
119 |
+
result.append("passed")
|
120 |
+
|
121 |
+
except TimeoutException:
|
122 |
+
result.append("timed out")
|
123 |
+
|
124 |
+
# Needed for cleaning up.
|
125 |
+
shutil.rmtree = rmtree
|
126 |
+
os.rmdir = rmdir
|
127 |
+
os.chdir = chdir
|
128 |
+
|
129 |
|
130 |
@contextlib.contextmanager
|
131 |
def time_limit(seconds):
|