Spaces:
Sleeping
Sleeping
Testing some additional changes
Browse files- README.md +2 -0
- restrictedpython_code_eval.py +79 -8
README.md
CHANGED
@@ -60,6 +60,8 @@ In addition, this metric supports three additional arguments, specifying which i
|
|
60 |
|
61 |
**`allowed_imports`** (`List[str] | None`): A list of allowed imports. Defaults to None.
|
62 |
|
|
|
|
|
63 |
As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
|
64 |
|
65 |
Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
|
|
|
60 |
|
61 |
**`allowed_imports`** (`List[str] | None`): A list of allowed imports. Defaults to None.
|
62 |
|
63 |
+
**`allow_str_format`** (`bool`): Whether to allow the use of `str.format`. Defaults to False, as it is considered [harmful](http://lucumr.pocoo.org/2016/12/29/careful-with-str-format/).
|
64 |
+
|
65 |
As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
|
66 |
|
67 |
Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
|
restrictedpython_code_eval.py
CHANGED
@@ -15,7 +15,7 @@
|
|
15 |
to execute the untrusted code returned by the model.
|
16 |
Lightly adapted and mostly copied verbatim from the implementation in `evaluate`.
|
17 |
"""
|
18 |
-
|
19 |
import contextlib
|
20 |
import faulthandler
|
21 |
import itertools
|
@@ -36,11 +36,75 @@ import evaluate
|
|
36 |
# from evaluate.metrics import code_eval
|
37 |
import datasets
|
38 |
import numpy as np
|
39 |
-
from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins
|
|
|
40 |
from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
|
41 |
from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr, guarded_unpack_sequence
|
42 |
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
# TODO: Add BibTeX citation
|
45 |
_CITATION = """\
|
46 |
@InProceedings{huggingface:module,
|
@@ -73,6 +137,7 @@ Args:
|
|
73 |
additional_globals: a optional dict of additional globals to pass to the RestrictedPython interpreter
|
74 |
additional_locals: a optional dict of additional locals to pass to the RestrictedPython interpreter
|
75 |
allowed_imports: an optional list of string, modules the tested code is allowed to import
|
|
|
76 |
|
77 |
Returns:
|
78 |
pass_at_k: dict with pass rates for each k
|
@@ -159,7 +224,7 @@ class RestrictedPythonCodeEval(evaluate.Metric):
|
|
159 |
def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
|
160 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
161 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
162 |
-
allowed_imports: Optional[List[str]] = None):
|
163 |
"""Returns the scores"""
|
164 |
|
165 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
@@ -180,7 +245,8 @@ class RestrictedPythonCodeEval(evaluate.Metric):
|
|
180 |
args = (
|
181 |
test_program, timeout, task_id, completion_id[task_id],
|
182 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
183 |
-
additional_globals, additional_locals,
|
|
|
184 |
)
|
185 |
future = executor.submit(_check_correctness, *args)
|
186 |
futures.append(future)
|
@@ -228,7 +294,7 @@ def estimate_pass_at_k(num_samples, num_correct, k):
|
|
228 |
def _check_correctness(check_program, timeout, task_id, completion_id,
|
229 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
230 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
231 |
-
allowed_imports: Optional[List[str]] = None):
|
232 |
"""
|
233 |
Evaluates the functional correctness of a completion by running the test
|
234 |
suite provided in the problem.
|
@@ -242,7 +308,8 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
|
|
242 |
args = (
|
243 |
check_program, result, timeout,
|
244 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
245 |
-
additional_globals, additional_locals,
|
|
|
246 |
)
|
247 |
p = multiprocessing.Process(target=_unsafe_execute, args=args)
|
248 |
p.start()
|
@@ -315,7 +382,7 @@ class DefaultPrinter:
|
|
315 |
def _unsafe_execute(check_program, result, timeout,
|
316 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
317 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
318 |
-
allowed_imports: Optional[List[str]] = None):
|
319 |
|
320 |
with create_tempdir():
|
321 |
|
@@ -358,6 +425,10 @@ def _unsafe_execute(check_program, result, timeout,
|
|
358 |
|
359 |
exec_globals['__builtins__']['__import__'] = AllowListImporter(allowed_imports)
|
360 |
|
|
|
|
|
|
|
|
|
361 |
if '__metaclass__' not in exec_globals:
|
362 |
exec_globals['__metaclass__'] = type # type: ignore
|
363 |
|
@@ -393,7 +464,7 @@ def _unsafe_execute(check_program, result, timeout,
|
|
393 |
|
394 |
with swallow_io():
|
395 |
with time_limit(timeout):
|
396 |
-
byte_code = compile_restricted(check_program, filename="<model output>", mode="exec")
|
397 |
exec(byte_code, exec_globals, additional_locals)
|
398 |
result.append("passed")
|
399 |
except EOFError:
|
|
|
15 |
to execute the untrusted code returned by the model.
|
16 |
Lightly adapted and mostly copied verbatim from the implementation in `evaluate`.
|
17 |
"""
|
18 |
+
import ast
|
19 |
import contextlib
|
20 |
import faulthandler
|
21 |
import itertools
|
|
|
36 |
# from evaluate.metrics import code_eval
|
37 |
import datasets
|
38 |
import numpy as np
|
39 |
+
from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins, RestrictingNodeTransformer
|
40 |
+
from RestrictedPython.transformer import copy_locations, IOPERATOR_TO_STR
|
41 |
from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
|
42 |
from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr, guarded_unpack_sequence
|
43 |
|
44 |
|
45 |
+
# Patch RestrictedPython's limited `list` and `tuple` builtins so that they can also be called with no arguments (producing an empty list / tuple).
|
46 |
+
# Sentinel meaning "called with no argument"; kept distinct from ``None`` so
# that ``list(None)`` still fails the way it does in ordinary Python.
_LIMITED_LIST_NO_ARG = object()


def limited_list(seq=_LIMITED_LIST_NO_ARG):
    """Restricted replacement for ``list`` that also supports ``list()``.

    Args:
        seq: Optional iterable to convert. When omitted, an empty list is
            returned.

    Returns:
        A new list holding the items of ``seq`` (or an empty list).

    Raises:
        TypeError: If ``seq`` is a string (string-to-list conversion is
            refused by this restricted builtin) or is not iterable, which
            includes an explicit ``None``.
    """
    if seq is _LIMITED_LIST_NO_ARG:
        return []
    if isinstance(seq, str):
        raise TypeError('cannot convert string to list')
    # list() itself raises TypeError for non-iterables such as None, so code
    # under evaluation sees the same failure it would outside the sandbox.
    return list(seq)
|
50 |
+
|
51 |
+
|
52 |
+
# Install the permissive variant. This assigns into the `limited_builtins`
# mapping imported from RestrictedPython, i.e. it mutates that shared object
# in place at import time of this module.
limited_builtins['list'] = limited_list
|
53 |
+
|
54 |
+
|
55 |
+
# Sentinel meaning "called with no argument"; kept distinct from ``None`` so
# that ``tuple(None)`` still fails the way it does in ordinary Python.
_LIMITED_TUPLE_NO_ARG = object()


def limited_tuple(seq=_LIMITED_TUPLE_NO_ARG):
    """Restricted replacement for ``tuple`` that also supports ``tuple()``.

    Args:
        seq: Optional iterable to convert. When omitted, an empty tuple is
            returned.

    Returns:
        A new tuple holding the items of ``seq`` (or an empty tuple).

    Raises:
        TypeError: If ``seq`` is a string (string-to-tuple conversion is
            refused by this restricted builtin) or is not iterable, which
            includes an explicit ``None``.
    """
    if seq is _LIMITED_TUPLE_NO_ARG:
        return ()
    if isinstance(seq, str):
        raise TypeError('cannot convert string to tuple')
    # tuple() itself raises TypeError for non-iterables such as None, so code
    # under evaluation sees the same failure it would outside the sandbox.
    return tuple(seq)
|
59 |
+
|
60 |
+
|
61 |
+
# Install the permissive variant. This assigns into the `limited_builtins`
# mapping imported from RestrictedPython, i.e. it mutates that shared object
# in place at import time of this module.
limited_builtins['tuple'] = limited_tuple
|
62 |
+
|
63 |
+
|
64 |
+
def safer_getattr_allowing_string_format(object, name, default=None, getattr=getattr):
    """Getattr implementation allowing str.format(), but preventing access to
    private attributes.

    format() is considered harmful, so use at own risk:
    http://lucumr.pocoo.org/2016/12/29/careful-with-str-format/

    Args:
        object: The object whose attribute is requested. (The parameter name
            shadows the builtin; it is kept because it is part of the
            signature.)
        name: Attribute name. Must be an exact ``str`` instance.
        default: Value returned when the attribute does not exist.
        getattr: The underlying lookup function, bound at definition time to
            the real builtin so that later rebinding of the global name
            ``getattr`` does not affect this guard.

    Returns:
        The attribute value, or ``default`` if the attribute is missing.

    Raises:
        TypeError: If ``name`` is not exactly of type ``str``.
        AttributeError: If ``name`` starts with an underscore.
    """
    # Deliberately an exact type check rather than isinstance(): a str
    # subclass could override startswith() to return False and thereby slip
    # a private name such as "__class__" past the underscore guard below.
    if type(name) is not str:
        raise TypeError('type(name) must be str')
    if name.startswith('_'):
        raise AttributeError(
            '"{name}" is an invalid attribute name because it '
            'starts with "_"'.format(name=name)
        )
    return getattr(object, name, default)
|
78 |
+
|
79 |
+
|
80 |
+
class CodeEvalRestrictingTransformer(RestrictingNodeTransformer):
    """Restricting policy that additionally rewrites augmented assignment to
    a subscript (e.g. ``counts[key] += 1``).

    Every other node is handled by ``RestrictingNodeTransformer`` unchanged.
    """

    def visit_AugAssign(self, node):
        """Rewrite ``target[idx] <op>= value`` into a plain assignment.

        The statement becomes
        ``target[idx] = _inplacevar_('<op>=', target[idx], value)``, so the
        execution globals must provide ``_inplacevar_``. Augmented
        assignments whose target is not a subscript are delegated to the
        base class.

        NOTE: the target's container and index expressions appear twice in
        the rewritten statement (once to read, once to write), so any side
        effects they have happen twice.

        Args:
            node: The ``ast.AugAssign`` node being visited.

        Returns:
            The replacement ``ast.Assign`` node, or whatever the base class
            returns for non-subscript targets.
        """
        if not isinstance(node.target, ast.Subscript):
            return super().visit_AugAssign(node)

        # Function-scope import: only this method needs ``copy``.
        import copy

        # The read side needs its own node tree with Load context. Reusing
        # the Store-context target as a call argument is rejected by AST
        # validation in compile(). A deep copy also keeps the two visits
        # below from seeing each other's in-place rewrites.
        read_target = copy.deepcopy(node.target)
        read_target.ctx = ast.Load()

        # Run every user-written sub-expression through the policy's own
        # visitors so it is checked/rewritten like any other subscript or
        # expression. The synthetic ``_inplacevar_`` name below is
        # intentionally NOT visited (the base class builds its equivalent
        # node the same way for plain-name targets).
        store_target = self.visit(node.target)
        load_target = self.visit(read_target)
        operand = self.visit(node.value)

        new_node = ast.Assign(
            targets=[store_target],
            value=ast.Call(
                func=ast.Name('_inplacevar_', ast.Load()),
                args=[
                    # ast.Constant replaces ast.Str, which was removed in
                    # Python 3.12.
                    ast.Constant(IOPERATOR_TO_STR[type(node.op)]),
                    load_target,
                    operand,
                ],
                keywords=[]))

        copy_locations(new_node, node)
        return new_node
|
102 |
+
|
103 |
+
# TODO: decide if I should override the method below to allow variable names that start with an underscore
|
104 |
+
# def check_name(self, node, name, allow_magic_methods=False):
|
105 |
+
|
106 |
+
|
107 |
+
|
108 |
# TODO: Add BibTeX citation
|
109 |
_CITATION = """\
|
110 |
@InProceedings{huggingface:module,
|
|
|
137 |
additional_globals: a optional dict of additional globals to pass to the RestrictedPython interpreter
|
138 |
additional_locals: a optional dict of additional locals to pass to the RestrictedPython interpreter
|
139 |
allowed_imports: an optional list of string, modules the tested code is allowed to import
|
140 |
+
allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
|
141 |
|
142 |
Returns:
|
143 |
pass_at_k: dict with pass rates for each k
|
|
|
224 |
def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
|
225 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
226 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
227 |
+
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
|
228 |
"""Returns the scores"""
|
229 |
|
230 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
|
|
245 |
args = (
|
246 |
test_program, timeout, task_id, completion_id[task_id],
|
247 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
248 |
+
additional_globals, additional_locals,
|
249 |
+
allowed_imports, allow_str_format,
|
250 |
)
|
251 |
future = executor.submit(_check_correctness, *args)
|
252 |
futures.append(future)
|
|
|
294 |
def _check_correctness(check_program, timeout, task_id, completion_id,
|
295 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
296 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
297 |
+
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
|
298 |
"""
|
299 |
Evaluates the functional correctness of a completion by running the test
|
300 |
suite provided in the problem.
|
|
|
308 |
args = (
|
309 |
check_program, result, timeout,
|
310 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
311 |
+
additional_globals, additional_locals,
|
312 |
+
allowed_imports, allow_str_format,
|
313 |
)
|
314 |
p = multiprocessing.Process(target=_unsafe_execute, args=args)
|
315 |
p.start()
|
|
|
382 |
def _unsafe_execute(check_program, result, timeout,
|
383 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
384 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
385 |
+
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
|
386 |
|
387 |
with create_tempdir():
|
388 |
|
|
|
425 |
|
426 |
exec_globals['__builtins__']['__import__'] = AllowListImporter(allowed_imports)
|
427 |
|
428 |
+
if allow_str_format:
|
429 |
+
exec_globals['getattr'] = safer_getattr_allowing_string_format # type: ignore
|
430 |
+
exec_globals['__builtins__']['getattr'] = safer_getattr_allowing_string_format
|
431 |
+
|
432 |
if '__metaclass__' not in exec_globals:
|
433 |
exec_globals['__metaclass__'] = type # type: ignore
|
434 |
|
|
|
464 |
|
465 |
with swallow_io():
|
466 |
with time_limit(timeout):
|
467 |
+
byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=CodeEvalRestrictingTransformer)
|
468 |
exec(byte_code, exec_globals, additional_locals)
|
469 |
result.append("passed")
|
470 |
except EOFError:
|