guydav committed on
Commit
491c03f
1 Parent(s): d5ea12a

Testing some additional changes

Browse files
Files changed (2) hide show
  1. README.md +2 -0
  2. restrictedpython_code_eval.py +79 -8
README.md CHANGED
@@ -60,6 +60,8 @@ In addition, this metric supports three additional arguments, specifying which i
60
 
61
  **`allowed_imports`** (`List[str] | None`): A list of allowed imports. Defaults to None.
62
 
 
 
63
  As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
64
 
65
  Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
 
60
 
61
  **`allowed_imports`** (`List[str] | None`): A list of allowed imports. Defaults to None.
62
 
63
+ **`allow_str_format`** (`bool`): Whether or not to allow the use of `str.format`. Defaults to False, as it's considered [harmful](http://lucumr.pocoo.org/2016/12/29/careful-with-str-format/).
64
+
65
  As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
66
 
67
  Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
restrictedpython_code_eval.py CHANGED
@@ -15,7 +15,7 @@
15
  to execute the untrusted code returned by the model.
16
  Lightly adapted and mostly copied verbatim from the implementation in `evaluate`.
17
  """
18
-
19
  import contextlib
20
  import faulthandler
21
  import itertools
@@ -36,11 +36,75 @@ import evaluate
36
  # from evaluate.metrics import code_eval
37
  import datasets
38
  import numpy as np
39
- from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins
 
40
  from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
41
  from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr, guarded_unpack_sequence
42
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  # TODO: Add BibTeX citation
45
  _CITATION = """\
46
  @InProceedings{huggingface:module,
@@ -73,6 +137,7 @@ Args:
73
  additional_globals: a optional dict of additional globals to pass to the RestrictedPython interpreter
74
  additional_locals: a optional dict of additional locals to pass to the RestrictedPython interpreter
75
  allowed_imports: an optional list of string, modules the tested code is allowed to import
 
76
 
77
  Returns:
78
  pass_at_k: dict with pass rates for each k
@@ -159,7 +224,7 @@ class RestrictedPythonCodeEval(evaluate.Metric):
159
  def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
160
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
161
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
162
- allowed_imports: Optional[List[str]] = None):
163
  """Returns the scores"""
164
 
165
  if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
@@ -180,7 +245,8 @@ class RestrictedPythonCodeEval(evaluate.Metric):
180
  args = (
181
  test_program, timeout, task_id, completion_id[task_id],
182
  use_safe_builtins, use_limited_builtins, use_utility_builtins,
183
- additional_globals, additional_locals, allowed_imports
 
184
  )
185
  future = executor.submit(_check_correctness, *args)
186
  futures.append(future)
@@ -228,7 +294,7 @@ def estimate_pass_at_k(num_samples, num_correct, k):
228
  def _check_correctness(check_program, timeout, task_id, completion_id,
229
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
230
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
231
- allowed_imports: Optional[List[str]] = None):
232
  """
233
  Evaluates the functional correctness of a completion by running the test
234
  suite provided in the problem.
@@ -242,7 +308,8 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
242
  args = (
243
  check_program, result, timeout,
244
  use_safe_builtins, use_limited_builtins, use_utility_builtins,
245
- additional_globals, additional_locals, allowed_imports
 
246
  )
247
  p = multiprocessing.Process(target=_unsafe_execute, args=args)
248
  p.start()
@@ -315,7 +382,7 @@ class DefaultPrinter:
315
  def _unsafe_execute(check_program, result, timeout,
316
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
317
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
318
- allowed_imports: Optional[List[str]] = None):
319
 
320
  with create_tempdir():
321
 
@@ -358,6 +425,10 @@ def _unsafe_execute(check_program, result, timeout,
358
 
359
  exec_globals['__builtins__']['__import__'] = AllowListImporter(allowed_imports)
360
 
 
 
 
 
361
  if '__metaclass__' not in exec_globals:
362
  exec_globals['__metaclass__'] = type # type: ignore
363
 
@@ -393,7 +464,7 @@ def _unsafe_execute(check_program, result, timeout,
393
 
394
  with swallow_io():
395
  with time_limit(timeout):
396
- byte_code = compile_restricted(check_program, filename="<model output>", mode="exec")
397
  exec(byte_code, exec_globals, additional_locals)
398
  result.append("passed")
399
  except EOFError:
 
15
  to execute the untrusted code returned by the model.
16
  Lightly adapted and mostly copied verbatim from the implementation in `evaluate`.
17
  """
18
+ import ast
19
  import contextlib
20
  import faulthandler
21
  import itertools
 
36
  # from evaluate.metrics import code_eval
37
  import datasets
38
  import numpy as np
39
+ from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins, RestrictingNodeTransformer
40
+ from RestrictedPython.transformer import copy_locations, IOPERATOR_TO_STR
41
  from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
42
  from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr, guarded_unpack_sequence
43
 
44
 
45
+ # patch their list implementation to allow empty lists and tuples
46
def limited_list(seq=None):
    """Return a new list built from ``seq``.

    Called with no argument (or ``None``) it returns an empty list.
    Strings are refused with a ``TypeError``; any other value is handed
    to ``list()``.
    """
    if seq is None:
        return []
    if isinstance(seq, str):
        raise TypeError('cannot convert string to list')
    return list(seq)
50
+
51
+
52
+ limited_builtins['list'] = limited_list
53
+
54
+
55
def limited_tuple(seq=None):
    """Return a tuple built from ``seq``.

    Called with no argument (or ``None``) it returns an empty tuple.
    Strings are refused with a ``TypeError``; any other value is handed
    to ``tuple()``.
    """
    if seq is None:
        return ()
    if isinstance(seq, str):
        raise TypeError('cannot convert string to tuple')
    return tuple(seq)
59
+
60
+
61
+ limited_builtins['tuple'] = limited_tuple
62
+
63
+
64
def safer_getattr_allowing_string_format(object, name, default=None, getattr=getattr):
    """Getattr implementation allowing str.format(), but preventing access to
    private attributes.

    format() is considered harmful, so use at own risk:
    http://lucumr.pocoo.org/2016/12/29/careful-with-str-format/

    Args:
        object: the object whose attribute is read.
        name: the attribute name; must be an exact ``str`` instance.
        default: value returned when the attribute does not exist. Because it
            defaults to ``None``, a missing attribute yields ``None`` instead
            of raising.
        getattr: the underlying lookup function (the builtin by default).

    Returns:
        The attribute value, or ``default`` when the attribute is missing.

    Raises:
        TypeError: if ``name`` is not exactly of type ``str``.
        AttributeError: if ``name`` starts with an underscore.
    """
    # Insist on an exact ``str``: a str subclass can override ``startswith``
    # and so slip a private name past the underscore check below.
    if type(name) is not str:
        raise TypeError('type(name) must be str')
    if name.startswith('_'):
        raise AttributeError(
            '"{name}" is an invalid attribute name because it '
            'starts with "_"'.format(name=name)
        )
    return getattr(object, name, default)
78
+
79
+
80
class CodeEvalRestrictingTransformer(RestrictingNodeTransformer):
    """Restricting transformer that also accepts ``seq[i] += value``.

    Augmented assignments whose target is a subscript are rewritten to
    ``seq[i] = _inplacevar_("+=", seq[i], value)``. Every other augmented
    assignment is delegated to the parent class.
    """

    def visit_AugAssign(self, node):
        """Rewrite an augmented assignment on a subscript into a plain assignment.

        Note that the container and index expressions appear twice in the
        rewritten statement (once to read, once to write), so they are
        evaluated twice at run time.

        Args:
            node: the ``ast.AugAssign`` node being visited.

        Returns:
            An ``ast.Assign`` node for subscript targets, otherwise whatever
            the parent implementation returns.
        """
        if not isinstance(node.target, ast.Subscript):
            return super().visit_AugAssign(node)

        # Local import keeps this block self-contained.
        import copy

        # The original target carries a Store context; compile() rejects an
        # AST in which such a node is used as a value, so the read side needs
        # its own Load-context subscript. It is deep-copied so that visiting
        # the write side (which mutates nodes in place) cannot leave
        # already-transformed children inside the read side.
        read_target = ast.Subscript(
            value=copy.deepcopy(node.target.value),
            slice=copy.deepcopy(node.target.slice),
            ctx=ast.Load(),
        )
        copy_locations(read_target, node.target)

        # NOTE(review): the children are passed through self.visit so that the
        # transformer's own checks and rewrites apply to the index and the
        # right-hand side as well; the previous version returned them
        # unvisited. The `_inplacevar_` name itself is deliberately NOT
        # visited - presumably the transformer rejects underscore-prefixed
        # names; confirm against RestrictedPython.
        new_node = ast.Assign(
            targets=[self.visit(node.target)],
            value=ast.Call(
                func=ast.Name('_inplacevar_', ast.Load()),
                args=[
                    # ast.Str is deprecated since Python 3.8 and removed in 3.12.
                    ast.Constant(value=IOPERATOR_TO_STR[type(node.op)]),
                    self.visit(read_target),
                    self.visit(node.value),
                ],
                keywords=[]))

        copy_locations(new_node, node)
        return new_node
102
+
103
+ # TODO: decide if I should override the method below to allow variable names that start with an underscore
104
+ # def check_name(self, node, name, allow_magic_methods=False):
105
+
106
+
107
+
108
  # TODO: Add BibTeX citation
109
  _CITATION = """\
110
  @InProceedings{huggingface:module,
 
137
  additional_globals: an optional dict of additional globals to pass to the RestrictedPython interpreter
138
  additional_locals: an optional dict of additional locals to pass to the RestrictedPython interpreter
139
  allowed_imports: an optional list of strings, modules the tested code is allowed to import
140
+ allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
141
 
142
  Returns:
143
  pass_at_k: dict with pass rates for each k
 
224
  def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
225
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
226
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
227
+ allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
228
  """Returns the scores"""
229
 
230
  if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
 
245
  args = (
246
  test_program, timeout, task_id, completion_id[task_id],
247
  use_safe_builtins, use_limited_builtins, use_utility_builtins,
248
+ additional_globals, additional_locals,
249
+ allowed_imports, allow_str_format,
250
  )
251
  future = executor.submit(_check_correctness, *args)
252
  futures.append(future)
 
294
  def _check_correctness(check_program, timeout, task_id, completion_id,
295
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
296
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
297
+ allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
298
  """
299
  Evaluates the functional correctness of a completion by running the test
300
  suite provided in the problem.
 
308
  args = (
309
  check_program, result, timeout,
310
  use_safe_builtins, use_limited_builtins, use_utility_builtins,
311
+ additional_globals, additional_locals,
312
+ allowed_imports, allow_str_format,
313
  )
314
  p = multiprocessing.Process(target=_unsafe_execute, args=args)
315
  p.start()
 
382
  def _unsafe_execute(check_program, result, timeout,
383
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
384
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
385
+ allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
386
 
387
  with create_tempdir():
388
 
 
425
 
426
  exec_globals['__builtins__']['__import__'] = AllowListImporter(allowed_imports)
427
 
428
+ if allow_str_format:
429
+ exec_globals['getattr'] = safer_getattr_allowing_string_format # type: ignore
430
+ exec_globals['__builtins__']['getattr'] = safer_getattr_allowing_string_format
431
+
432
  if '__metaclass__' not in exec_globals:
433
  exec_globals['__metaclass__'] = type # type: ignore
434
 
 
464
 
465
  with swallow_io():
466
  with time_limit(timeout):
467
+ byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=CodeEvalRestrictingTransformer)
468
  exec(byte_code, exec_globals, additional_locals)
469
  result.append("passed")
470
  except EOFError: