Spaces:

guydav
/

restrictedpython_code_eval

Sleeping

App Files Files Community

guydav committed on Jun 22, 2023

Commit

491c03f

1 Parent(s): d5ea12a

Testing some additional changes

Browse files

Files changed (2) hide show

README.md +2 -0
restrictedpython_code_eval.py +79 -8

README.md CHANGED Viewed

@@ -60,6 +60,8 @@ In addition, this metric supports three additional arguments, specifying which i
 **`allowed_imports`** (`List[str] | None`): A list of allowed imports. Defaults to None.
 As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
 Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.

 **`allowed_imports`** (`List[str] | None`): A list of allowed imports. Defaults to None.
+**`allow_str_format`** (`bool`): Whether to allow the use of `str.format`. Defaults to False, as it's considered [harmful](http://lucumr.pocoo.org/2016/12/29/careful-with-str-format/).
 As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
 Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.

restrictedpython_code_eval.py CHANGED Viewed

@@ -15,7 +15,7 @@
 to execute the untrusted code returned by the model.
 Lightly adapted and mostly copied verbatim from the implementation in `evaluate`.
 """
 import contextlib
 import faulthandler
 import itertools
@@ -36,11 +36,75 @@ import evaluate
 # from evaluate.metrics import code_eval
 import datasets
 import numpy as np
-from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins
 from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
 from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr, guarded_unpack_sequence
 # TODO: Add BibTeX citation
 _CITATION = """\
 @InProceedings{huggingface:module,
@@ -73,6 +137,7 @@ Args:
     additional_globals: an optional dict of additional globals to pass to the RestrictedPython interpreter
     additional_locals: an optional dict of additional locals to pass to the RestrictedPython interpreter
     allowed_imports: an optional list of strings naming the modules the tested code is allowed to import
 Returns:
     pass_at_k: dict with pass rates for each k
@@ -159,7 +224,7 @@ class RestrictedPythonCodeEval(evaluate.Metric):
     def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
                  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
-                 allowed_imports: Optional[List[str]] = None):
         """Returns the scores"""
         if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
@@ -180,7 +245,8 @@ class RestrictedPythonCodeEval(evaluate.Metric):
                     args = (
                         test_program, timeout, task_id, completion_id[task_id],
                         use_safe_builtins, use_limited_builtins, use_utility_builtins,
-                        additional_globals, additional_locals, allowed_imports
                     )
                     future = executor.submit(_check_correctness, *args)
                     futures.append(future)
@@ -228,7 +294,7 @@ def estimate_pass_at_k(num_samples, num_correct, k):
 def _check_correctness(check_program, timeout, task_id, completion_id,
                        use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                        additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
-                       allowed_imports: Optional[List[str]] = None):
     """
     Evaluates the functional correctness of a completion by running the test
     suite provided in the problem.
@@ -242,7 +308,8 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
     args = (
         check_program, result, timeout,
         use_safe_builtins, use_limited_builtins, use_utility_builtins,
-        additional_globals, additional_locals, allowed_imports
     )
     p = multiprocessing.Process(target=_unsafe_execute, args=args)
     p.start()
@@ -315,7 +382,7 @@ class DefaultPrinter:
 def _unsafe_execute(check_program, result, timeout,
                     use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                     additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
-                    allowed_imports: Optional[List[str]] = None):
     with create_tempdir():
@@ -358,6 +425,10 @@ def _unsafe_execute(check_program, result, timeout,
                 exec_globals['__builtins__']['__import__'] = AllowListImporter(allowed_imports)
             if '__metaclass__' not in exec_globals:
                 exec_globals['__metaclass__'] = type  # type: ignore
@@ -393,7 +464,7 @@ def _unsafe_execute(check_program, result, timeout,
             with swallow_io():
                 with time_limit(timeout):
-                    byte_code = compile_restricted(check_program, filename="<model output>", mode="exec")
                     exec(byte_code, exec_globals, additional_locals)
             result.append("passed")
         except EOFError:

 to execute the untrusted code returned by the model.
 Lightly adapted and mostly copied verbatim from the implementation in `evaluate`.
 """
+import ast
 import contextlib
 import faulthandler
 import itertools
 # from evaluate.metrics import code_eval
 import datasets
 import numpy as np
+from RestrictedPython import compile_restricted, safe_builtins, limited_builtins, utility_builtins, RestrictingNodeTransformer
+from RestrictedPython.transformer import copy_locations, IOPERATOR_TO_STR
 from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
 from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr, guarded_unpack_sequence
+# patch their list implementation to allow empty lists and tuples
def limited_list(seq=()):
    """Build a new list from ``seq``, refusing to split strings.

    Variant of the ``list`` entry in ``limited_builtins`` that also supports
    the zero-argument form ``list()``.

    The default is an empty tuple rather than ``None`` so that an explicit
    ``list(None)`` in evaluated code still raises ``TypeError``, exactly as
    the real builtin does, instead of silently yielding ``[]``.

    Args:
        seq: Iterable whose items are copied into the new list. When omitted,
            an empty list is returned.

    Returns:
        A new ``list`` containing the items of ``seq``.

    Raises:
        TypeError: If ``seq`` is a ``str``, or is not iterable (e.g. ``None``).
    """
    if isinstance(seq, str):
        raise TypeError('cannot convert string to list')
    return list(seq)
+limited_builtins['list'] = limited_list
def limited_tuple(seq=()):
    """Build a new tuple from ``seq``, refusing to split strings.

    Variant of the ``tuple`` entry in ``limited_builtins`` that also supports
    the zero-argument form ``tuple()``.

    The default is an empty tuple rather than ``None`` so that an explicit
    ``tuple(None)`` in evaluated code still raises ``TypeError``, exactly as
    the real builtin does, instead of silently yielding ``()``.

    Args:
        seq: Iterable whose items are copied into the new tuple. When omitted,
            an empty tuple is returned.

    Returns:
        A ``tuple`` containing the items of ``seq``.

    Raises:
        TypeError: If ``seq`` is a ``str``, or is not iterable (e.g. ``None``).
    """
    if isinstance(seq, str):
        raise TypeError('cannot convert string to tuple')
    return tuple(seq)
+limited_builtins['tuple'] = limited_tuple
def safer_getattr_allowing_string_format(object, name, default=None, getattr=getattr):
    """Getattr implementation allowing str.format(), but preventing access to
    private attributes.

    format() is considered harmful, so use at own risk:
    http://lucumr.pocoo.org/2016/12/29/careful-with-str-format/

    Args:
        object: The object to read the attribute from.
        name: Attribute name; must be an exact ``str`` instance.
        default: Value returned when the attribute does not exist.
        getattr: The underlying lookup function (bound at definition time to
            the builtin ``getattr``).

    Returns:
        The attribute value, or ``default`` if the attribute is missing.

    Raises:
        TypeError: If ``name`` is not exactly of type ``str``.
        AttributeError: If ``name`` starts with an underscore.
    """
    # Require the exact str type: a str subclass supplied by untrusted code
    # could override startswith() to return False and slip a name such as
    # "__class__" past the underscore guard below.
    if type(name) is not str:
        raise TypeError('type(name) must be str')
    if name.startswith('_'):
        raise AttributeError(
            f'"{name}" is an invalid attribute name because it starts with "_"'
        )
    return getattr(object, name, default)
class CodeEvalRestrictingTransformer(RestrictingNodeTransformer):
    """RestrictedPython policy that additionally permits ``x[i] <op>= value``.

    Every other node is handled by ``RestrictingNodeTransformer`` unchanged;
    only augmented assignment to a subscript target is treated specially.
    """

    def visit_AugAssign(self, node):
        """Rewrite a subscript augmented assignment as a plain assignment.

        ``x[i] += v`` is turned into ``x[i] = _inplacevar_('+=', x[i], v)``.
        The store target, the load-side copy of the target and the right-hand
        value are each passed through ``self.visit`` so that the policy's
        regular checks and rewrites apply to them. Note that the container
        and index expressions are therefore evaluated twice at run time.

        Args:
            node: The ``ast.AugAssign`` node being visited.

        Returns:
            An ``ast.Assign`` node for subscript targets; otherwise whatever
            the base class returns for ``node``.
        """
        # allow += and similar operations for list indices
        if not isinstance(node.target, ast.Subscript):
            return super().visit_AugAssign(node)

        import copy  # local import: only needed for this rewrite

        store_target = node.target
        # The call argument must be a *Load*-context expression; reusing the
        # Store-context target there makes compile() reject the tree.  The
        # children are deep-copied because visiting mutates nodes in place,
        # so the two occurrences must not share sub-trees.
        load_target = ast.Subscript(
            value=copy.deepcopy(store_target.value),
            slice=copy.deepcopy(store_target.slice),
            ctx=ast.Load(),
        )
        copy_locations(load_target, store_target)

        # The synthesized `_inplacevar_` name is deliberately NOT visited:
        # NOTE(review): the policy's name check appears to reject identifiers
        # starting with an underscore - confirm against check_name.
        new_node = ast.Assign(
            targets=[self.visit(store_target)],
            value=ast.Call(
                func=ast.Name('_inplacevar_', ast.Load()),
                args=[
                    # ast.Constant replaces ast.Str, which is deprecated and
                    # no longer available on Python 3.12+.
                    ast.Constant(IOPERATOR_TO_STR[type(node.op)]),
                    self.visit(load_target),
                    self.visit(node.value),
                ],
                keywords=[]))
        copy_locations(new_node, node)
        return new_node

    # TODO: decide if I should override the method below to allow variable names that start with an underscore
    # def check_name(self, node, name, allow_magic_methods=False):
 # TODO: Add BibTeX citation
 _CITATION = """\
 @InProceedings{huggingface:module,
     additional_globals: an optional dict of additional globals to pass to the RestrictedPython interpreter
     additional_locals: an optional dict of additional locals to pass to the RestrictedPython interpreter
     allowed_imports: an optional list of strings naming the modules the tested code is allowed to import
+    allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
 Returns:
     pass_at_k: dict with pass rates for each k
     def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0,
                  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
+                 allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
         """Returns the scores"""
         if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
                     args = (
                         test_program, timeout, task_id, completion_id[task_id],
                         use_safe_builtins, use_limited_builtins, use_utility_builtins,
+                        additional_globals, additional_locals,
+                        allowed_imports, allow_str_format,
                     )
                     future = executor.submit(_check_correctness, *args)
                     futures.append(future)
 def _check_correctness(check_program, timeout, task_id, completion_id,
                        use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                        additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
+                       allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
     """
     Evaluates the functional correctness of a completion by running the test
     suite provided in the problem.
     args = (
         check_program, result, timeout,
         use_safe_builtins, use_limited_builtins, use_utility_builtins,
+        additional_globals, additional_locals,
+        allowed_imports, allow_str_format,
     )
     p = multiprocessing.Process(target=_unsafe_execute, args=args)
     p.start()
 def _unsafe_execute(check_program, result, timeout,
                     use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
                     additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
+                    allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False):
     with create_tempdir():
                 exec_globals['__builtins__']['__import__'] = AllowListImporter(allowed_imports)
+            if allow_str_format:
+                exec_globals['getattr'] = safer_getattr_allowing_string_format  # type: ignore
+                exec_globals['__builtins__']['getattr'] = safer_getattr_allowing_string_format
             if '__metaclass__' not in exec_globals:
                 exec_globals['__metaclass__'] = type  # type: ignore
             with swallow_io():
                 with time_limit(timeout):
+                    byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=CodeEvalRestrictingTransformer)
                     exec(byte_code, exec_globals, additional_locals)
             result.append("passed")
         except EOFError: