Spaces:
Sleeping
Sleeping
Allowing capturing and returning output
Browse files- README.md +4 -0
- restrictedpython_code_eval.py +17 -8
README.md
CHANGED
@@ -64,6 +64,10 @@ In addition, this metric supports three additional arguments, specifying which i
|
|
64 |
|
65 |
**`allow_underscore_variable_names`**: (`bool`): Whether or not to allow the use of variable names starting with an underscore. Defaults to False, as it's considered [harmful](https://stackoverflow.com/questions/1301346/what-is-the-meaning-of-a-single-and-a-double-underscore-before-an-object-name).
|
66 |
|
|
|
|
|
|
|
|
|
67 |
As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
|
68 |
|
69 |
Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
|
|
|
64 |
|
65 |
**`allow_underscore_variable_names`**: (`bool`): Whether or not to allow the use of variable names starting with an underscore. Defaults to False, as it's considered [harmful](https://stackoverflow.com/questions/1301346/what-is-the-meaning-of-a-single-and-a-double-underscore-before-an-object-name).
|
66 |
|
67 |
+
**`return_output`**: (`bool`): Whether or not to return a value produced by the executed code, namely the value of the variable named by `output_variable` once the code has run. Defaults to False.
|
68 |
+
|
69 |
+
**`output_variable`**: (`str`): The name of the variable whose value is returned when `return_output` is True. Defaults to `'output'`.
|
70 |
+
|
71 |
As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
|
72 |
|
73 |
Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
|
restrictedpython_code_eval.py
CHANGED
@@ -263,6 +263,8 @@ Args:
|
|
263 |
allowed_imports: an optional list of string, modules the tested code is allowed to import
|
264 |
allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
|
265 |
allow_underscore_variable_names: a bool indicating whether to allow the use of underscore variable names in the tested code
|
|
|
|
|
266 |
|
267 |
Returns:
|
268 |
pass_at_k: dict with pass rates for each k
|
@@ -350,7 +352,7 @@ class RestrictedPythonCodeEval(evaluate.Metric):
|
|
350 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
351 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
352 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
353 |
-
allow_underscore_variable_names: bool = False):
|
354 |
"""Returns the scores"""
|
355 |
|
356 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
@@ -372,7 +374,8 @@ class RestrictedPythonCodeEval(evaluate.Metric):
|
|
372 |
test_program, timeout, task_id, completion_id[task_id],
|
373 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
374 |
additional_globals, additional_locals,
|
375 |
-
allowed_imports, allow_str_format, allow_underscore_variable_names
|
|
|
376 |
)
|
377 |
future = executor.submit(_check_correctness, *args)
|
378 |
futures.append(future)
|
@@ -421,7 +424,7 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
|
|
421 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
422 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
423 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
424 |
-
allow_underscore_variable_names: bool = False):
|
425 |
"""
|
426 |
Evaluates the functional correctness of a completion by running the test
|
427 |
suite provided in the problem.
|
@@ -437,6 +440,7 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
|
|
437 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
438 |
additional_globals, additional_locals,
|
439 |
allowed_imports, allow_str_format, allow_underscore_variable_names,
|
|
|
440 |
)
|
441 |
p = multiprocessing.Process(target=_unsafe_execute, args=args)
|
442 |
p.start()
|
@@ -515,12 +519,11 @@ class DefaultPrinter:
|
|
515 |
print(*objects, **kwargs)
|
516 |
|
517 |
|
518 |
-
|
519 |
def _unsafe_execute(check_program, result, timeout,
|
520 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
521 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
522 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
523 |
-
allow_underscore_variable_names: bool = False):
|
524 |
|
525 |
with create_tempdir():
|
526 |
|
@@ -535,6 +538,9 @@ def _unsafe_execute(check_program, result, timeout,
|
|
535 |
# Disable functionalities that can make destructive changes to the test.
|
536 |
reliability_guard()
|
537 |
|
|
|
|
|
|
|
538 |
# Run program.
|
539 |
try:
|
540 |
builtins = {}
|
@@ -604,12 +610,15 @@ def _unsafe_execute(check_program, result, timeout,
|
|
604 |
with swallow_io():
|
605 |
policy_class = AllowAugmentedAssignAndUnderscoreVariableNamesRestrictingTransformer if allow_underscore_variable_names else AllowAugmentedAssignRestrictingTransformer
|
606 |
|
607 |
-
|
608 |
with time_limit(timeout):
|
609 |
byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=policy_class)
|
610 |
exec(byte_code, exec_globals, additional_locals)
|
611 |
-
|
612 |
-
|
|
|
|
|
|
|
|
|
613 |
except EOFError:
|
614 |
result.append("EOF error")
|
615 |
except TimeoutException:
|
|
|
263 |
allowed_imports: an optional list of string, modules the tested code is allowed to import
|
264 |
allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
|
265 |
allow_underscore_variable_names: a bool indicating whether to allow the use of underscore variable names in the tested code
|
266 |
+
return_output: a bool indicating whether to return the output of the tested code
|
267 |
+
output_variable: a string indicating the name of the variable to return if return_output is True
|
268 |
|
269 |
Returns:
|
270 |
pass_at_k: dict with pass rates for each k
|
|
|
352 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
353 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
354 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
355 |
+
allow_underscore_variable_names: bool = False, return_output: bool = False, output_variable: str = "output"):
|
356 |
"""Returns the scores"""
|
357 |
|
358 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
|
|
374 |
test_program, timeout, task_id, completion_id[task_id],
|
375 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
376 |
additional_globals, additional_locals,
|
377 |
+
allowed_imports, allow_str_format, allow_underscore_variable_names,
|
378 |
+
return_output, output_variable,
|
379 |
)
|
380 |
future = executor.submit(_check_correctness, *args)
|
381 |
futures.append(future)
|
|
|
424 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
425 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
426 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
427 |
+
allow_underscore_variable_names: bool = False, return_output: bool = False, output_variable: str = "output"):
|
428 |
"""
|
429 |
Evaluates the functional correctness of a completion by running the test
|
430 |
suite provided in the problem.
|
|
|
440 |
use_safe_builtins, use_limited_builtins, use_utility_builtins,
|
441 |
additional_globals, additional_locals,
|
442 |
allowed_imports, allow_str_format, allow_underscore_variable_names,
|
443 |
+
return_output, output_variable
|
444 |
)
|
445 |
p = multiprocessing.Process(target=_unsafe_execute, args=args)
|
446 |
p.start()
|
|
|
519 |
print(*objects, **kwargs)
|
520 |
|
521 |
|
|
|
522 |
def _unsafe_execute(check_program, result, timeout,
|
523 |
use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
|
524 |
additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
|
525 |
allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
|
526 |
+
allow_underscore_variable_names: bool = False, return_output: bool = False, output_variable: str = "output"):
|
527 |
|
528 |
with create_tempdir():
|
529 |
|
|
|
538 |
# Disable functionalities that can make destructive changes to the test.
|
539 |
reliability_guard()
|
540 |
|
541 |
+
if return_output and additional_locals is None:
|
542 |
+
additional_locals = {}
|
543 |
+
|
544 |
# Run program.
|
545 |
try:
|
546 |
builtins = {}
|
|
|
610 |
with swallow_io():
|
611 |
policy_class = AllowAugmentedAssignAndUnderscoreVariableNamesRestrictingTransformer if allow_underscore_variable_names else AllowAugmentedAssignRestrictingTransformer
|
612 |
|
|
|
613 |
with time_limit(timeout):
|
614 |
byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=policy_class)
|
615 |
exec(byte_code, exec_globals, additional_locals)
|
616 |
+
|
617 |
+
if return_output:
|
618 |
+
result.append(additional_locals[output_variable])
|
619 |
+
else:
|
620 |
+
result.append("passed")
|
621 |
+
|
622 |
except EOFError:
|
623 |
result.append("EOF error")
|
624 |
except TimeoutException:
|