guydav committed on
Commit
6b34a48
1 Parent(s): 3baab76

Allowing capturing and returning output

Browse files
Files changed (2) hide show
  1. README.md +4 -0
  2. restrictedpython_code_eval.py +17 -8
README.md CHANGED
@@ -64,6 +64,10 @@ In addition, this metric supports three additional arguments, specifying which i
64
 
65
  **`allow_underscore_variable_names`**: (`bool`): Whether or not to allow the use of variable names starting with an underscore. Defaults to False, as it's considered [harmful](https://stackoverflow.com/questions/1301346/what-is-the-meaning-of-a-single-and-a-double-underscore-before-an-object-name).
66
 
 
 
 
 
67
  As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
68
 
69
  Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
 
64
 
65
  **`allow_underscore_variable_names`**: (`bool`): Whether or not to allow the use of variable names starting with an underscore. Defaults to False, as it's considered [harmful](https://stackoverflow.com/questions/1301346/what-is-the-meaning-of-a-single-and-a-double-underscore-before-an-object-name).
66
 
67
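+ **`return_output`**: (`bool`): Whether or not to return the output of the code: when True, a successful run reports the value of the variable named by `output_variable` instead of `"passed"`. Defaults to False.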
68
+
69
+ **`output_variable`**: (`str`): The name of the variable in the executed code whose value is returned as the output when `return_output` is True. Defaults to `'output'`.
70
+
71
  As the new arguments are optional, this could be used as a drop-in replacement for `code_eval`.
72
 
73
  Additionally, this metric sets several different `globals` if they are not provided as additional globals. The full list of globals set is: `__metaclass__, __name__, _getiter_, _iter_unpack_sequence_, _getitem_, getattr, _write_, _inplacevar_, _print_`. See the code for additional details.
restrictedpython_code_eval.py CHANGED
@@ -263,6 +263,8 @@ Args:
263
  allowed_imports: an optional list of string, modules the tested code is allowed to import
264
  allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
265
  allow_underscore_variable_names: a bool indicating whether to allow the use of underscore variable names in the tested code
 
 
266
 
267
  Returns:
268
  pass_at_k: dict with pass rates for each k
@@ -350,7 +352,7 @@ class RestrictedPythonCodeEval(evaluate.Metric):
350
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
351
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
352
  allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
353
- allow_underscore_variable_names: bool = False):
354
  """Returns the scores"""
355
 
356
  if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
@@ -372,7 +374,8 @@ class RestrictedPythonCodeEval(evaluate.Metric):
372
  test_program, timeout, task_id, completion_id[task_id],
373
  use_safe_builtins, use_limited_builtins, use_utility_builtins,
374
  additional_globals, additional_locals,
375
- allowed_imports, allow_str_format, allow_underscore_variable_names
 
376
  )
377
  future = executor.submit(_check_correctness, *args)
378
  futures.append(future)
@@ -421,7 +424,7 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
421
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
422
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
423
  allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
424
- allow_underscore_variable_names: bool = False):
425
  """
426
  Evaluates the functional correctness of a completion by running the test
427
  suite provided in the problem.
@@ -437,6 +440,7 @@ def _check_correctness(check_program, timeout, task_id, completion_id,
437
  use_safe_builtins, use_limited_builtins, use_utility_builtins,
438
  additional_globals, additional_locals,
439
  allowed_imports, allow_str_format, allow_underscore_variable_names,
 
440
  )
441
  p = multiprocessing.Process(target=_unsafe_execute, args=args)
442
  p.start()
@@ -515,12 +519,11 @@ class DefaultPrinter:
515
  print(*objects, **kwargs)
516
 
517
 
518
-
519
  def _unsafe_execute(check_program, result, timeout,
520
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
521
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
522
  allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
523
- allow_underscore_variable_names: bool = False):
524
 
525
  with create_tempdir():
526
 
@@ -535,6 +538,9 @@ def _unsafe_execute(check_program, result, timeout,
535
  # Disable functionalities that can make destructive changes to the test.
536
  reliability_guard()
537
 
 
 
 
538
  # Run program.
539
  try:
540
  builtins = {}
@@ -604,12 +610,15 @@ def _unsafe_execute(check_program, result, timeout,
604
  with swallow_io():
605
  policy_class = AllowAugmentedAssignAndUnderscoreVariableNamesRestrictingTransformer if allow_underscore_variable_names else AllowAugmentedAssignRestrictingTransformer
606
 
607
-
608
  with time_limit(timeout):
609
  byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=policy_class)
610
  exec(byte_code, exec_globals, additional_locals)
611
-
612
- result.append("passed")
 
 
 
 
613
  except EOFError:
614
  result.append("EOF error")
615
  except TimeoutException:
 
263
  allowed_imports: an optional list of string, modules the tested code is allowed to import
264
  allow_str_format: a bool indicating whether to allow the use of str.format() in the tested code
265
  allow_underscore_variable_names: a bool indicating whether to allow the use of underscore variable names in the tested code
266
+ return_output: a bool indicating whether to return the output of the tested code
267
+ output_variable: a string indicating the name of the variable to return if return_output is True
268
 
269
  Returns:
270
  pass_at_k: dict with pass rates for each k
 
352
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
353
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
354
  allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
355
+ allow_underscore_variable_names: bool = False, return_output: bool = False, output_variable: str = "output"):
356
  """Returns the scores"""
357
 
358
  if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
 
374
  test_program, timeout, task_id, completion_id[task_id],
375
  use_safe_builtins, use_limited_builtins, use_utility_builtins,
376
  additional_globals, additional_locals,
377
+ allowed_imports, allow_str_format, allow_underscore_variable_names,
378
+ return_output, output_variable,
379
  )
380
  future = executor.submit(_check_correctness, *args)
381
  futures.append(future)
 
424
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
425
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
426
  allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
427
+ allow_underscore_variable_names: bool = False, return_output: bool = False, output_variable: str = "output"):
428
  """
429
  Evaluates the functional correctness of a completion by running the test
430
  suite provided in the problem.
 
440
  use_safe_builtins, use_limited_builtins, use_utility_builtins,
441
  additional_globals, additional_locals,
442
  allowed_imports, allow_str_format, allow_underscore_variable_names,
443
+ return_output, output_variable
444
  )
445
  p = multiprocessing.Process(target=_unsafe_execute, args=args)
446
  p.start()
 
519
  print(*objects, **kwargs)
520
 
521
 
 
522
  def _unsafe_execute(check_program, result, timeout,
523
  use_safe_builtins: bool = True, use_limited_builtins: bool = True, use_utility_builtins: bool = True,
524
  additional_globals: Optional[Dict[str, Any]] = None, additional_locals: Optional[Dict[str, Any]] = None,
525
  allowed_imports: Optional[List[str]] = None, allow_str_format: bool = False,
526
+ allow_underscore_variable_names: bool = False, return_output: bool = False, output_variable: str = "output"):
527
 
528
  with create_tempdir():
529
 
 
538
  # Disable functionalities that can make destructive changes to the test.
539
  reliability_guard()
540
 
541
+ if return_output and additional_locals is None:
542
+ additional_locals = {}
543
+
544
  # Run program.
545
  try:
546
  builtins = {}
 
610
  with swallow_io():
611
  policy_class = AllowAugmentedAssignAndUnderscoreVariableNamesRestrictingTransformer if allow_underscore_variable_names else AllowAugmentedAssignRestrictingTransformer
612
 
 
613
  with time_limit(timeout):
614
  byte_code = compile_restricted(check_program, filename="<model output>", mode="exec", policy=policy_class)
615
  exec(byte_code, exec_globals, additional_locals)
616
+
617
+ if return_output:
618
+ result.append(additional_locals[output_variable])
619
+ else:
620
+ result.append("passed")
621
+
622
  except EOFError:
623
  result.append("EOF error")
624
  except TimeoutException: