File size: 2,961 Bytes
d26280a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
"""
This library is used for the evaluation of gpt-engineer's performance on
editing and creating code. This is very low level in that it looks at the
code written. It is possible that the AI could solve the problem in ways
that we cannot foresee; with this in mind, higher level tests are always
better than lower.
The scope will be relatively limited to a few languages but this could
be expanded.
"""
from gpt_engineer.core.files_dict import FilesDict
# Name of the top-level list in the YAML file.
EVAL_LIST_NAME = "evaluations"
def check_language(eval_d: dict) -> None:
    """Ensure the evaluation targets a supported language.

    Args:
        eval_d: Evaluation description; its ``"language"`` entry is read.

    Raises:
        KeyError: If ``eval_d`` has no ``"language"`` entry.
        Exception: If the language is anything other than ``"python"``.
    """
    # Guard clause: "python" is the only value accepted here.
    if eval_d["language"] == "python":
        return
    raise Exception(f"Language: {eval_d['language']} is not supported.")
def assert_exists_in_source_code(eval_d: dict, files_dict: FilesDict) -> bool:
    """Check whether some text exists in a source file.

    Args:
        eval_d: Evaluation description; reads the ``"source_file"`` and
            ``"existing_string"`` entries.
        files_dict: Mapping from file name to that file's source text.

    Returns:
        True if the text of ``eval_d["existing_string"]`` occurs anywhere in
        the selected file, False otherwise.
    """
    file_name = eval_d["source_file"]
    wanted_text = eval_d["existing_string"]
    return wanted_text in files_dict[file_name]
def run_code_class_has_property(eval_d: dict, files_dict: FilesDict) -> bool:
    """Execute a source file and check that a class instance has a property.

    The file named by ``eval_d["source_file"]`` is executed, the class named
    by ``eval_d["class_name"]`` is instantiated without arguments, and the
    instance is checked for the attribute ``eval_d["property_name"]``.

    Args:
        eval_d: Evaluation description; reads the ``"language"``,
            ``"source_file"``, ``"class_name"`` and ``"property_name"``
            entries.
        files_dict: Mapping from file name to that file's source text.

    Returns:
        True if the instance has the attribute, False otherwise.

    Raises:
        Exception: If ``check_language`` rejects the language.
        TypeError: If the source does not define ``class_name`` as something
            callable.
    """
    check_language(eval_d)
    source_body = files_dict[eval_d["source_file"]]

    # SECURITY: exec() runs the evaluated source with full privileges; only
    # feed it code you are prepared to run.
    # Execute in a dedicated namespace. A bare exec() inside a function
    # writes into an implicit locals mapping that cannot be read back
    # reliably afterwards (not at all on Python 3.13+), and functions defined
    # by the source could not see its other top-level names. An explicit dict
    # also keeps the evaluated code away from this module's globals.
    namespace: dict = {}
    exec(source_body, namespace)

    class_ref = namespace.get(eval_d["class_name"])
    if not callable(class_ref):
        raise TypeError(
            f"'{eval_d['class_name']}' is not defined as a callable in "
            f"'{eval_d['source_file']}'."
        )
    ob = class_ref()
    return hasattr(ob, eval_d["property_name"])
def run_code_class_has_property_w_value(eval_d: dict, files_dict: FilesDict) -> bool:
    """Execute a source file and check a class instance's property value.

    The file named by ``eval_d["source_file"]`` is executed, the class named
    by ``eval_d["class_name"]`` is instantiated without arguments, and the
    attribute ``eval_d["property_name"]`` of the instance is compared with
    ``eval_d["expected_value"]``.

    Args:
        eval_d: Evaluation description; reads the ``"language"``,
            ``"source_file"``, ``"class_name"``, ``"property_name"`` and
            ``"expected_value"`` entries.
        files_dict: Mapping from file name to that file's source text.

    Returns:
        The result of comparing the attribute value with the expected value
        using ``==``.

    Raises:
        Exception: If ``check_language`` rejects the language.
        TypeError: If the source does not define ``class_name`` as something
            callable.
        AssertionError: If the instance does not have the attribute.
    """
    check_language(eval_d)
    source_body = files_dict[eval_d["source_file"]]

    # SECURITY: exec() runs the evaluated source with full privileges; only
    # feed it code you are prepared to run.
    # Execute in a dedicated namespace. A bare exec() inside a function
    # writes into an implicit locals mapping that cannot be read back
    # reliably afterwards (not at all on Python 3.13+), and functions defined
    # by the source could not see its other top-level names. An explicit dict
    # also keeps the evaluated code away from this module's globals.
    namespace: dict = {}
    exec(source_body, namespace)

    class_ref = namespace.get(eval_d["class_name"])
    if not callable(class_ref):
        raise TypeError(
            f"'{eval_d['class_name']}' is not defined as a callable in "
            f"'{eval_d['source_file']}'."
        )
    ob = class_ref()

    property_name = eval_d["property_name"]
    # Raise explicitly instead of using `assert`, which is stripped when
    # Python runs with -O; AssertionError is kept so callers see the same
    # exception type as before.
    if not hasattr(ob, property_name):
        raise AssertionError(
            f"'{eval_d['class_name']}' instance has no attribute "
            f"'{property_name}'."
        )
    return getattr(ob, property_name) == eval_d["expected_value"]
def run_code_eval_function(eval_d: dict, files_dict: FilesDict) -> bool:
    """Execute a source file, call one of its functions and check the result.

    Similar to ``run_code_class_has_property()`` except that it evaluates a
    function call: the file named by ``eval_d["source_file"]`` is executed,
    the function named by ``eval_d["function_name"]`` is called without
    arguments, and its return value is compared with
    ``eval_d["expected_value"]``.

    Args:
        eval_d: Evaluation description; reads the ``"language"``,
            ``"source_file"``, ``"function_name"`` and ``"expected_value"``
            entries.
        files_dict: Mapping from file name to that file's source text.

    Returns:
        The result of comparing the function's return value with the expected
        value using ``==``.

    Raises:
        Exception: If ``check_language`` rejects the language.
        TypeError: If the source does not define ``function_name`` as
            something callable.
    """
    check_language(eval_d)
    source_body = files_dict[eval_d["source_file"]]

    # SECURITY: exec() runs the evaluated source with full privileges; only
    # feed it code you are prepared to run.
    # Execute in a dedicated namespace and look the function up there.
    # Definitions made by a bare exec() inside a function are not stored in
    # this module's globals(), so the previous globals() lookup could not
    # find a function defined by the evaluated source.
    namespace: dict = {}
    exec(source_body, namespace)

    function_ref = namespace.get(eval_d["function_name"])
    if not callable(function_ref):
        raise TypeError(
            f"'{eval_d['function_name']}' is not defined as a callable in "
            f"'{eval_d['source_file']}'."
        )
    # TODO: add the ability to have function arguments
    return function_ref() == eval_d["expected_value"]
def check_evaluation_component(eval_d: dict, files_dict: FilesDict) -> bool:
    """Run the evaluation check selected by ``eval_d["type"]``.

    Args:
        eval_d: Evaluation description; its ``"type"`` entry selects the
            check, and the whole dict is passed on to that check.
        files_dict: Mapping from file name to source text, passed on to the
            selected check.

    Returns:
        Whatever the selected check returns.

    Raises:
        Exception: If the type is missing or matches none of the known checks.
    """
    test_type = eval_d.get("type")

    # Ordered (type name, check function) pairs, compared one by one.
    known_checks = (
        ("assert_exists_in_source_code", assert_exists_in_source_code),
        ("run_code_class_has_property", run_code_class_has_property),
        (
            "run_code_class_has_property_w_value",
            run_code_class_has_property_w_value,
        ),
        ("run_code_eval_function", run_code_eval_function),
        # The following are for new code
    )
    for type_name, check in known_checks:
        if test_type == type_name:
            return check(eval_d, files_dict)

    raise Exception(f"Test type '{test_type}' is not recognized.")
|