import argparse
import json
import os

import numpy as np

from evalplus.data import get_human_eval_plus, get_human_eval_plus_inputs


def load_eval_results(root):
    """Yield the ``"eval"`` mapping of every run directory found under *root*.

    An entry of *root* is used only when its name ends in a digit and it
    contains an ``eval_results.json`` file; every other entry is reported
    with a ``skip`` line on stdout and ignored.

    Args:
        root: Directory whose entries are scanned with ``os.listdir``.

    Yields:
        The value stored under the ``"eval"`` key of each JSON file.
    """
    for path in os.listdir(root):
        eval_json_path = os.path.join(root, path, "eval_results.json")
        if not os.path.isfile(eval_json_path) or not path[-1].isdigit():
            print(f"skip {path}")
            continue
        # Use a context manager so the handle is closed after each file
        # instead of being left open for the rest of the run.
        with open(eval_json_path, "r") as f:
            yield json.load(f)["eval"]


def xor_failures(bitvec, results):
    """XOR the failure mask of every entry of *results* into *bitvec* in place.

    Args:
        bitvec: Boolean NumPy array with one slot per test input.
        results: Iterable of ``(status, details)`` pairs. ``details`` is
            either ``None`` (the entry is skipped) or a sequence of per-test
            flags; falsy flags are the ones counted as failures.
    """
    for _status, details in results:
        if details is None:
            continue
        fails = np.logical_not(details)
        n = len(details)
        # Only the first ``n`` slots are touched, so a ``details`` list that
        # is shorter than the input list leaves the remaining slots as-is.
        # NOTE(review): XOR toggles a slot on every failure, so a test that
        # fails an even number of times ends up cleared again -- confirm
        # that ``np.logical_or`` was not the intended accumulation.
        bitvec[:n] = np.logical_xor(bitvec[:n], fails)


def distill_testsuite(problems, plus_inputs, eval_results):
    """Select, per task, the inputs whose failure bit is set after all runs.

    Args:
        problems: Iterable of problem dicts, each providing ``"task_id"`` and
            ``"base_input"``. Any iterable works, including a ``dict.values()``
            view.
        plus_inputs: Mapping from task id to that task's extra inputs.
        eval_results: Iterable of mappings from task id to a dict with
            ``"base"`` and ``"plus"`` lists of ``(status, details)`` pairs.

    Returns:
        A list with one ``{"task_id": ..., "inputs": [...]}`` dict per task,
        in the order the tasks first appear in *problems*. Selected base
        inputs come first, followed by selected plus inputs.

    Raises:
        KeyError: If an evaluation result mentions a task id that is not
            present in *problems*.
    """
    # Index problems by task id. The previous positional lookup subscripted a
    # ``dict.values()`` view, which raises ``TypeError``.
    by_id = {problem["task_id"]: problem for problem in problems}

    base_bvs = {
        task_id: np.zeros(len(problem["base_input"]), dtype=bool)
        for task_id, problem in by_id.items()
    }
    plus_bvs = {
        task_id: np.zeros(len(plus_inputs[task_id]), dtype=bool)
        for task_id in by_id
    }

    for res in eval_results:
        for task_id, v in res.items():
            xor_failures(base_bvs[task_id], v["base"])
            xor_failures(plus_bvs[task_id], v["plus"])

    testsuite = []
    for task_id, problem in by_id.items():
        bbv = base_bvs[task_id]
        pbv = plus_bvs[task_id]
        base_input = problem["base_input"]
        task_plus_inputs = plus_inputs[task_id]

        new_inputs = [base_input[i] for i in np.nonzero(bbv)[0]]
        new_inputs.extend(task_plus_inputs[i] for i in np.nonzero(pbv)[0])

        testsuite.append({"task_id": task_id, "inputs": new_inputs})
        print(
            task_id, f" org base {len(bbv)}; org plus {len(pbv)}; new {len(new_inputs)}"
        )
    return testsuite


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", type=str, default="/JawTitan/EvalPlus/humaneval")
    args = parser.parse_args()

    plus_inputs = get_human_eval_plus_inputs()
    problems = get_human_eval_plus().values()

    testsuite = distill_testsuite(
        problems, plus_inputs, load_eval_results(args.root)
    )

    # Summary statistics over the number of selected inputs per task.
    new_sizes = np.array([len(entry["inputs"]) for entry in testsuite])
    print(f"{new_sizes.mean() = }, {new_sizes.min() = }, {new_sizes.max() = }")