File size: 1,712 Bytes
ca5fb3d 2718fde ca5fb3d 2718fde ca5fb3d 2718fde ca5fb3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import json
import os
from collections import defaultdict
from refactor_eval_results import AGENTIC_LOG_MODEL_NAME_MAP, AGENTIC_TASKS
def _index_task_logs(log_dir):
    """Return a ``{task_name: log_file_name}`` dict for the JSON logs in *log_dir*.

    Only files ending in ``.json`` are read.  The task name is the last
    ``/``-separated component of the log's ``["eval"]["task"]`` field.
    """
    task_logs = {}
    # os.listdir() order is arbitrary; sorting makes the winner deterministic
    # (the lexicographically last file name) when several logs share a task.
    for log_file in sorted(os.listdir(log_dir)):
        if not log_file.endswith(".json"):
            continue
        with open(os.path.join(log_dir, log_file), "r", encoding="utf-8") as f:
            result = json.load(f)
        task_name = result["eval"]["task"].split("/")[-1]
        task_logs[task_name] = log_file
    return task_logs


def main(
    base_bm_input_path="./base_benchmarking_logs",
    agentic_bm_input_path="/fs01/projects/aieng/public/inspect_evals/agentic_benchmarking_runs",
    output_path="./inspect_log_file_names.json",
    agentic_model_map=None,
    agentic_tasks=None,
):
    """Build a ``{model: {task: log_file_name}}`` index and write it as JSON.

    Base benchmark logs are read from ``<base_bm_input_path>/<model>/``.
    Agentic logs are read from
    ``<agentic_bm_input_path>/<model log dir>/<task>/`` and merged into the
    same per-model dict, overriding a base entry with the same task name.

    Args:
        base_bm_input_path: Directory holding one sub-directory per model.
        agentic_bm_input_path: Root directory of the agentic benchmarking runs.
        output_path: Destination of the resulting JSON file.
        agentic_model_map: Mapping of model name to its log directory name
            under ``agentic_bm_input_path``.  Defaults to
            ``AGENTIC_LOG_MODEL_NAME_MAP``.
        agentic_tasks: Iterable of task sub-directory names.  Defaults to
            ``AGENTIC_TASKS``.

    Raises:
        FileNotFoundError: If ``base_bm_input_path`` does not exist.
        KeyError: If a log file lacks the ``["eval"]["task"]`` field.
        json.JSONDecodeError: If a ``.json`` log file is not valid JSON.
    """
    # Resolve the project-level defaults lazily so callers can override them.
    if agentic_model_map is None:
        agentic_model_map = AGENTIC_LOG_MODEL_NAME_MAP
    if agentic_tasks is None:
        agentic_tasks = AGENTIC_TASKS

    log_file_map = {}

    for model_name in sorted(os.listdir(base_bm_input_path)):
        model_dir = os.path.join(base_bm_input_path, model_name)
        # Stray files next to the model directories are not models.
        if not os.path.isdir(model_dir):
            continue
        log_file_map[model_name] = _index_task_logs(model_dir)

    for model_name, log_dir_name in agentic_model_map.items():
        model_log_dir = os.path.join(agentic_bm_input_path, log_dir_name)
        if not os.path.isdir(model_log_dir):
            continue
        # A model may have agentic logs without any base benchmark logs, so
        # its entry must be created on demand instead of assumed to exist.
        model_logs = log_file_map.setdefault(model_name, {})
        for task in agentic_tasks:
            task_dir = os.path.join(model_log_dir, task)
            # Skip tasks with no run directory, mirroring the model-level check.
            if not os.path.isdir(task_dir):
                continue
            model_logs.update(_index_task_logs(task_dir))

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(log_file_map, f, indent=4)
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|