import json
import os
from dataclasses import dataclass

from src.display.formatting import make_hyperlink
from src.display.utils import AutoEvalColumn


@dataclass
class EvalResult:
    """Represents one full evaluation. Built from a combination of the result
    and request file for a given run."""

    method_name: str
    method_url: str
    model_name: str
    model_url: str
    with_hint: bool
    attempts: int
    fast_pass_count: int
    full_pass_count: int
    full_pass_count_crash: int
    full_pass_count_hang: int
    full_pass_count_miscompilation: int
    build_count: int
    build_failure_count: int
    mttr: float
    sample_count: float
    fixed_bug_ids: list[str]
    fixed_bug_ids_fast: list[str]

    @classmethod
    def init_from_json_file(cls, json_filepath):
        """Inits the result from the specific model result file."""
        with open(json_filepath) as fp:
            data = json.load(fp)

        method_name = data.get("method_name", "")
        method_url = data.get("method_url", "")
        model_name = data.get("base_model_name", "")
        model_url = data.get("base_model_url", "")
        with_hint = data.get("with_hint", False)

        fixes = data.get("fixes", [])
        attempts = len(fixes)

        fast_pass_count = 0
        full_pass_count = 0
        full_pass_count_cat = {}  # full passes broken down by bug type
        build_count = 0
        build_failure_count = 0
        ttr_sum = 0  # summed wall time (seconds) over all full passes
        fixed_bug_ids = []
        fixed_bug_ids_fast = []
        sample_count = 0
        for fix in fixes:
            bug_type = fix.get("bug_type", "")
            if fix.get("fast_check_pass", False):
                fast_pass_count += 1
                fixed_bug_ids_fast.append(fix.get("bug_id", ""))
            if fix.get("full_check_pass", False):
                full_pass_count += 1
                full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
                ttr_sum += fix.get("wall_time", 0)
                fixed_bug_ids.append(fix.get("bug_id", ""))
                sample_count += fix.get("fast_check_count", 0) + fix.get("full_check_count", 0)
            build_count += fix.get("build_count", 0)
            build_failure_count += fix.get("build_failure_count", 0)

        return cls(
            method_name=method_name,
            method_url=method_url,
            model_name=model_name,
            model_url=model_url,
            with_hint=with_hint,
            attempts=attempts,
            fast_pass_count=fast_pass_count,
            full_pass_count=full_pass_count,
            full_pass_count_crash=full_pass_count_cat.get("crash", 0),
            full_pass_count_hang=full_pass_count_cat.get("hang", 0),
            full_pass_count_miscompilation=full_pass_count_cat.get("miscompilation", 0),
            build_count=build_count,
            build_failure_count=build_failure_count,
            # Mean time to repair, in minutes per fully fixed bug.
            mttr=round(ttr_sum / full_pass_count / 60, 1) if full_pass_count > 0 else 0,
            fixed_bug_ids=fixed_bug_ids,
            fixed_bug_ids_fast=fixed_bug_ids_fast,
            # Mean number of samples (fast + full checks) per fully fixed bug.
            sample_count=round(sample_count / full_pass_count, 1) if full_pass_count > 0 else 0,
        )

    def to_dict(self, total_issues):
        """Converts the EvalResult to a dict compatible with our dataframe display."""
        data_dict = {
            AutoEvalColumn.method_name.name: make_hyperlink(self.method_url, self.method_name),
            AutoEvalColumn.model_name.name: make_hyperlink(self.model_url, self.model_name),
            AutoEvalColumn.with_hint.name: "w/ hint" if self.with_hint else "w/o hint",
            AutoEvalColumn.score.name: round(self.full_pass_count * 100.0 / total_issues, 1),
            AutoEvalColumn.attempts.name: self.attempts,
            AutoEvalColumn.fast_pass_count.name: self.fast_pass_count,
            AutoEvalColumn.full_pass_count.name: self.full_pass_count,
            AutoEvalColumn.full_pass_count_crash.name: self.full_pass_count_crash,
            AutoEvalColumn.full_pass_count_hang.name: self.full_pass_count_hang,
            AutoEvalColumn.full_pass_count_miscompilation.name: self.full_pass_count_miscompilation,
            # Guard against division by zero when no builds were attempted.
            AutoEvalColumn.build_success_rate.name: round(
                (self.build_count - self.build_failure_count) * 100.0 / self.build_count, 1
            )
            if self.build_count > 0
            else 0,
            AutoEvalColumn.mttr.name: self.mttr,
            "fixed_bug_ids": self.fixed_bug_ids,
            "fixed_bug_ids_fast": self.fixed_bug_ids_fast,
            "method_id": self.method_name + "(" + self.model_name + ")",
            AutoEvalColumn.sample_count.name: self.sample_count,
        }
        return data_dict


def get_raw_eval_results(results_path: str) -> list[EvalResult]:
    """From the path of the results folder root, extracts all needed info for results."""
    results = []
    for root, _, files in os.walk(results_path):
        for file in files:
            if file.endswith(".json"):
                results.append(EvalResult.init_from_json_file(os.path.join(root, file)))
    return results
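

# Example usage (a minimal sketch, not part of the module's public surface;
# the directory "eval-results/" and the constant TOTAL_ISSUES are hypothetical
# placeholders for whatever the leaderboard app actually passes in):
#
#     raw_results = get_raw_eval_results("eval-results/")
#     rows = [res.to_dict(total_issues=TOTAL_ISSUES) for res in raw_results]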