File size: 5,041 Bytes
737fa2d 1a8bc79 737fa2d 1a8bc79 18d2712 bd4573d bad714d 22a227d 913f726 737fa2d 1a8bc79 ba9a086 1a8bc79 18d2712 bd4573d 22a227d 913f726 bad714d 1a8bc79 913f726 1a8bc79 bd4573d 22a227d bad714d 18d2712 737fa2d 1a8bc79 18d2712 c629528 22a227d 913f726 bad714d 737fa2d ac176c3 737fa2d 6efc451 35eacb4 ac176c3 1a8bc79 18d2712 bd4573d 03fbd41 913f726 22a227d bad714d 737fa2d 1a8bc79 737fa2d 1a8bc79 737fa2d 1a8bc79 737fa2d 92c610a 737fa2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import glob
import json
import os
from dataclasses import dataclass
from src.display.formatting import make_hyperlink
from src.display.utils import AutoEvalColumn
@dataclass
class EvalResult:
    """Represents one full evaluation. Built from a combination of the result and request file for a given run."""

    method_name: str  # display name of the repair method
    method_url: str  # link rendered for the method name
    model_name: str  # base model used by the method
    model_url: str  # link rendered for the model name
    with_hint: bool  # whether the run was given bug hints
    attempts: int  # number of fix attempts (length of the "fixes" list)
    fast_pass_count: int  # attempts that passed the fast check
    full_pass_count: int  # attempts that passed the full check
    full_pass_count_crash: int  # full passes whose bug_type was "crash"
    full_pass_count_hang: int  # full passes whose bug_type was "hang"
    full_pass_count_miscompilation: int  # full passes whose bug_type was "miscompilation"
    build_count: int  # total builds across all attempts
    build_failure_count: int  # failed builds across all attempts
    mttr: float  # mean time-to-repair in minutes, averaged over full passes
    sample_count: float  # mean (fast + full) check samples per full pass
    fixed_bug_ids: list[str]  # bug ids fixed according to the full check
    fixed_bug_ids_fast: list[str]  # bug ids fixed according to the fast check

    @classmethod
    def init_from_json_file(cls, json_filepath):
        """Inits the result from the specific model result file.

        Aggregates per-fix statistics (pass counts by bug type, build
        counts, wall time, sample counts) into a single EvalResult.
        Missing keys in the JSON default to empty/zero values.
        """
        with open(json_filepath) as fp:
            data = json.load(fp)
        method_name = data.get("method_name", "")
        method_url = data.get("method_url", "")
        model_name = data.get("base_model_name", "")
        model_url = data.get("base_model_url", "")
        with_hint = data.get("with_hint", False)
        fixes = data.get("fixes", [])
        attempts = len(fixes)
        fast_pass_count = 0
        full_pass_count = 0
        full_pass_count_cat = {}  # bug_type -> number of full passes
        build_count = 0
        build_failure_count = 0
        ttr_sum = 0  # total wall time (seconds) over full passes
        fixed_bug_ids = []
        fixed_bug_ids_fast = []
        sample_count = 0
        for fix in fixes:
            bug_type = fix.get("bug_type", "")
            if fix.get("fast_check_pass", False):
                fast_pass_count += 1
                fixed_bug_ids_fast.append(fix.get("bug_id", ""))
                # Only fast-passing fixes contribute to full-check stats:
                # the full check is only run after the fast check succeeds.
                if fix.get("full_check_pass", False):
                    full_pass_count += 1
                    full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
                    ttr_sum += fix.get("wall_time", 0)
                    fixed_bug_ids.append(fix.get("bug_id", ""))
                    sample_count += fix.get("fast_check_count", 0) + fix.get("full_check_count", 0)
            build_count += fix.get("build_count", 0)
            build_failure_count += fix.get("build_failure_count", 0)
        return cls(
            method_name=method_name,
            method_url=method_url,
            model_name=model_name,
            model_url=model_url,
            with_hint=with_hint,
            attempts=attempts,
            fast_pass_count=fast_pass_count,
            full_pass_count=full_pass_count,
            full_pass_count_crash=full_pass_count_cat.get("crash", 0),
            full_pass_count_hang=full_pass_count_cat.get("hang", 0),
            full_pass_count_miscompilation=full_pass_count_cat.get("miscompilation", 0),
            build_count=build_count,
            build_failure_count=build_failure_count,
            # Convert seconds -> minutes; guard the no-full-pass case.
            mttr=round(ttr_sum / full_pass_count / 60, 1) if full_pass_count > 0 else 0,
            fixed_bug_ids=fixed_bug_ids,
            fixed_bug_ids_fast=fixed_bug_ids_fast,
            sample_count=round(sample_count / full_pass_count, 1) if full_pass_count > 0 else 0,
        )

    def to_dict(self, total_issues):
        """Converts the Eval Result to a dict compatible with our dataframe display.

        `total_issues` is the benchmark size used to turn the full pass
        count into a percentage score; it must be non-zero.
        """
        # Guard against ZeroDivisionError for a result file with no builds,
        # mirroring the full_pass_count guards used for mttr/sample_count.
        build_success_rate = (
            round((self.build_count - self.build_failure_count) * 100.0 / self.build_count, 1)
            if self.build_count > 0
            else 0
        )
        data_dict = {
            AutoEvalColumn.method_name.name: make_hyperlink(self.method_url, self.method_name),
            AutoEvalColumn.model_name.name: make_hyperlink(self.model_url, self.model_name),
            AutoEvalColumn.with_hint.name: "w/ hint" if self.with_hint else "w/o hint",
            AutoEvalColumn.score.name: round(self.full_pass_count * 100.0 / total_issues, 1),
            AutoEvalColumn.attempts.name: self.attempts,
            AutoEvalColumn.fast_pass_count.name: self.fast_pass_count,
            AutoEvalColumn.full_pass_count.name: self.full_pass_count,
            AutoEvalColumn.full_pass_count_crash.name: self.full_pass_count_crash,
            AutoEvalColumn.full_pass_count_hang.name: self.full_pass_count_hang,
            AutoEvalColumn.full_pass_count_miscompilation.name: self.full_pass_count_miscompilation,
            AutoEvalColumn.build_success_rate.name: build_success_rate,
            AutoEvalColumn.mttr.name: self.mttr,
            "fixed_bug_ids": self.fixed_bug_ids,
            "fixed_bug_ids_fast": self.fixed_bug_ids_fast,
            "method_id": self.method_name + "(" + self.model_name + ")",
            AutoEvalColumn.sample_count.name: self.sample_count,
        }
        return data_dict
def get_raw_eval_results(requests_path: str) -> list[EvalResult]:
    """Walk the results folder root and build an EvalResult per JSON file found."""
    return [
        EvalResult.init_from_json_file(os.path.join(dirpath, filename))
        for dirpath, _, filenames in os.walk(requests_path)
        for filename in filenames
        if filename.endswith(".json")
    ]
|