Spaces:

dtcxzyw
/

llvm-apr-benchmark-leaderboard

Running

dtcxzyw committed on Feb 10

Commit

bad714d

unverified ·

1 Parent(s): a311ab2

Update

Files changed (2) hide show

src/display/utils.py CHANGED Viewed

@@ -44,6 +44,7 @@ auto_eval_column_dict.append(
     ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate (%)", "number", False)]
 )
 auto_eval_column_dict.append(["mttr", ColumnContent, ColumnContent("MTTR (min)", "number", True)])
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

     ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate (%)", "number", False)]
 )
 auto_eval_column_dict.append(["mttr", ColumnContent, ColumnContent("MTTR (min)", "number", True)])
+auto_eval_column_dict.append(["sample_count", ColumnContent, ColumnContent("Average Sample Count", "number", True)])
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

src/leaderboard/read_evals.py CHANGED Viewed

@@ -25,6 +25,7 @@ class EvalResult:
     build_count: int
     build_failure_count: int
     mttr: float
     fixed_bug_ids: list[str]
     @classmethod
@@ -47,6 +48,7 @@ class EvalResult:
         build_failure_count = 0
         ttr_sum = 0
         fixed_bug_ids = []
         for fix in fixes:
             bug_type = fix.get("bug_type", "")
             if fix.get("fast_check_pass", False):
@@ -56,6 +58,7 @@ class EvalResult:
                 full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
                 ttr_sum += fix.get("wall_time", 0)
                 fixed_bug_ids.append(fix.get("bug_id", ""))
             build_count += fix.get("build_count", 0)
             build_failure_count += fix.get("build_failure_count", 0)
@@ -75,6 +78,7 @@ class EvalResult:
             build_failure_count=build_failure_count,
             mttr=round(ttr_sum / full_pass_count / 60, 1) if full_pass_count > 0 else 0,
             fixed_bug_ids=fixed_bug_ids,
         )
     def to_dict(self, total_issues):
@@ -96,6 +100,7 @@ class EvalResult:
             AutoEvalColumn.mttr.name: self.mttr,
             "fixed_bug_ids": self.fixed_bug_ids,
             "method_id": self.method_name + "(" + self.model_name + ")",
         }
         return data_dict

     build_count: int
     build_failure_count: int
     mttr: float
+    sample_count: float
     fixed_bug_ids: list[str]
     @classmethod
         build_failure_count = 0
         ttr_sum = 0
         fixed_bug_ids = []
+        sample_count = 0
         for fix in fixes:
             bug_type = fix.get("bug_type", "")
             if fix.get("fast_check_pass", False):
                 full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
                 ttr_sum += fix.get("wall_time", 0)
                 fixed_bug_ids.append(fix.get("bug_id", ""))
+                sample_count += fix.get("fast_check_count", 0) + fix.get("full_check_count", 0)
             build_count += fix.get("build_count", 0)
             build_failure_count += fix.get("build_failure_count", 0)
             build_failure_count=build_failure_count,
             mttr=round(ttr_sum / full_pass_count / 60, 1) if full_pass_count > 0 else 0,
             fixed_bug_ids=fixed_bug_ids,
+            sample_count=round(sample_count / full_pass_count, 1) if full_pass_count > 0 else 0,
         )
     def to_dict(self, total_issues):
             AutoEvalColumn.mttr.name: self.mttr,
             "fixed_bug_ids": self.fixed_bug_ids,
             "method_id": self.method_name + "(" + self.model_name + ")",
+            AutoEvalColumn.sample_count.name: self.sample_count,
         }
         return data_dict