Update

- src/display/utils.py  +2 -1
- src/leaderboard/read_evals.py  +5 -0
src/display/utils.py
CHANGED
@@ -41,8 +41,9 @@ auto_eval_column_dict.append(
 )
 auto_eval_column_dict.append(["full_pass_count_hang", ColumnContent, ColumnContent("Repaired (Hang)", "number", True)])
 auto_eval_column_dict.append(
-    ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate", "number", False)]
+    ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate (%)", "number", False)]
 )
+auto_eval_column_dict.append(["mttr", ColumnContent, ColumnContent("MTTR (min)", "number", False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
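For reference, each [attribute_name, type, default] entry appended to auto_eval_column_dict becomes a field of the frozen AutoEvalColumn dataclass, so the new column is reachable as AutoEvalColumn.mttr. Below is a minimal sketch of that mechanism; the ColumnContent definition is an assumed stand-in, not the project's actual class.

from dataclasses import dataclass, make_dataclass

# Assumed stand-in for the project's ColumnContent dataclass; the real field
# names and order in src/display/utils.py may differ.
@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool = True

auto_eval_column_dict = []
auto_eval_column_dict.append(
    ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate (%)", "number", False)]
)
auto_eval_column_dict.append(["mttr", ColumnContent, ColumnContent("MTTR (min)", "number", False)])

# make_dataclass turns each [name, type, default] triple into a frozen field,
# with the ColumnContent instance as the field's default value.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.mttr.name)  # -> MTTR (min)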
src/leaderboard/read_evals.py
CHANGED
@@ -24,6 +24,7 @@ class EvalResult:
     full_pass_count_miscompilation: int
     build_count: int
     build_failure_count: int
+    mttr: float
 
     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -43,6 +44,7 @@ class EvalResult:
         full_pass_count_cat = {}
         build_count = 0
         build_failure_count = 0
+        ttr_sum = 0
         for fix in fixes:
             bug_type = fix.get("bug_type", "")
             if fix.get("fast_check_pass", False):
@@ -50,6 +52,7 @@ class EvalResult:
             if fix.get("full_check_pass", False):
                 full_pass_count += 1
                 full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
+                ttr_sum += fix.get("wall_time", 0)
             build_count += fix.get("build_count", 0)
             build_failure_count += fix.get("build_failure_count", 0)
 
@@ -67,6 +70,7 @@ class EvalResult:
             full_pass_count_miscompilation=full_pass_count_cat.get("miscompilation", 0),
             build_count=build_count,
             build_failure_count=build_failure_count,
+            mttr=round(ttr_sum / full_pass_count) if full_pass_count > 0 else 0,
         )
 
     def to_dict(self, total_issues):
@@ -85,6 +89,7 @@ class EvalResult:
             AutoEvalColumn.build_success_rate.name: round(
                 (self.build_count - self.build_failure_count) * 100.0 / self.build_count, 1
             ),
+            AutoEvalColumn.mttr.name: self.mttr,
         }
 
         return data_dict
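The MTTR reported in to_dict is the mean wall-clock time over issues that passed the full check, rounded to a whole number; given the "MTTR (min)" column label, wall_time is presumably already in minutes (an assumption, since the diff does not show its unit). A standalone sketch of the same aggregation over a hypothetical fixes list:

# Hypothetical per-fix records mirroring the fields read in read_evals.py.
fixes = [
    {"bug_type": "hang", "full_check_pass": True, "wall_time": 12},
    {"bug_type": "miscompilation", "full_check_pass": True, "wall_time": 30},
    {"bug_type": "crash", "full_check_pass": False, "wall_time": 45},
]

full_pass_count = 0
ttr_sum = 0
for fix in fixes:
    if fix.get("full_check_pass", False):
        full_pass_count += 1
        ttr_sum += fix.get("wall_time", 0)  # only repaired issues count toward MTTR

# Mean time to repair, rounded; 0 when nothing was repaired (avoids division by zero).
mttr = round(ttr_sum / full_pass_count) if full_pass_count > 0 else 0
print(mttr)  # -> 21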