Update

- src/display/utils.py  +2 -1
- src/leaderboard/read_evals.py  +5 -0
src/display/utils.py
CHANGED
@@ -41,8 +41,9 @@ auto_eval_column_dict.append(
 )
 auto_eval_column_dict.append(["full_pass_count_hang", ColumnContent, ColumnContent("Repaired (Hang)", "number", True)])
 auto_eval_column_dict.append(
-    ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate", "number", False)]
+    ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate (%)", "number", False)]
 )
+auto_eval_column_dict.append(["mttr", ColumnContent, ColumnContent("MTTR (min)", "number", False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
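For reference, each [attribute_name, type, default] entry appended to auto_eval_column_dict becomes a field of the frozen AutoEvalColumn dataclass, so the new column is reachable as AutoEvalColumn.mttr. Below is a minimal sketch of that mechanism; the ColumnContent definition is an assumed stand-in, not the project's actual class.

from dataclasses import dataclass, make_dataclass

# Assumed stand-in for the project's ColumnContent dataclass; the real field
# names and order in src/display/utils.py may differ.
@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool = True

auto_eval_column_dict = []
auto_eval_column_dict.append(
    ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate (%)", "number", False)]
)
auto_eval_column_dict.append(["mttr", ColumnContent, ColumnContent("MTTR (min)", "number", False)])

# make_dataclass turns each [name, type, default] triple into a frozen field,
# with the ColumnContent instance as the field's default value.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.mttr.name)  # -> MTTR (min)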
src/leaderboard/read_evals.py
CHANGED
@@ -24,6 +24,7 @@ class EvalResult:
     full_pass_count_miscompilation: int
     build_count: int
     build_failure_count: int
+    mttr: float
 
     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -43,6 +44,7 @@ class EvalResult:
         full_pass_count_cat = {}
         build_count = 0
         build_failure_count = 0
+        ttr_sum = 0
         for fix in fixes:
             bug_type = fix.get("bug_type", "")
             if fix.get("fast_check_pass", False):
@@ -50,6 +52,7 @@ class EvalResult:
             if fix.get("full_check_pass", False):
                 full_pass_count += 1
                 full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
+                ttr_sum += fix.get("wall_time", 0)
             build_count += fix.get("build_count", 0)
             build_failure_count += fix.get("build_failure_count", 0)
 
@@ -67,6 +70,7 @@ class EvalResult:
             full_pass_count_miscompilation=full_pass_count_cat.get("miscompilation", 0),
             build_count=build_count,
             build_failure_count=build_failure_count,
+            mttr=round(ttr_sum / full_pass_count) if full_pass_count > 0 else 0,
         )
 
     def to_dict(self, total_issues):
@@ -85,6 +89,7 @@ class EvalResult:
             AutoEvalColumn.build_success_rate.name: round(
                 (self.build_count - self.build_failure_count) * 100.0 / self.build_count, 1
             ),
+            AutoEvalColumn.mttr.name: self.mttr,
         }
 
         return data_dict
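The MTTR reported in to_dict is the mean wall-clock time over issues that passed the full check, rounded to a whole number; given the "MTTR (min)" column label, wall_time is presumably already in minutes (an assumption, since the diff does not show its unit). A standalone sketch of the same aggregation over a hypothetical fixes list:

# Hypothetical per-fix records mirroring the fields read in read_evals.py.
fixes = [
    {"bug_type": "hang", "full_check_pass": True, "wall_time": 12},
    {"bug_type": "miscompilation", "full_check_pass": True, "wall_time": 30},
    {"bug_type": "crash", "full_check_pass": False, "wall_time": 45},
]

full_pass_count = 0
ttr_sum = 0
for fix in fixes:
    if fix.get("full_check_pass", False):
        full_pass_count += 1
        ttr_sum += fix.get("wall_time", 0)  # only repaired issues count toward MTTR

# Mean time to repair, rounded; 0 when nothing was repaired (avoids division by zero).
mttr = round(ttr_sum / full_pass_count) if full_pass_count > 0 else 0
print(mttr)  # -> 21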