Update
Browse files
- src/display/utils.py +1 -0
- src/leaderboard/read_evals.py +5 -0
src/display/utils.py
CHANGED
@@ -44,6 +44,7 @@ auto_eval_column_dict.append(
|
|
44 |
["build_success_rate", ColumnContent, ColumnContent("Build Success Rate (%)", "number", False)]
|
45 |
)
|
46 |
auto_eval_column_dict.append(["mttr", ColumnContent, ColumnContent("MTTR (min)", "number", True)])
|
|
|
47 |
|
48 |
# We use make dataclass to dynamically fill the scores from Tasks
|
49 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
|
44 |
["build_success_rate", ColumnContent, ColumnContent("Build Success Rate (%)", "number", False)]
|
45 |
)
|
46 |
auto_eval_column_dict.append(["mttr", ColumnContent, ColumnContent("MTTR (min)", "number", True)])
|
47 |
+
auto_eval_column_dict.append(["sample_count", ColumnContent, ColumnContent("Average Sample Count", "number", True)])
|
48 |
|
49 |
# We use make dataclass to dynamically fill the scores from Tasks
|
50 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
src/leaderboard/read_evals.py
CHANGED
@@ -25,6 +25,7 @@ class EvalResult:
|
|
25 |
build_count: int
|
26 |
build_failure_count: int
|
27 |
mttr: float
|
|
|
28 |
fixed_bug_ids: list[str]
|
29 |
|
30 |
@classmethod
|
@@ -47,6 +48,7 @@ class EvalResult:
|
|
47 |
build_failure_count = 0
|
48 |
ttr_sum = 0
|
49 |
fixed_bug_ids = []
|
|
|
50 |
for fix in fixes:
|
51 |
bug_type = fix.get("bug_type", "")
|
52 |
if fix.get("fast_check_pass", False):
|
@@ -56,6 +58,7 @@ class EvalResult:
|
|
56 |
full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
|
57 |
ttr_sum += fix.get("wall_time", 0)
|
58 |
fixed_bug_ids.append(fix.get("bug_id", ""))
|
|
|
59 |
build_count += fix.get("build_count", 0)
|
60 |
build_failure_count += fix.get("build_failure_count", 0)
|
61 |
|
@@ -75,6 +78,7 @@ class EvalResult:
|
|
75 |
build_failure_count=build_failure_count,
|
76 |
mttr=round(ttr_sum / full_pass_count / 60, 1) if full_pass_count > 0 else 0,
|
77 |
fixed_bug_ids=fixed_bug_ids,
|
|
|
78 |
)
|
79 |
|
80 |
def to_dict(self, total_issues):
|
@@ -96,6 +100,7 @@ class EvalResult:
|
|
96 |
AutoEvalColumn.mttr.name: self.mttr,
|
97 |
"fixed_bug_ids": self.fixed_bug_ids,
|
98 |
"method_id": self.method_name + "(" + self.model_name + ")",
|
|
|
99 |
}
|
100 |
|
101 |
return data_dict
|
|
|
25 |
build_count: int
|
26 |
build_failure_count: int
|
27 |
mttr: float
|
28 |
+
sample_count: float
|
29 |
fixed_bug_ids: list[str]
|
30 |
|
31 |
@classmethod
|
|
|
48 |
build_failure_count = 0
|
49 |
ttr_sum = 0
|
50 |
fixed_bug_ids = []
|
51 |
+
sample_count = 0
|
52 |
for fix in fixes:
|
53 |
bug_type = fix.get("bug_type", "")
|
54 |
if fix.get("fast_check_pass", False):
|
|
|
58 |
full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
|
59 |
ttr_sum += fix.get("wall_time", 0)
|
60 |
fixed_bug_ids.append(fix.get("bug_id", ""))
|
61 |
+
sample_count += fix.get("fast_check_count", 0) + fix.get("full_check_count", 0)
|
62 |
build_count += fix.get("build_count", 0)
|
63 |
build_failure_count += fix.get("build_failure_count", 0)
|
64 |
|
|
|
78 |
build_failure_count=build_failure_count,
|
79 |
mttr=round(ttr_sum / full_pass_count / 60, 1) if full_pass_count > 0 else 0,
|
80 |
fixed_bug_ids=fixed_bug_ids,
|
81 |
+
sample_count=round(sample_count / full_pass_count, 1) if full_pass_count > 0 else 0,
|
82 |
)
|
83 |
|
84 |
def to_dict(self, total_issues):
|
|
|
100 |
AutoEvalColumn.mttr.name: self.mttr,
|
101 |
"fixed_bug_ids": self.fixed_bug_ids,
|
102 |
"method_id": self.method_name + "(" + self.model_name + ")",
|
103 |
+
AutoEvalColumn.sample_count.name: self.sample_count,
|
104 |
}
|
105 |
|
106 |
return data_dict
|