dtcxzyw committed on
Commit
bd4573d
·
unverified ·
1 Parent(s): 8711098
src/display/utils.py CHANGED
@@ -41,8 +41,9 @@ auto_eval_column_dict.append(
41
  )
42
  auto_eval_column_dict.append(["full_pass_count_hang", ColumnContent, ColumnContent("Repaired (Hang)", "number", True)])
43
  auto_eval_column_dict.append(
44
- ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate", "number", False)]
45
  )
 
46
 
47
  # We use make dataclass to dynamically fill the scores from Tasks
48
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
41
  )
42
  auto_eval_column_dict.append(["full_pass_count_hang", ColumnContent, ColumnContent("Repaired (Hang)", "number", True)])
43
  auto_eval_column_dict.append(
44
+ ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate (%)", "number", False)]
45
  )
46
+ auto_eval_column_dict.append(["mttr", ColumnContent, ColumnContent("MTTR (min)", "number", False)])
47
 
48
  # We use make dataclass to dynamically fill the scores from Tasks
49
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
src/leaderboard/read_evals.py CHANGED
@@ -24,6 +24,7 @@ class EvalResult:
24
  full_pass_count_miscompilation: int
25
  build_count: int
26
  build_failure_count: int
 
27
 
28
  @classmethod
29
  def init_from_json_file(self, json_filepath):
@@ -43,6 +44,7 @@ class EvalResult:
43
  full_pass_count_cat = {}
44
  build_count = 0
45
  build_failure_count = 0
 
46
  for fix in fixes:
47
  bug_type = fix.get("bug_type", "")
48
  if fix.get("fast_check_pass", False):
@@ -50,6 +52,7 @@ class EvalResult:
50
  if fix.get("full_check_pass", False):
51
  full_pass_count += 1
52
  full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
 
53
  build_count += fix.get("build_count", 0)
54
  build_failure_count += fix.get("build_failure_count", 0)
55
 
@@ -67,6 +70,7 @@ class EvalResult:
67
  full_pass_count_miscompilation=full_pass_count_cat.get("miscompilation", 0),
68
  build_count=build_count,
69
  build_failure_count=build_failure_count,
 
70
  )
71
 
72
  def to_dict(self, total_issues):
@@ -85,6 +89,7 @@ class EvalResult:
85
  AutoEvalColumn.build_success_rate.name: round(
86
  (self.build_count - self.build_failure_count) * 100.0 / self.build_count, 1
87
  ),
 
88
  }
89
 
90
  return data_dict
 
24
  full_pass_count_miscompilation: int
25
  build_count: int
26
  build_failure_count: int
27
+ mttr: float
28
 
29
  @classmethod
30
  def init_from_json_file(self, json_filepath):
 
44
  full_pass_count_cat = {}
45
  build_count = 0
46
  build_failure_count = 0
47
+ ttr_sum = 0
48
  for fix in fixes:
49
  bug_type = fix.get("bug_type", "")
50
  if fix.get("fast_check_pass", False):
 
52
  if fix.get("full_check_pass", False):
53
  full_pass_count += 1
54
  full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
55
+ ttr_sum += fix.get("wall_time", 0)
56
  build_count += fix.get("build_count", 0)
57
  build_failure_count += fix.get("build_failure_count", 0)
58
 
 
70
  full_pass_count_miscompilation=full_pass_count_cat.get("miscompilation", 0),
71
  build_count=build_count,
72
  build_failure_count=build_failure_count,
73
+ mttr=round(ttr_sum / full_pass_count) if full_pass_count > 0 else 0,
74
  )
75
 
76
  def to_dict(self, total_issues):
 
89
  AutoEvalColumn.build_success_rate.name: round(
90
  (self.build_count - self.build_failure_count) * 100.0 / self.build_count, 1
91
  ),
92
+ AutoEvalColumn.mttr.name: self.mttr,
93
  }
94
 
95
  return data_dict