dtcxzyw committed on
Commit
18d2712
·
unverified ·
1 Parent(s): ac176c3
src/display/utils.py CHANGED
@@ -40,6 +40,9 @@ auto_eval_column_dict.append(
40
  ["full_pass_count_miscompilation", ColumnContent, ColumnContent("Repaired (Miscompilation)", "number", True)]
41
  )
42
  auto_eval_column_dict.append(["full_pass_count_hang", ColumnContent, ColumnContent("Repaired (Hang)", "number", True)])
 
 
 
43
 
44
  # We use make dataclass to dynamically fill the scores from Tasks
45
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
40
  ["full_pass_count_miscompilation", ColumnContent, ColumnContent("Repaired (Miscompilation)", "number", True)]
41
  )
42
  auto_eval_column_dict.append(["full_pass_count_hang", ColumnContent, ColumnContent("Repaired (Hang)", "number", True)])
43
+ auto_eval_column_dict.append(
44
+ ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate", "number", True)]
45
+ )
46
 
47
  # We use make dataclass to dynamically fill the scores from Tasks
48
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
src/leaderboard/read_evals.py CHANGED
@@ -22,6 +22,8 @@ class EvalResult:
22
  full_pass_count_crash: int
23
  full_pass_count_hang: int
24
  full_pass_count_miscompilation: int
 
 
25
 
26
  @classmethod
27
  def init_from_json_file(self, json_filepath):
@@ -39,6 +41,8 @@ class EvalResult:
39
  fast_pass_count = 0
40
  full_pass_count = 0
41
  full_pass_count_cat = {}
 
 
42
  for fix in fixes:
43
  bug_type = fix.get("bug_type", "")
44
  if fix.get("fast_check_pass", False):
@@ -46,6 +50,8 @@ class EvalResult:
46
  if fix.get("full_check_pass", False):
47
  full_pass_count += 1
48
  full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
 
 
49
 
50
  return self(
51
  method_name=method_name,
@@ -59,6 +65,8 @@ class EvalResult:
59
  full_pass_count_crash=full_pass_count_cat.get("crash", 0),
60
  full_pass_count_hang=full_pass_count_cat.get("hang", 0),
61
  full_pass_count_miscompilation=full_pass_count_cat.get("miscompilation", 0),
 
 
62
  )
63
 
64
  def to_dict(self, total_issues):
@@ -74,6 +82,9 @@ class EvalResult:
74
  AutoEvalColumn.full_pass_count_crash.name: self.full_pass_count_crash,
75
  AutoEvalColumn.full_pass_count_hang.name: self.full_pass_count_hang,
76
  AutoEvalColumn.full_pass_count_miscompilation.name: self.full_pass_count_miscompilation,
 
 
 
77
  }
78
 
79
  return data_dict
 
22
  full_pass_count_crash: int
23
  full_pass_count_hang: int
24
  full_pass_count_miscompilation: int
25
+ build_count: int
26
+ build_failure_count: int
27
 
28
  @classmethod
29
  def init_from_json_file(self, json_filepath):
 
41
  fast_pass_count = 0
42
  full_pass_count = 0
43
  full_pass_count_cat = {}
44
+ build_count = 0
45
+ build_failure_count = 0
46
  for fix in fixes:
47
  bug_type = fix.get("bug_type", "")
48
  if fix.get("fast_check_pass", False):
 
50
  if fix.get("full_check_pass", False):
51
  full_pass_count += 1
52
  full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
53
+ build_count += fix.get("build_count", 0)
54
+ build_failure_count += fix.get("build_failure_count", 0)
55
 
56
  return self(
57
  method_name=method_name,
 
65
  full_pass_count_crash=full_pass_count_cat.get("crash", 0),
66
  full_pass_count_hang=full_pass_count_cat.get("hang", 0),
67
  full_pass_count_miscompilation=full_pass_count_cat.get("miscompilation", 0),
68
+ build_count=build_count,
69
+ build_failure_count=build_failure_count,
70
  )
71
 
72
  def to_dict(self, total_issues):
 
82
  AutoEvalColumn.full_pass_count_crash.name: self.full_pass_count_crash,
83
  AutoEvalColumn.full_pass_count_hang.name: self.full_pass_count_hang,
84
  AutoEvalColumn.full_pass_count_miscompilation.name: self.full_pass_count_miscompilation,
85
+ AutoEvalColumn.build_success_rate.name: round(
86
+ (self.build_count - self.build_failure_count) * 100.0 / self.build_count, 1
87
+ ),
88
  }
89
 
90
  return data_dict