dtcxzyw committed on
Commit
bad714d
·
unverified ·
1 Parent(s): a311ab2
src/display/utils.py CHANGED
@@ -44,6 +44,7 @@ auto_eval_column_dict.append(
44
  ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate (%)", "number", False)]
45
  )
46
  auto_eval_column_dict.append(["mttr", ColumnContent, ColumnContent("MTTR (min)", "number", True)])
 
47
 
48
  # We use make dataclass to dynamically fill the scores from Tasks
49
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
44
  ["build_success_rate", ColumnContent, ColumnContent("Build Success Rate (%)", "number", False)]
45
  )
46
  auto_eval_column_dict.append(["mttr", ColumnContent, ColumnContent("MTTR (min)", "number", True)])
47
+ auto_eval_column_dict.append(["sample_count", ColumnContent, ColumnContent("Average Sample Count", "number", True)])
48
 
49
  # We use make dataclass to dynamically fill the scores from Tasks
50
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
src/leaderboard/read_evals.py CHANGED
@@ -25,6 +25,7 @@ class EvalResult:
25
  build_count: int
26
  build_failure_count: int
27
  mttr: float
 
28
  fixed_bug_ids: list[str]
29
 
30
  @classmethod
@@ -47,6 +48,7 @@ class EvalResult:
47
  build_failure_count = 0
48
  ttr_sum = 0
49
  fixed_bug_ids = []
 
50
  for fix in fixes:
51
  bug_type = fix.get("bug_type", "")
52
  if fix.get("fast_check_pass", False):
@@ -56,6 +58,7 @@ class EvalResult:
56
  full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
57
  ttr_sum += fix.get("wall_time", 0)
58
  fixed_bug_ids.append(fix.get("bug_id", ""))
 
59
  build_count += fix.get("build_count", 0)
60
  build_failure_count += fix.get("build_failure_count", 0)
61
 
@@ -75,6 +78,7 @@ class EvalResult:
75
  build_failure_count=build_failure_count,
76
  mttr=round(ttr_sum / full_pass_count / 60, 1) if full_pass_count > 0 else 0,
77
  fixed_bug_ids=fixed_bug_ids,
 
78
  )
79
 
80
  def to_dict(self, total_issues):
@@ -96,6 +100,7 @@ class EvalResult:
96
  AutoEvalColumn.mttr.name: self.mttr,
97
  "fixed_bug_ids": self.fixed_bug_ids,
98
  "method_id": self.method_name + "(" + self.model_name + ")",
 
99
  }
100
 
101
  return data_dict
 
25
  build_count: int
26
  build_failure_count: int
27
  mttr: float
28
+ sample_count: float
29
  fixed_bug_ids: list[str]
30
 
31
  @classmethod
 
48
  build_failure_count = 0
49
  ttr_sum = 0
50
  fixed_bug_ids = []
51
+ sample_count = 0
52
  for fix in fixes:
53
  bug_type = fix.get("bug_type", "")
54
  if fix.get("fast_check_pass", False):
 
58
  full_pass_count_cat[bug_type] = full_pass_count_cat.get(bug_type, 0) + 1
59
  ttr_sum += fix.get("wall_time", 0)
60
  fixed_bug_ids.append(fix.get("bug_id", ""))
61
+ sample_count += fix.get("fast_check_count", 0) + fix.get("full_check_count", 0)
62
  build_count += fix.get("build_count", 0)
63
  build_failure_count += fix.get("build_failure_count", 0)
64
 
 
78
  build_failure_count=build_failure_count,
79
  mttr=round(ttr_sum / full_pass_count / 60, 1) if full_pass_count > 0 else 0,
80
  fixed_bug_ids=fixed_bug_ids,
81
+ sample_count=round(sample_count / full_pass_count, 1) if full_pass_count > 0 else 0,
82
  )
83
 
84
  def to_dict(self, total_issues):
 
100
  AutoEvalColumn.mttr.name: self.mttr,
101
  "fixed_bug_ids": self.fixed_bug_ids,
102
  "method_id": self.method_name + "(" + self.model_name + ")",
103
+ AutoEvalColumn.sample_count.name: self.sample_count,
104
  }
105
 
106
  return data_dict