dtcxzyw commited on
Commit
c1ed8f5
·
unverified ·
1 Parent(s): 345bfd4
Files changed (2) hide show
  1. app.py +22 -3
  2. src/leaderboard/read_evals.py +10 -0
app.py CHANGED
@@ -83,6 +83,7 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_REQUESTS_PATH, total_issues)
83
  fixed_bug_ids = set()
84
  fixed_bug_ids_count = dict()
85
  fixed_bug_ids_fast = set()
 
86
  for row in LEADERBOARD_DF.itertuples():
87
  print(row)
88
  model_cnt += 1
@@ -94,6 +95,7 @@ for row in LEADERBOARD_DF.itertuples():
94
  fixed_bug_ids_count[fix] = fixed_bug_ids_count.get(fix, 0) + 1
95
  for fix in row.fixed_bug_ids_fast:
96
  fixed_bug_ids_fast.add(fix)
 
97
  unique_bug_ids = set([bug_id for bug_id, count in fixed_bug_ids_count.items() if count == 1])
98
  timeline_bugtypes = []
99
  for bug_id in timeline_bugids:
@@ -212,10 +214,27 @@ with demo:
212
  fixed_bug_title_id_pairs = [(bug_id_to_title[bug_id], bug_id) for bug_id in sorted(fixed_bug_ids)]
213
  inspect_issue = gr.Dropdown(fixed_bug_title_id_pairs, label="Inspect Issue", interactive=True)
214
  golden_patch = gr.Code("", language="cpp", label="Golden Patch")
 
 
215
  inspect_issue.change(
216
- fn=lambda bug_id: bug_id_to_patch.get(bug_id, f"Not Available (bug_id = {bug_id})"),
217
- inputs=inspect_issue,
218
- outputs=golden_patch,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  )
220
 
221
  with gr.TabItem("🚀 Submission", elem_id="llm-benchmark-tab-table", id=1):
 
83
  fixed_bug_ids = set()
84
  fixed_bug_ids_count = dict()
85
  fixed_bug_ids_fast = set()
86
+ bug_ids_to_patches = dict()
87
  for row in LEADERBOARD_DF.itertuples():
88
  print(row)
89
  model_cnt += 1
 
95
  fixed_bug_ids_count[fix] = fixed_bug_ids_count.get(fix, 0) + 1
96
  for fix in row.fixed_bug_ids_fast:
97
  fixed_bug_ids_fast.add(fix)
98
+ bug_ids_to_patches[row.method_id] = row.patches
99
  unique_bug_ids = set([bug_id for bug_id, count in fixed_bug_ids_count.items() if count == 1])
100
  timeline_bugtypes = []
101
  for bug_id in timeline_bugids:
 
214
  fixed_bug_title_id_pairs = [(bug_id_to_title[bug_id], bug_id) for bug_id in sorted(fixed_bug_ids)]
215
  inspect_issue = gr.Dropdown(fixed_bug_title_id_pairs, label="Inspect Issue", interactive=True)
216
  golden_patch = gr.Code("", language="cpp", label="Golden Patch")
217
+ inspect_fix = gr.Dropdown(list(bug_ids_to_patches.keys()), label="Method(Model)", interactive=True)
218
+ method_patch = gr.Code("", language="cpp", label="APR Patch")
219
  inspect_issue.change(
220
+ fn=lambda bug_id, method: (
221
+ bug_id_to_patch.get(bug_id, f"Not Available (bug_id = {bug_id})"),
222
+ bug_ids_to_patches.get(method, dict()).get(
223
+ bug_id, f"Not Available (method = {method}, bug_id = {bug_id})"
224
+ ),
225
+ ),
226
+ inputs=[inspect_issue, inspect_fix],
227
+ outputs=[golden_patch, method_patch],
228
+ )
229
+ inspect_fix.change(
230
+ fn=lambda bug_id, method: (
231
+ bug_id_to_patch.get(bug_id, f"Not Available (bug_id = {bug_id})"),
232
+ bug_ids_to_patches.get(method, dict()).get(
233
+ bug_id, f"Not Available (method = {method}, bug_id = {bug_id})"
234
+ ),
235
+ ),
236
+ inputs=[inspect_issue, inspect_fix],
237
+ outputs=[golden_patch, method_patch],
238
  )
239
 
240
  with gr.TabItem("🚀 Submission", elem_id="llm-benchmark-tab-table", id=1):
src/leaderboard/read_evals.py CHANGED
@@ -28,6 +28,7 @@ class EvalResult:
28
  sample_count: float
29
  fixed_bug_ids: list[str]
30
  fixed_bug_ids_fast: list[str]
 
31
 
32
  @classmethod
33
  def init_from_json_file(self, json_filepath):
@@ -51,6 +52,7 @@ class EvalResult:
51
  fixed_bug_ids = []
52
  fixed_bug_ids_fast = []
53
  sample_count = 0
 
54
  for fix in fixes:
55
  bug_type = fix.get("bug_type", "")
56
  if fix.get("fast_check_pass", False):
@@ -65,6 +67,12 @@ class EvalResult:
65
  build_count += fix.get("build_count", 0)
66
  build_failure_count += fix.get("build_failure_count", 0)
67
 
 
 
 
 
 
 
68
  return self(
69
  method_name=method_name,
70
  method_url=method_url,
@@ -83,6 +91,7 @@ class EvalResult:
83
  fixed_bug_ids=fixed_bug_ids,
84
  fixed_bug_ids_fast=fixed_bug_ids_fast,
85
  sample_count=round(sample_count / full_pass_count, 1) if full_pass_count > 0 else 0,
 
86
  )
87
 
88
  def to_dict(self, total_issues):
@@ -105,6 +114,7 @@ class EvalResult:
105
  "fixed_bug_ids": self.fixed_bug_ids,
106
  "fixed_bug_ids_fast": self.fixed_bug_ids_fast,
107
  "method_id": self.method_name + "(" + self.model_name + ")",
 
108
  AutoEvalColumn.sample_count.name: self.sample_count,
109
  }
110
 
 
28
  sample_count: float
29
  fixed_bug_ids: list[str]
30
  fixed_bug_ids_fast: list[str]
31
+ patches: dict[str, str]
32
 
33
  @classmethod
34
  def init_from_json_file(self, json_filepath):
 
52
  fixed_bug_ids = []
53
  fixed_bug_ids_fast = []
54
  sample_count = 0
55
+ patches = dict()
56
  for fix in fixes:
57
  bug_type = fix.get("bug_type", "")
58
  if fix.get("fast_check_pass", False):
 
67
  build_count += fix.get("build_count", 0)
68
  build_failure_count += fix.get("build_failure_count", 0)
69
 
70
+ patch = ""
71
+ patch += f"// Fast check: {fix.get('fast_check_pass', False)}\n"
72
+ patch += f"// Full check: {fix.get('full_check_pass', False)}\n"
73
+ patch += fix.get("patch", "")
74
+ patches[fix.get("bug_id", "")] = patch
75
+
76
  return self(
77
  method_name=method_name,
78
  method_url=method_url,
 
91
  fixed_bug_ids=fixed_bug_ids,
92
  fixed_bug_ids_fast=fixed_bug_ids_fast,
93
  sample_count=round(sample_count / full_pass_count, 1) if full_pass_count > 0 else 0,
94
+ patches=patches,
95
  )
96
 
97
  def to_dict(self, total_issues):
 
114
  "fixed_bug_ids": self.fixed_bug_ids,
115
  "fixed_bug_ids_fast": self.fixed_bug_ids_fast,
116
  "method_id": self.method_name + "(" + self.model_name + ")",
117
+ "patches": self.patches,
118
  AutoEvalColumn.sample_count.name: self.sample_count,
119
  }
120