Update
- app.py +22 -3
- src/leaderboard/read_evals.py +10 -0
app.py
CHANGED
@@ -83,6 +83,7 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_REQUESTS_PATH, total_issues)
 fixed_bug_ids = set()
 fixed_bug_ids_count = dict()
 fixed_bug_ids_fast = set()
+bug_ids_to_patches = dict()
 for row in LEADERBOARD_DF.itertuples():
     print(row)
     model_cnt += 1
@@ -94,6 +95,7 @@ for row in LEADERBOARD_DF.itertuples():
         fixed_bug_ids_count[fix] = fixed_bug_ids_count.get(fix, 0) + 1
     for fix in row.fixed_bug_ids_fast:
         fixed_bug_ids_fast.add(fix)
+    bug_ids_to_patches[row.method_id] = row.patches
 unique_bug_ids = set([bug_id for bug_id, count in fixed_bug_ids_count.items() if count == 1])
 timeline_bugtypes = []
 for bug_id in timeline_bugids:
@@ -212,10 +214,27 @@ with demo:
             fixed_bug_title_id_pairs = [(bug_id_to_title[bug_id], bug_id) for bug_id in sorted(fixed_bug_ids)]
             inspect_issue = gr.Dropdown(fixed_bug_title_id_pairs, label="Inspect Issue", interactive=True)
             golden_patch = gr.Code("", language="cpp", label="Golden Patch")
+            inspect_fix = gr.Dropdown(list(bug_ids_to_patches.keys()), label="Method(Model)", interactive=True)
+            method_patch = gr.Code("", language="cpp", label="APR Patch")
             inspect_issue.change(
-                fn=lambda bug_id: bug_id_to_patch.get(bug_id, f"Not Available (bug_id = {bug_id})"),
-                inputs=inspect_issue,
-                outputs=golden_patch,
+                fn=lambda bug_id, method: (
+                    bug_id_to_patch.get(bug_id, f"Not Available (bug_id = {bug_id})"),
+                    bug_ids_to_patches.get(method, dict()).get(
+                        bug_id, f"Not Available (method = {method}, bug_id = {bug_id})"
+                    ),
+                ),
+                inputs=[inspect_issue, inspect_fix],
+                outputs=[golden_patch, method_patch],
+            )
+            inspect_fix.change(
+                fn=lambda bug_id, method: (
+                    bug_id_to_patch.get(bug_id, f"Not Available (bug_id = {bug_id})"),
+                    bug_ids_to_patches.get(method, dict()).get(
+                        bug_id, f"Not Available (method = {method}, bug_id = {bug_id})"
+                    ),
+                ),
+                inputs=[inspect_issue, inspect_fix],
+                outputs=[golden_patch, method_patch],
             )

         with gr.TabItem("🚀 Submission", elem_id="llm-benchmark-tab-table", id=1):
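The two new `.change` handlers are identical, so selecting either dropdown refreshes both code panes at once. Below is a minimal, runnable sketch of the same wiring with one shared named handler instead of the duplicated lambdas; the `bug_id_to_patch` and `bug_ids_to_patches` values are hypothetical stand-ins for the data app.py builds from LEADERBOARD_DF.

import gradio as gr

# Hypothetical stand-in data (the real values come from LEADERBOARD_DF):
# golden patches keyed by bug id, and per-method patch dicts keyed by method id.
bug_id_to_patch = {"bug-1": "--- a/foo.cpp\n+++ b/foo.cpp\n-old\n+new"}
bug_ids_to_patches = {"MethodA(model-x)": {"bug-1": "// Fast check: True\n-old\n+fix"}}

def show_patches(bug_id, method):
    # Return (golden patch, APR patch) for the current dropdown selections,
    # falling back to a "Not Available" note when a key is missing.
    golden = bug_id_to_patch.get(bug_id, f"Not Available (bug_id = {bug_id})")
    apr = bug_ids_to_patches.get(method, {}).get(
        bug_id, f"Not Available (method = {method}, bug_id = {bug_id})"
    )
    return golden, apr

with gr.Blocks() as demo:
    inspect_issue = gr.Dropdown(list(bug_id_to_patch), label="Inspect Issue", interactive=True)
    inspect_fix = gr.Dropdown(list(bug_ids_to_patches), label="Method(Model)", interactive=True)
    golden_patch = gr.Code("", language="cpp", label="Golden Patch")
    method_patch = gr.Code("", language="cpp", label="APR Patch")
    # One handler bound to both dropdowns; either change updates both panes.
    for dropdown in (inspect_issue, inspect_fix):
        dropdown.change(fn=show_patches, inputs=[inspect_issue, inspect_fix],
                        outputs=[golden_patch, method_patch])

demo.launch()

Factoring the lambda into a named function keeps the two event bindings in sync if the fallback text or lookup logic ever changes.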
src/leaderboard/read_evals.py
CHANGED
@@ -28,6 +28,7 @@ class EvalResult:
     sample_count: float
     fixed_bug_ids: list[str]
     fixed_bug_ids_fast: list[str]
+    patches: dict[str, str]

     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -51,6 +52,7 @@ class EvalResult:
         fixed_bug_ids = []
         fixed_bug_ids_fast = []
         sample_count = 0
+        patches = {}
         for fix in fixes:
             bug_type = fix.get("bug_type", "")
             if fix.get("fast_check_pass", False):
@@ -65,6 +67,12 @@ class EvalResult:
             build_count += fix.get("build_count", 0)
             build_failure_count += fix.get("build_failure_count", 0)

+            patch = ""
+            patch += f"// Fast check: {fix.get('fast_check_pass', False)}\n"
+            patch += f"// Full check: {fix.get('full_check_pass', False)}\n"
+            patch += fix.get("patch", "")
+            patches[fix.get("bug_id", "")] = patch
+
         return self(
             method_name=method_name,
             method_url=method_url,
@@ -83,6 +91,7 @@ class EvalResult:
             fixed_bug_ids=fixed_bug_ids,
             fixed_bug_ids_fast=fixed_bug_ids_fast,
             sample_count=round(sample_count / full_pass_count, 1) if full_pass_count > 0 else 0,
+            patches=patches,
         )

     def to_dict(self, total_issues):
@@ -105,6 +114,7 @@ class EvalResult:
             "fixed_bug_ids": self.fixed_bug_ids,
             "fixed_bug_ids_fast": self.fixed_bug_ids_fast,
             "method_id": self.method_name + "(" + self.model_name + ")",
+            "patches": self.patches,
             AutoEvalColumn.sample_count.name: self.sample_count,
         }

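The new per-fix bookkeeping in init_from_json_file is easiest to see in isolation. A minimal sketch under assumed inputs follows: the `fixes` list is made up, with field names mirroring the JSON keys the method reads. It also shows why `patches` must be initialized as a dict, not a list, since it is indexed by bug id.

# Hypothetical parsed results; in read_evals.py the list comes from the JSON file.
fixes = [
    {"bug_id": "bug-1", "patch": "-old\n+new", "fast_check_pass": True, "full_check_pass": False},
    {"bug_id": "bug-2", "patch": "-a\n+b", "fast_check_pass": True, "full_check_pass": True},
]

patches: dict[str, str] = {}  # a dict keyed by bug id; a list here would raise TypeError
for fix in fixes:
    # Prefix each patch with its check outcomes as C++ comments, matching the
    # gr.Code(language="cpp") viewer in app.py that renders the result.
    patch = ""
    patch += f"// Fast check: {fix.get('fast_check_pass', False)}\n"
    patch += f"// Full check: {fix.get('full_check_pass', False)}\n"
    patch += fix.get("patch", "")
    patches[fix.get("bug_id", "")] = patch

print(patches["bug-2"].splitlines()[0])  # // Fast check: True

Because the check results are embedded as `//` comments, they display as part of the patch text in the leaderboard's code pane without any extra UI components.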