Update
Browse files
app.py
CHANGED
@@ -44,6 +44,7 @@ except Exception:
|
|
44 |
dataset = load_dataset("dtcxzyw/llvm-apr-benchmark")
|
45 |
total_issues = dataset.num_rows["test"]
|
46 |
bug_id_to_time = dict()
|
|
|
47 |
bug_id_by_cat = {
|
48 |
"crash": [],
|
49 |
"miscompilation": [],
|
@@ -52,13 +53,13 @@ bug_id_by_cat = {
|
|
52 |
for issue in dataset["test"]:
|
53 |
bug_id_to_time[issue["bug_id"]] = pd.to_datetime(issue["knowledge_cutoff"])
|
54 |
bug_id_by_cat[issue["bug_type"]].append(issue["bug_id"])
|
|
|
55 |
timeline_xs = []
|
56 |
timeline_ys = []
|
57 |
timeline_cols = []
|
58 |
timeline_bugids = []
|
59 |
model_cnt = 0
|
60 |
for bug_id, time in bug_id_to_time.items():
|
61 |
-
timeline_xs.append(time)
|
62 |
timeline_ys.append(0)
|
63 |
timeline_cols.append("All")
|
64 |
timeline_bugids.append(bug_id)
|
@@ -66,7 +67,6 @@ cat_cnt = 4
|
|
66 |
for cat, bug_ids in bug_id_by_cat.items():
|
67 |
cat_cnt -= 1
|
68 |
for bug_id in bug_ids:
|
69 |
-
timeline_xs.append(bug_id_to_time[bug_id])
|
70 |
timeline_ys.append(cat_cnt)
|
71 |
timeline_cols.append(str(cat).capitalize())
|
72 |
timeline_bugids.append(bug_id)
|
@@ -75,16 +75,20 @@ for row in LEADERBOARD_DF.itertuples():
|
|
75 |
print(row)
|
76 |
model_cnt += 1
|
77 |
for fix in row.fixed_bug_ids:
|
78 |
-
timeline_xs.append(bug_id_to_time[fix])
|
79 |
timeline_ys.append(-model_cnt)
|
80 |
timeline_cols.append(row.method_id)
|
81 |
timeline_bugids.append(fix)
|
|
|
|
|
|
|
|
|
82 |
timeline_df = pd.DataFrame(
|
83 |
{
|
84 |
"time": timeline_xs,
|
85 |
"model": timeline_ys,
|
86 |
"method_name": timeline_cols,
|
87 |
"bug_id": timeline_bugids,
|
|
|
88 |
}
|
89 |
)
|
90 |
|
@@ -127,7 +131,7 @@ with demo:
|
|
127 |
y_label="Model",
|
128 |
title="Timeline",
|
129 |
y_lim=(-model_cnt - 1, 4),
|
130 |
-
tooltip=["bug_id", "method_name", "time"],
|
131 |
)
|
132 |
|
133 |
with gr.TabItem("🚀 Submission", elem_id="llm-benchmark-tab-table", id=1):
|
|
|
44 |
dataset = load_dataset("dtcxzyw/llvm-apr-benchmark")
|
45 |
total_issues = dataset.num_rows["test"]
|
46 |
bug_id_to_time = dict()
|
47 |
+
bug_id_to_type = dict()
|
48 |
bug_id_by_cat = {
|
49 |
"crash": [],
|
50 |
"miscompilation": [],
|
|
|
53 |
for issue in dataset["test"]:
|
54 |
bug_id_to_time[issue["bug_id"]] = pd.to_datetime(issue["knowledge_cutoff"])
|
55 |
bug_id_by_cat[issue["bug_type"]].append(issue["bug_id"])
|
56 |
+
bug_id_to_type[issue["bug_id"]] = issue["bug_type"]
|
57 |
timeline_xs = []
|
58 |
timeline_ys = []
|
59 |
timeline_cols = []
|
60 |
timeline_bugids = []
|
61 |
model_cnt = 0
|
62 |
for bug_id, time in bug_id_to_time.items():
|
|
|
63 |
timeline_ys.append(0)
|
64 |
timeline_cols.append("All")
|
65 |
timeline_bugids.append(bug_id)
|
|
|
67 |
for cat, bug_ids in bug_id_by_cat.items():
|
68 |
cat_cnt -= 1
|
69 |
for bug_id in bug_ids:
|
|
|
70 |
timeline_ys.append(cat_cnt)
|
71 |
timeline_cols.append(str(cat).capitalize())
|
72 |
timeline_bugids.append(bug_id)
|
|
|
75 |
print(row)
|
76 |
model_cnt += 1
|
77 |
for fix in row.fixed_bug_ids:
|
|
|
78 |
timeline_ys.append(-model_cnt)
|
79 |
timeline_cols.append(row.method_id)
|
80 |
timeline_bugids.append(fix)
|
81 |
+
timeline_bugtypes = []
|
82 |
+
for bug_id in timeline_bugids:
|
83 |
+
timeline_xs.append(bug_id_to_time[bug_id])
|
84 |
+
timeline_bugtypes.append(bug_id_to_type[bug_id])
|
85 |
timeline_df = pd.DataFrame(
|
86 |
{
|
87 |
"time": timeline_xs,
|
88 |
"model": timeline_ys,
|
89 |
"method_name": timeline_cols,
|
90 |
"bug_id": timeline_bugids,
|
91 |
+
"bug_type": timeline_bugtypes,
|
92 |
}
|
93 |
)
|
94 |
|
|
|
131 |
y_label="Model",
|
132 |
title="Timeline",
|
133 |
y_lim=(-model_cnt - 1, 4),
|
134 |
+
tooltip=["bug_id", "method_name", "time", "bug_type"],
|
135 |
)
|
136 |
|
137 |
with gr.TabItem("🚀 Submission", elem_id="llm-benchmark-tab-table", id=1):
|