Update
Browse files
app.py
CHANGED
@@ -50,10 +50,15 @@ bug_id_by_cat = {
|
|
50 |
"miscompilation": [],
|
51 |
"hang": [],
|
52 |
}
|
|
|
|
|
53 |
# Index every issue of the test split in one pass: the knowledge-cutoff
# timestamp and the bug type keyed by bug id, plus the bug ids listed under
# each bug type.
for issue in dataset["test"]:
    issue_id, issue_type = issue["bug_id"], issue["bug_type"]
    bug_id_to_time[issue_id] = pd.to_datetime(issue["knowledge_cutoff"])
    bug_id_by_cat[issue_type].append(issue_id)
    bug_id_to_type[issue_id] = issue_type

# The three timeline lists start out empty.
timeline_xs, timeline_ys, timeline_cols = [], [], []
|
@@ -107,6 +112,21 @@ fixed_by_cat_df = pd.DataFrame(
|
|
107 |
}
|
108 |
)
|
109 |
# Order the per-category table in place by its "Total" column, largest value first.
fixed_by_cat_df.sort_values("Total", inplace=True, ascending=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
|
112 |
def init_leaderboard(dataframe):
|
@@ -150,6 +170,7 @@ with demo:
|
|
150 |
tooltip=["bug_id", "method_name", "time", "bug_type"],
|
151 |
)
|
152 |
gr.Dataframe(fixed_by_cat_df)
|
|
|
153 |
|
154 |
with gr.TabItem("🚀 Submission", elem_id="llm-benchmark-tab-table", id=1):
|
155 |
gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|
|
|
50 |
"miscompilation": [],
|
51 |
"hang": [],
|
52 |
}
|
53 |
+
# Two extra indexes filled by the loop below: the component list of each bug,
# and the number of bugs that list each component.
bug_id_to_comp = {}
comp_bug_count = {}

# One pass over the test split fills every per-bug index.
for issue in dataset["test"]:
    issue_id, issue_type = issue["bug_id"], issue["bug_type"]
    issue_comps = issue["components"]
    bug_id_to_time[issue_id] = pd.to_datetime(issue["knowledge_cutoff"])
    bug_id_by_cat[issue_type].append(issue_id)
    bug_id_to_type[issue_id] = issue_type
    bug_id_to_comp[issue_id] = issue_comps
    for comp in issue_comps:
        # A component seen for the first time starts its tally at one.
        if comp in comp_bug_count:
            comp_bug_count[comp] += 1
        else:
            comp_bug_count[comp] = 1

# The three timeline lists start out empty.
timeline_xs, timeline_ys, timeline_cols = [], [], []
|
|
|
112 |
}
|
113 |
)
|
114 |
# Order the per-category table in place by its "Total" column, largest first.
fixed_by_cat_df.sort_values(by="Total", ascending=False, inplace=True)

# Tally, per component, how many ids in fixed_bug_ids list that component.
# A bug with several components adds one to each of them.
fixed_by_comp = {}
for bug_id in fixed_bug_ids:
    for comp in bug_id_to_comp[bug_id]:
        if comp in fixed_by_comp:
            fixed_by_comp[comp] += 1
        else:
            fixed_by_comp[comp] = 1

# One row per component present in fixed_by_comp (components without any
# repaired bug never enter the tally and therefore get no row). Rows follow
# the tally's insertion order until the sort below.
fixed_by_comp_df = pd.DataFrame(
    {
        "Component": list(fixed_by_comp),
        "Total": [comp_bug_count[name] for name in fixed_by_comp],
        "Repaired": [fixed_by_comp[name] for name in fixed_by_comp],
        # Percentage of the component's bugs that were repaired, one decimal.
        "Repair Rate (%)": [
            round(repaired / comp_bug_count[name] * 100, 1)
            for name, repaired in fixed_by_comp.items()
        ],
    }
)
fixed_by_comp_df.sort_values(by="Total", ascending=False, inplace=True)
|
130 |
|
131 |
|
132 |
def init_leaderboard(dataframe):
|
|
|
170 |
tooltip=["bug_id", "method_name", "time", "bug_type"],
|
171 |
)
|
172 |
gr.Dataframe(fixed_by_cat_df)
|
173 |
+
gr.Dataframe(fixed_by_comp_df)
|
174 |
|
175 |
with gr.TabItem("🚀 Submission", elem_id="llm-benchmark-tab-table", id=1):
|
176 |
gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|