Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
42f179a
1 Parent:
2c9a73e
fix: parsing of validated and unvalidated submissions
Browse files
dabstep_benchmark/leaderboard.py +10 -10
dabstep_benchmark/leaderboard.py
CHANGED
@@ -234,7 +234,7 @@ def generate_leaderboard_df() -> Tuple[pd.DataFrame, pd.DataFrame]:
|
|
234 |
task_scores_df = DATASETS["task_scores"].to_pandas()
|
235 |
submissions_df = DATASETS["submissions"].to_pandas()
|
236 |
|
237 |
-
# get metadata of each
|
238 |
submissions_df = (
|
239 |
submissions_df.groupby("submission_id")
|
240 |
.first()
|
@@ -297,23 +297,23 @@ def generate_leaderboard_df() -> Tuple[pd.DataFrame, pd.DataFrame]:
|
|
297 |
}
|
298 |
col_order = [new_col_name for new_col_name in col_map.values()]
|
299 |
leaderboard_df.rename(columns=col_map, inplace=True)
|
300 |
-
|
301 |
|
302 |
# formatting
|
303 |
# convert scores to %
|
304 |
-
|
305 |
-
|
306 |
|
307 |
# make repo url clickable in markdown
|
308 |
-
|
309 |
|
310 |
# make agent name bold
|
311 |
-
|
312 |
|
313 |
# sort-by best score
|
314 |
-
|
315 |
|
316 |
-
|
317 |
-
|
318 |
|
319 |
-
return
|
|
|
234 |
task_scores_df = DATASETS["task_scores"].to_pandas()
|
235 |
submissions_df = DATASETS["submissions"].to_pandas()
|
236 |
|
237 |
+
# get metadata of each submission_id
|
238 |
submissions_df = (
|
239 |
submissions_df.groupby("submission_id")
|
240 |
.first()
|
|
|
297 |
}
|
298 |
col_order = [new_col_name for new_col_name in col_map.values()]
|
299 |
leaderboard_df.rename(columns=col_map, inplace=True)
|
300 |
+
leaderboard_df = leaderboard_df[col_order]
|
301 |
|
302 |
# formatting
|
303 |
# convert scores to %
|
304 |
+
leaderboard_df["Easy Level Accuracy (%)"] = leaderboard_df["Easy Level Accuracy (%)"].apply(lambda x: round(x * 100, 2))
|
305 |
+
leaderboard_df["Hard Level Accuracy (%)"] = leaderboard_df["Hard Level Accuracy (%)"].apply(lambda x: round(x * 100, 2))
|
306 |
|
307 |
# make repo url clickable in markdown
|
308 |
+
leaderboard_df["Repo URL"] = leaderboard_df["Repo URL"].apply(lambda x: f"[Link]({x})" if x != "" else x)
|
309 |
|
310 |
# make agent name bold
|
311 |
+
leaderboard_df["Agent"] = leaderboard_df["Agent"].apply(lambda x: f"**{x}**")
|
312 |
|
313 |
# sort-by best score
|
314 |
+
leaderboard_df.sort_values(by="Hard Level Accuracy (%)", ascending=False, inplace=True)
|
315 |
|
316 |
+
validated_lb = leaderboard_df[leaderboard_df["validated"] == True].drop(columns=["validated"])
|
317 |
+
unvalidated_lb = leaderboard_df[leaderboard_df["validated"] == False].drop(columns=["validated"])
|
318 |
|
319 |
+
return validated_lb, unvalidated_lb
|