xeon27
committed on
Commit
·
1f6d554
1
Parent(s):
b1accaf
Fix bug: keep the Agent column in the leaderboard dataframe and drop it for the "base" benchmark
Browse files
- app.py +3 -0
- src/populate.py +1 -1
app.py
CHANGED
@@ -64,6 +64,9 @@ def init_leaderboard(dataframe, benchmark_type):
|
|
64 |
if benchmark_type == "agentic":
|
65 |
# Include agent column
|
66 |
non_task_cols.append("Agent")
|
|
|
|
|
|
|
67 |
AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
|
68 |
|
69 |
# styler = dataframe.style.apply(bold_max, subset=pd.IndexSlice[:, dataframe.columns[1:]])
|
|
|
64 |
if benchmark_type == "agentic":
|
65 |
# Include agent column
|
66 |
non_task_cols.append("Agent")
|
67 |
+
elif benchmark_type == "base":
|
68 |
+
# Drop agent column
|
69 |
+
dataframe = dataframe.drop(columns=["Agent"])
|
70 |
AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
|
71 |
|
72 |
# styler = dataframe.style.apply(bold_max, subset=pd.IndexSlice[:, dataframe.columns[1:]])
|
src/populate.py
CHANGED
@@ -46,7 +46,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
46 |
df = df[cols].round(decimals=2)
|
47 |
|
48 |
# subset for model and benchmark cols
|
49 |
-
df = df[[AutoEvalColumn.model.name] + benchmark_cols]
|
50 |
|
51 |
# drop rows for which all benchmark cols are empty
|
52 |
df = df.dropna(subset=benchmark_cols, axis=0, how="all")
|
|
|
46 |
df = df[cols].round(decimals=2)
|
47 |
|
48 |
# subset for model and benchmark cols
|
49 |
+
df = df[[AutoEvalColumn.model.name, AutoEvalColumn.agent.name] + benchmark_cols]
|
50 |
|
51 |
# drop rows for which all benchmark cols are empty
|
52 |
df = df.dropna(subset=benchmark_cols, axis=0, how="all")
|