xeon27
committed on
Commit
·
d201228
1
Parent(s):
423bf9b
Add Agent column
Browse files
- app.py +6 -1
- src/display/utils.py +1 -0
app.py
CHANGED
@@ -60,7 +60,12 @@ def init_leaderboard(dataframe, benchmark_type):
|
|
60 |
if dataframe is None or dataframe.empty:
|
61 |
raise ValueError("Leaderboard DataFrame is empty or None.")
|
62 |
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
# styler = dataframe.style.apply(bold_max, subset=pd.IndexSlice[:, dataframe.columns[1:]])
|
66 |
|
|
|
60 |
if dataframe is None or dataframe.empty:
|
61 |
raise ValueError("Leaderboard DataFrame is empty or None.")
|
62 |
|
63 |
+
non_task_cols = ["Model"]
|
64 |
+
if benchmark_type == "agentic":
|
65 |
+
# Add column for type of agent, as of now all models use the basic inspect agent
|
66 |
+
dataframe["Agent"] = ["[Basic Agent](https://inspect.ai-safety-institute.org.uk/agents.html#sec-basic-agent)"]*len(dataframe)
|
67 |
+
non_task_cols.append("Agent")
|
68 |
+
AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
|
69 |
|
70 |
# styler = dataframe.style.apply(bold_max, subset=pd.IndexSlice[:, dataframe.columns[1:]])
|
71 |
|
src/display/utils.py
CHANGED
@@ -27,6 +27,7 @@ class ColumnContent:
|
|
27 |
auto_eval_column_dict = []
|
28 |
# Init
|
29 |
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
|
|
|
30 |
# Scores
|
31 |
for task in Tasks:
|
32 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "markdown", True)])
|
|
|
27 |
auto_eval_column_dict = []
|
28 |
# Init
|
29 |
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
|
30 |
+
auto_eval_column_dict.append(["agent", ColumnContent, ColumnContent("Agent", "markdown", True, never_hidden=True)])
|
31 |
# Scores
|
32 |
for task in Tasks:
|
33 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "markdown", True)])
|