xeon27
committed on
Commit
·
b1accaf
1
Parent(s):
bad4049
Fix bug
Browse files
- app.py +1 -2
- src/leaderboard/read_evals.py +2 -0
app.py
CHANGED
@@ -62,8 +62,7 @@ def init_leaderboard(dataframe, benchmark_type):
|
|
62 |
|
63 |
non_task_cols = ["Model"]
|
64 |
if benchmark_type == "agentic":
|
65 |
-
#
|
66 |
-
dataframe["Agent"] = ["[Basic Agent](https://inspect.ai-safety-institute.org.uk/agents.html#sec-basic-agent)"]*(dataframe.shape[0])
|
67 |
non_task_cols.append("Agent")
|
68 |
AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
|
69 |
|
|
|
62 |
|
63 |
non_task_cols = ["Model"]
|
64 |
if benchmark_type == "agentic":
|
65 |
+
# Include agent column
|
|
|
66 |
non_task_cols.append("Agent")
|
67 |
AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
|
68 |
|
src/leaderboard/read_evals.py
CHANGED
@@ -118,6 +118,8 @@ class EvalResult:
|
|
118 |
data_dict = {
|
119 |
"eval_name": self.eval_name, # not a column, just a save name,
|
120 |
AutoEvalColumn.model.name: make_clickable_model(self.model_version, self.revision),
|
|
|
|
|
121 |
}
|
122 |
|
123 |
for task in Tasks:
|
|
|
118 |
data_dict = {
|
119 |
"eval_name": self.eval_name, # not a column, just a save name,
|
120 |
AutoEvalColumn.model.name: make_clickable_model(self.model_version, self.revision),
|
121 |
+
# As of now all models use the basic inspect agent
|
122 |
+
AutoEvalColumn.agent.name: "[Basic Agent](https://inspect.ai-safety-institute.org.uk/agents.html#sec-basic-agent)"
|
123 |
}
|
124 |
|
125 |
for task in Tasks:
|