xeon27 committed on
Commit
d201228
·
1 Parent(s): 423bf9b

Add Agent column

Browse files
Files changed (2) hide show
  1. app.py +6 -1
  2. src/display/utils.py +1 -0
app.py CHANGED
@@ -60,7 +60,12 @@ def init_leaderboard(dataframe, benchmark_type):
60
  if dataframe is None or dataframe.empty:
61
  raise ValueError("Leaderboard DataFrame is empty or None.")
62
 
63
- AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name=="Model") or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
 
 
 
 
 
64
 
65
  # styler = dataframe.style.apply(bold_max, subset=pd.IndexSlice[:, dataframe.columns[1:]])
66
 
 
60
  if dataframe is None or dataframe.empty:
61
  raise ValueError("Leaderboard DataFrame is empty or None.")
62
 
63
+ non_task_cols = ["Model"]
64
+ if benchmark_type == "agentic":
65
+ # Add column for type of agent, as of now all models use the basic inspect agent
66
+ dataframe["Agent"] = ["[Basic Agent](https://inspect.ai-safety-institute.org.uk/agents.html#sec-basic-agent)"]*len(dataframe)
67
+ non_task_cols.append("Agent")
68
+ AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name in non_task_cols) or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
69
 
70
  # styler = dataframe.style.apply(bold_max, subset=pd.IndexSlice[:, dataframe.columns[1:]])
71
 
src/display/utils.py CHANGED
@@ -27,6 +27,7 @@ class ColumnContent:
27
  auto_eval_column_dict = []
28
  # Init
29
  auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 
30
  # Scores
31
  for task in Tasks:
32
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "markdown", True)])
 
27
  auto_eval_column_dict = []
28
  # Init
29
  auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
30
+ auto_eval_column_dict.append(["agent", ColumnContent, ColumnContent("Agent", "markdown", True, never_hidden=True)])
31
  # Scores
32
  for task in Tasks:
33
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "markdown", True)])