xeon27 committed
Commit 36244aa · 1 Parent(s): 15e5347

Make task names clickable and link to inspect-evals repo

Files changed (2):
  1. src/display/utils.py +1 -1
  2. src/populate.py +4 -0
src/display/utils.py CHANGED

@@ -28,7 +28,7 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
-    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(f"[{task.value.col_name}]({task.value.source})", "markdown", True)])
+    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "markdown", True)])
 # # Model information
 # auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 # auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
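The column definition now stores the plain task name; the link is attached later in populate.py. Below is a minimal sketch of the resulting behavior, assuming ColumnContent is a simple (name, type, displayed_by_default) record and Tasks is an enum of task metadata — both hypothetical stand-ins for the repo's real definitions:

```python
from dataclasses import dataclass
from enum import Enum

@dataclass(frozen=True)
class ColumnContent:
    name: str                   # header text shown in the leaderboard
    type: str                   # dataframe datatype, e.g. "markdown" or "number"
    displayed_by_default: bool

@dataclass(frozen=True)
class Task:
    benchmark: str
    col_name: str
    type: str
    source: str                 # URL of the task's page (placeholder field)

class Tasks(Enum):
    # hypothetical entry; the real task list lives elsewhere in the repo
    gsm8k = Task("gsm8k", "GSM8K", "reasoning", "https://example.com/gsm8k")

auto_eval_column_dict = []
for task in Tasks:
    # after this commit: plain column name; the link is added in populate.py
    auto_eval_column_dict.append(
        [task.name, ColumnContent, ColumnContent(task.value.col_name, "markdown", True)]
    )

print(auto_eval_column_dict[0][2].name)  # -> "GSM8K"
```

The column keeps its "markdown" datatype, so linked cell contents (and the linked headers added in populate.py) still render as markdown in the UI.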
src/populate.py CHANGED

@@ -14,6 +14,7 @@ for task in Tasks:
     TASK_NAME_INVERSE_MAP[task.value.col_name] = {
         "name": task.value.benchmark,
         "type": task.value.type,
+        "source": task.value.source,
     }


@@ -45,6 +46,9 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     for col in benchmark_cols:
         df[col] = df[[AutoEvalColumn.model.name, col]].apply(lambda x: f"[{x[col]}]({get_inspect_log_url(model_name=x[AutoEvalColumn.model.name].split('>')[1].split('<')[0], benchmark_name=TASK_NAME_INVERSE_MAP[col]['name'])})", axis=1)

+    # make task names clickable and link to inspect-evals repository
+    df = df.rename(columns={col: f"[{col}]({TASK_NAME_INVERSE_MAP[col]['source']})" for col in benchmark_cols})
+
     return df

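A minimal sketch of what the new rename pass does, assuming a small pandas DataFrame and a placeholder entry in TASK_NAME_INVERSE_MAP (the task name and URL below are illustrative, not taken from the repo):

```python
import pandas as pd

# hypothetical inverse map mirroring the "source" field added above
TASK_NAME_INVERSE_MAP = {
    "GSM8K": {
        "name": "gsm8k",
        "type": "reasoning",
        "source": "https://example.com/inspect-evals/gsm8k",
    },
}

benchmark_cols = ["GSM8K"]
df = pd.DataFrame({"Model": ["my-model"], "GSM8K": [0.87]})

# same dict-comprehension rename as the diff: each benchmark header
# becomes a markdown link pointing at the task's source URL
df = df.rename(
    columns={col: f"[{col}]({TASK_NAME_INVERSE_MAP[col]['source']})" for col in benchmark_cols}
)

print(list(df.columns))
# ['Model', '[GSM8K](https://example.com/inspect-evals/gsm8k)']
```

Note that the rename runs after the per-cell linking loop, so the dict comprehension still sees the original column names that key TASK_NAME_INVERSE_MAP.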