xeon27
committed on
Commit
·
36244aa
1
Parent(s):
15e5347
Make task names clickable and link to inspect-evals repo
Browse files
- src/display/utils.py +1 -1
- src/populate.py +4 -0
src/display/utils.py
CHANGED
@@ -28,7 +28,7 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
|
|
28 |
#Scores
|
29 |
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
|
30 |
for task in Tasks:
|
31 |
-
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(
|
32 |
# # Model information
|
33 |
# auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
34 |
# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
|
|
28 |
#Scores
|
29 |
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
|
30 |
for task in Tasks:
|
31 |
+
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "markdown", True)])
|
32 |
# # Model information
|
33 |
# auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
34 |
# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
src/populate.py
CHANGED
@@ -14,6 +14,7 @@ for task in Tasks:
|
|
14 |
TASK_NAME_INVERSE_MAP[task.value.col_name] = {
|
15 |
"name": task.value.benchmark,
|
16 |
"type": task.value.type,
|
|
|
17 |
}
|
18 |
|
19 |
|
@@ -45,6 +46,9 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
45 |
for col in benchmark_cols:
|
46 |
df[col] = df[[AutoEvalColumn.model.name, col]].apply(lambda x: f"[{x[col]}]({get_inspect_log_url(model_name=x[AutoEvalColumn.model.name].split('>')[1].split('<')[0], benchmark_name=TASK_NAME_INVERSE_MAP[col]['name'])})", axis=1)
|
47 |
|
|
|
|
|
|
|
48 |
return df
|
49 |
|
50 |
|
|
|
14 |
TASK_NAME_INVERSE_MAP[task.value.col_name] = {
|
15 |
"name": task.value.benchmark,
|
16 |
"type": task.value.type,
|
17 |
+
"source": task.value.source,
|
18 |
}
|
19 |
|
20 |
|
|
|
46 |
for col in benchmark_cols:
|
47 |
df[col] = df[[AutoEvalColumn.model.name, col]].apply(lambda x: f"[{x[col]}]({get_inspect_log_url(model_name=x[AutoEvalColumn.model.name].split('>')[1].split('<')[0], benchmark_name=TASK_NAME_INVERSE_MAP[col]['name'])})", axis=1)
|
48 |
|
49 |
+
# make task names clickable and link to inspect-evals repository
|
50 |
+
df = df.rename(columns={col: f"[{col}]({TASK_NAME_INVERSE_MAP[col]['source']})" for col in benchmark_cols})
|
51 |
+
|
52 |
return df
|
53 |
|
54 |
|