eval-leaderboard

Running

xeon27 committed on Jan 24

Commit

a319d81

1 Parent(s): e7a2635

Use dash symbol for markdown

Files changed (1) hide show

src/populate.py CHANGED Viewed

@@ -18,6 +18,8 @@ for task in Tasks:
         "source": task.value.source,
     }
 def get_inspect_log_url(model_name: str, benchmark_name: str) -> str:
     """Returns the URL to the log file for a given model and benchmark"""
@@ -44,12 +46,12 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     # # filter out if any of the benchmarks have not been produced
     # df = df[has_no_nan_values(df, benchmark_cols)]
-    df = df.fillna("-")
     print(df["GAIA"].head())
     # make values clickable and link to log files
     for col in benchmark_cols:
-        df[col] = df[[AutoEvalColumn.model.name, col]].apply(lambda x: f"[{x[col]}]({get_inspect_log_url(model_name=x[AutoEvalColumn.model.name].split('>')[1].split('<')[0], benchmark_name=TASK_NAME_INVERSE_MAP[col]['name'])})" if x[col] != "-" else x[col], axis=1)
     # # make task names clickable and link to inspect-evals repository - this creates issues later
     # df = df.rename(columns={col: f"[{col}]({TASK_NAME_INVERSE_MAP[col]['source']})" for col in benchmark_cols})

         "source": task.value.source,
     }
+EMPTY_SYMBOL = "&ndash"
 def get_inspect_log_url(model_name: str, benchmark_name: str) -> str:
     """Returns the URL to the log file for a given model and benchmark"""
     # # filter out if any of the benchmarks have not been produced
     # df = df[has_no_nan_values(df, benchmark_cols)]
+    df = df.fillna(EMPTY_SYMBOL)
     print(df["GAIA"].head())
     # make values clickable and link to log files
     for col in benchmark_cols:
+        df[col] = df[[AutoEvalColumn.model.name, col]].apply(lambda x: f"[{x[col]}]({get_inspect_log_url(model_name=x[AutoEvalColumn.model.name].split('>')[1].split('<')[0], benchmark_name=TASK_NAME_INVERSE_MAP[col]['name'])})" if x[col] != EMPTY_SYMBOL else x[col], axis=1)
     # # make task names clickable and link to inspect-evals repository - this creates issues later
     # df = df.rename(columns={col: f"[{col}]({TASK_NAME_INVERSE_MAP[col]['source']})" for col in benchmark_cols})