eval-leaderboard

Running

xeon27 committed on Jan 24

Commit

18638a9

1 Parent(s): cd53742

Replace missing values by None

Files changed (2) hide show

refactor_eval_results.py CHANGED Viewed

@@ -96,7 +96,7 @@ def main():
         # TMP: Add dummy agentic benchmarks to the results
         for metric in METRIC_NAME.items():
             if metric[0] not in results["results"]:
-                results["results"].update({metric[0]: {metric[1]: -1.0}})
         if os.path.isdir(os.path.join(agentic_bm_input_path, model_name)):
             agentic_bm_results = combine_eval_results(agentic_bm_input_path, model_name)
             results["results"].update(agentic_bm_results["results"])

         # TMP: Add dummy agentic benchmarks to the results
         for metric in METRIC_NAME.items():
             if metric[0] not in results["results"]:
+                results["results"].update({metric[0]: {metric[1]: None}})
         if os.path.isdir(os.path.join(agentic_bm_input_path, model_name)):
             agentic_bm_results = combine_eval_results(agentic_bm_input_path, model_name)
             results["results"].update(agentic_bm_results["results"])

src/populate.py CHANGED Viewed

@@ -41,8 +41,12 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
     # filter out if any of the benchmarks have not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
     # make values clickable and link to log files
     for col in benchmark_cols:

     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
+    # TMP: Debug
+    print(df.shape())
     # filter out if any of the benchmarks have not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
+    # TMP: Debug
+    print(df.shape())
     # make values clickable and link to log files
     for col in benchmark_cols: