[email protected] committed on
Commit
e83e5e0
·
1 Parent(s): 2d95777
Files changed (2) hide show
  1. src/leaderboard/read_evals.py +1 -1
  2. src/populate.py +2 -2
src/leaderboard/read_evals.py CHANGED
@@ -109,7 +109,7 @@ class EvalResult:
109
 
110
  def to_dict(self, task_class):
111
  """Converts the Eval Result to a dict compatible with our dataframe display"""
112
- average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
113
  data_dict = {
114
  "eval_name": self.eval_name, # not a column, just a save name,
115
  AutoEvalColumn.precision.name: self.precision.value.name,
 
109
 
110
  def to_dict(self, task_class):
111
  """Converts the Eval Result to a dict compatible with our dataframe display"""
112
+ average = sum([v for v in self.results.values() if v is not None]) / len(task_class)
113
  data_dict = {
114
  "eval_name": self.eval_name, # not a column, just a save name,
115
  AutoEvalColumn.precision.name: self.precision.value.name,
src/populate.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  import pandas as pd
5
 
6
  from src.display.formatting import has_no_nan_values, make_clickable_model
7
- from src.display.utils import AutoEvalColumnAsset, EvalQueueColumn
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
@@ -17,7 +17,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
17
 
18
  df = pd.DataFrame.from_records(all_data_json)
19
  print(df)
20
- df = df.sort_values(by=[AutoEvalColumnAsset.average.name], ascending=False)
21
  df = df[cols].round(decimals=2)
22
 
23
  # filter out if any of the benchmarks have not been produced
 
4
  import pandas as pd
5
 
6
  from src.display.formatting import has_no_nan_values, make_clickable_model
7
+ from src.display.utils import AutoEvalColumn, EvalQueueColumn
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
 
17
 
18
  df = pd.DataFrame.from_records(all_data_json)
19
  print(df)
20
+ df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
21
  df = df[cols].round(decimals=2)
22
 
23
  # filter out if any of the benchmarks have not been produced