Spaces:
Running
Running
[email protected]
committed on
Commit
·
e83e5e0
1
Parent(s):
2d95777
update
Browse files
- src/leaderboard/read_evals.py +1 -1
- src/populate.py +2 -2
src/leaderboard/read_evals.py
CHANGED
@@ -109,7 +109,7 @@ class EvalResult:
|
|
109 |
|
110 |
def to_dict(self, task_class):
|
111 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
112 |
-
average = sum([v for v in self.results.values() if v is not None]) / len(
|
113 |
data_dict = {
|
114 |
"eval_name": self.eval_name, # not a column, just a save name,
|
115 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
|
|
109 |
|
110 |
def to_dict(self, task_class):
|
111 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
112 |
+
average = sum([v for v in self.results.values() if v is not None]) / len(task_class)
|
113 |
data_dict = {
|
114 |
"eval_name": self.eval_name, # not a column, just a save name,
|
115 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
src/populate.py
CHANGED
@@ -4,7 +4,7 @@ import os
|
|
4 |
import pandas as pd
|
5 |
|
6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
7 |
-
from src.display.utils import
|
8 |
from src.leaderboard.read_evals import get_raw_eval_results
|
9 |
|
10 |
|
@@ -17,7 +17,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
17 |
|
18 |
df = pd.DataFrame.from_records(all_data_json)
|
19 |
print(df)
|
20 |
-
df = df.sort_values(by=[
|
21 |
df = df[cols].round(decimals=2)
|
22 |
|
23 |
# filter out if any of the benchmarks have not been produced
|
|
|
4 |
import pandas as pd
|
5 |
|
6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
7 |
+
from src.display.utils import AutoEvalColumn, EvalQueueColumn
|
8 |
from src.leaderboard.read_evals import get_raw_eval_results
|
9 |
|
10 |
|
|
|
17 |
|
18 |
df = pd.DataFrame.from_records(all_data_json)
|
19 |
print(df)
|
20 |
+
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
21 |
df = df[cols].round(decimals=2)
|
22 |
|
23 |
# filter out if any of the benchmarks have not been produced
|