Rank + others
Browse files
app.py
CHANGED
@@ -75,17 +75,7 @@ def init_leaderboard(dataframe):
|
|
75 |
|
76 |
ColumnFilter(AutoEvalColumn.model_source.name, type="checkboxgroup", label="Model Source"),
|
77 |
ColumnFilter(AutoEvalColumn.model_category.name, type="checkboxgroup", label="Model Category"),
|
78 |
-
|
79 |
-
ColumnFilter(
|
80 |
-
AutoEvalColumn.params.name,
|
81 |
-
type="slider",
|
82 |
-
min=0.01,
|
83 |
-
max=150,
|
84 |
-
label="Select the number of parameters (B)",
|
85 |
-
),
|
86 |
-
#ColumnFilter(
|
87 |
-
# AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
|
88 |
-
#),
|
89 |
],
|
90 |
bool_checkboxgroup_label="Hide models",
|
91 |
interactive=True,
|
|
|
75 |
|
76 |
ColumnFilter(AutoEvalColumn.model_source.name, type="checkboxgroup", label="Model Source"),
|
77 |
ColumnFilter(AutoEvalColumn.model_category.name, type="checkboxgroup", label="Model Category"),
|
78 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
],
|
80 |
bool_checkboxgroup_label="Hide models",
|
81 |
interactive=True,
|
results/open-ai/chatgpt-3.5-turbo_results_2025-04-21 16:28:50.730625.json
CHANGED
@@ -30,7 +30,7 @@
|
|
30 |
"model_sha": "NA",
|
31 |
"submitted_time": "2025-04-21 16:28:38",
|
32 |
"likes": -1,
|
33 |
-
"params":
|
34 |
"license": "closed",
|
35 |
"model_source": "API",
|
36 |
"model_category": "Large"
|
|
|
30 |
"model_sha": "NA",
|
31 |
"submitted_time": "2025-04-21 16:28:38",
|
32 |
"likes": -1,
|
33 |
+
"params": 999,
|
34 |
"license": "closed",
|
35 |
"model_source": "API",
|
36 |
"model_category": "Large"
|
src/display/utils.py
CHANGED
@@ -23,6 +23,8 @@ class ColumnContent:
|
|
23 |
## Leaderboard columns
|
24 |
auto_eval_column_dict = []
|
25 |
# Init
|
|
|
|
|
26 |
auto_eval_column_dict.append(["model_source", ColumnContent, ColumnContent("Source", "str", True, False)])
|
27 |
auto_eval_column_dict.append(["model_category", ColumnContent, ColumnContent("Category", "str", True, False)])
|
28 |
|
@@ -30,7 +32,7 @@ auto_eval_column_dict.append(["model_category", ColumnContent, ColumnContent("Ca
|
|
30 |
#auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
|
31 |
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
|
32 |
#Scores
|
33 |
-
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average
|
34 |
for eval_dim in EvalDimensions:
|
35 |
auto_eval_column_dict.append([eval_dim.name, ColumnContent, ColumnContent(eval_dim.value.col_name, "number", True)])
|
36 |
# Model information
|
@@ -39,9 +41,9 @@ for eval_dim in EvalDimensions:
|
|
39 |
#auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
40 |
#auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
41 |
#auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
|
42 |
-
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("
|
43 |
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
44 |
-
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("
|
45 |
#auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
46 |
#auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
47 |
|
|
|
23 |
## Leaderboard columns
|
24 |
auto_eval_column_dict = []
|
25 |
# Init
|
26 |
+
auto_eval_column_dict.append(["rank", ColumnContent, ColumnContent("Rank", "str", True, False)])
|
27 |
+
|
28 |
auto_eval_column_dict.append(["model_source", ColumnContent, ColumnContent("Source", "str", True, False)])
|
29 |
auto_eval_column_dict.append(["model_category", ColumnContent, ColumnContent("Category", "str", True, False)])
|
30 |
|
|
|
32 |
#auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
|
33 |
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
|
34 |
#Scores
|
35 |
+
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average", "number", True)])
|
36 |
for eval_dim in EvalDimensions:
|
37 |
auto_eval_column_dict.append([eval_dim.name, ColumnContent, ColumnContent(eval_dim.value.col_name, "number", True)])
|
38 |
# Model information
|
|
|
41 |
#auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
42 |
#auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
43 |
#auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
|
44 |
+
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("License", "str", False)])
|
45 |
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
46 |
+
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Popularity (Likes)", "number", False)])
|
47 |
#auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
48 |
#auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
49 |
|
src/leaderboard/read_evals.py
CHANGED
@@ -88,6 +88,9 @@ class EvalResult:
|
|
88 |
model=model,
|
89 |
model_source=config.get("model_source", ""),
|
90 |
model_category=config.get("model_category", ""),
|
|
|
|
|
|
|
91 |
results=results,
|
92 |
#precision=precision,
|
93 |
#revision= config.get("model_sha", ""),
|
@@ -104,9 +107,9 @@ class EvalResult:
|
|
104 |
|
105 |
#self.model_type = ModelType.from_str(request.get("model_type", ""))
|
106 |
#self.weight_type = WeightType[request.get("weight_type", "Original")]
|
107 |
-
self.license = request.get("license", "?")
|
108 |
-
self.likes = request.get("likes", 0)
|
109 |
-
self.
|
110 |
self.date = request.get("submitted_time", "")
|
111 |
except Exception:
|
112 |
print(f"Could not find request file for {self.org}/{self.model}") # with precision {self.precision.value.name}
|
|
|
88 |
model=model,
|
89 |
model_source=config.get("model_source", ""),
|
90 |
model_category=config.get("model_category", ""),
|
91 |
+
num_params=config.get("params", 0),
|
92 |
+
license=config.get("license", "?"),
|
93 |
+
likes=config.get("likes", -1),
|
94 |
results=results,
|
95 |
#precision=precision,
|
96 |
#revision= config.get("model_sha", ""),
|
|
|
107 |
|
108 |
#self.model_type = ModelType.from_str(request.get("model_type", ""))
|
109 |
#self.weight_type = WeightType[request.get("weight_type", "Original")]
|
110 |
+
#self.license = request.get("license", "?")
|
111 |
+
#self.likes = request.get("likes", 0)
|
112 |
+
#self.params = request.get("params", 0)
|
113 |
self.date = request.get("submitted_time", "")
|
114 |
except Exception:
|
115 |
print(f"Could not find request file for {self.org}/{self.model}") # with precision {self.precision.value.name}
|
src/populate.py
CHANGED
@@ -14,13 +14,17 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
14 |
all_data_json = [v.to_dict() for v in raw_data]
|
15 |
|
16 |
df = pd.DataFrame.from_records(all_data_json)
|
17 |
-
|
18 |
if not df.empty:
|
19 |
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
20 |
-
|
21 |
|
22 |
# filter out if any of the benchmarks have not been produced
|
23 |
df = df[has_no_nan_values(df, benchmark_cols)]
|
|
|
|
|
|
|
|
|
24 |
return df
|
25 |
else:
|
26 |
return pd.DataFrame(columns=cols)
|
|
|
14 |
all_data_json = [v.to_dict() for v in raw_data]
|
15 |
|
16 |
df = pd.DataFrame.from_records(all_data_json)
|
17 |
+
|
18 |
if not df.empty:
|
19 |
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
20 |
+
|
21 |
|
22 |
# filter out if any of the benchmarks have not been produced
|
23 |
df = df[has_no_nan_values(df, benchmark_cols)]
|
24 |
+
|
25 |
+
df.insert(0, "Rank", range(1, len(df) + 1))
|
26 |
+
df = df[cols].round(decimals=2)
|
27 |
+
print(df)
|
28 |
return df
|
29 |
else:
|
30 |
return pd.DataFrame(columns=cols)
|