Spaces:
Sleeping
Sleeping
Clémentine
committed on
Commit
·
0c7ef71
1
Parent(s):
9d02a6b
wip
Browse files
- app.py +42 -23
- src/envs.py +3 -0
- src/leaderboard/read_evals.py +19 -43
- src/populate.py +2 -2
- {scripts → src/scripts}/create_request_file.py +5 -20
- src/scripts/update_all_request_files.py +97 -0
- src/submission/check_validity.py +13 -7
- src/submission/submit.py +52 -3
app.py
CHANGED
@@ -27,7 +27,7 @@ from src.display.utils import (
|
|
27 |
WeightType,
|
28 |
Precision
|
29 |
)
|
30 |
-
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
|
31 |
from src.populate import get_evaluation_queue_df, get_leaderboard_df
|
32 |
from src.submission.submit import add_new_eval
|
33 |
from src.tools.collections import update_collections
|
@@ -43,33 +43,52 @@ enable_space_ci()
|
|
43 |
def restart_space():
|
44 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
)
|
58 |
-
|
59 |
-
|
60 |
|
|
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
65 |
|
66 |
-
plot_df
|
67 |
|
68 |
-
(
|
69 |
-
finished_eval_queue_df,
|
70 |
-
running_eval_queue_df,
|
71 |
-
pending_eval_queue_df,
|
72 |
-
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
73 |
|
74 |
|
75 |
# Searching and filtering
|
|
|
27 |
WeightType,
|
28 |
Precision
|
29 |
)
|
30 |
+
from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
|
31 |
from src.populate import get_evaluation_queue_df, get_leaderboard_df
|
32 |
from src.submission.submit import add_new_eval
|
33 |
from src.tools.collections import update_collections
|
|
|
43 |
def restart_space():
|
44 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
45 |
|
46 |
+
|
47 |
+
def init_space():
    """Download the datasets backing the Space and build the dataframes shown in the UI.

    Three dataset snapshots are fetched in order (evaluation requests, dynamic
    model information, evaluation results). When a download raises, the Space
    restart is requested through ``restart_space()``.

    Returns:
        A 6-tuple ``(leaderboard_df, original_df, plot_df, finished_eval_queue_df,
        running_eval_queue_df, pending_eval_queue_df)``.
    """
    # (repo id, local directory) pairs, in the order they are downloaded.
    snapshots = (
        (QUEUE_REPO, EVAL_REQUESTS_PATH),
        (DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH),
        (RESULTS_REPO, EVAL_RESULTS_PATH),
    )
    for repo_id, local_dir in snapshots:
        try:
            print(local_dir)
            snapshot_download(
                repo_id=repo_id, local_dir=local_dir, repo_type="dataset", tqdm_class=None, etag_timeout=30
            )
        except Exception:
            restart_space()

    raw_data, original_df = get_leaderboard_df(
        results_path=EVAL_RESULTS_PATH,
        requests_path=EVAL_REQUESTS_PATH,
        dynamic_path=DYNAMIC_INFO_FILE_PATH,
        cols=COLS,
        benchmark_cols=BENCHMARK_COLS,
    )
    # Each consumer gets its own copy of the leaderboard frame.
    update_collections(original_df.copy())
    leaderboard_df = original_df.copy()

    plot_df = create_plot_df(create_scores_df(raw_data))

    finished_df, running_df, pending_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)

    return leaderboard_df, original_df, plot_df, finished_df, running_df, pending_df
|
90 |
|
91 |
+
leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
|
|
|
|
|
|
|
|
|
92 |
|
93 |
|
94 |
# Searching and filtering
|
src/envs.py
CHANGED
@@ -7,6 +7,7 @@ H4_TOKEN = os.environ.get("H4_TOKEN", None)
|
|
7 |
|
8 |
REPO_ID = "HuggingFaceH4/open_llm_leaderboard"
|
9 |
QUEUE_REPO = "open-llm-leaderboard/requests"
|
|
|
10 |
RESULTS_REPO = "open-llm-leaderboard/results"
|
11 |
|
12 |
PRIVATE_QUEUE_REPO = "open-llm-leaderboard/private-requests"
|
@@ -18,6 +19,8 @@ CACHE_PATH=os.getenv("HF_HOME", ".")
|
|
18 |
|
19 |
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
|
20 |
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
|
|
|
|
|
21 |
|
22 |
EVAL_REQUESTS_PATH_PRIVATE = "eval-queue-private"
|
23 |
EVAL_RESULTS_PATH_PRIVATE = "eval-results-private"
|
|
|
7 |
|
8 |
REPO_ID = "HuggingFaceH4/open_llm_leaderboard"
|
9 |
QUEUE_REPO = "open-llm-leaderboard/requests"
|
10 |
+
DYNAMIC_INFO_REPO = "open-llm-leaderboard/dynamic_model_information"
|
11 |
RESULTS_REPO = "open-llm-leaderboard/results"
|
12 |
|
13 |
PRIVATE_QUEUE_REPO = "open-llm-leaderboard/private-requests"
|
|
|
19 |
|
20 |
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
|
21 |
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
|
22 |
+
DYNAMIC_INFO_PATH = os.path.join(CACHE_PATH, "dynamic-info")
|
23 |
+
DYNAMIC_INFO_FILE_PATH = os.path.join(DYNAMIC_INFO_PATH, "model_infos.json")
|
24 |
|
25 |
EVAL_REQUESTS_PATH_PRIVATE = "eval-queue-private"
|
26 |
EVAL_RESULTS_PATH_PRIVATE = "eval-results-private"
|
src/leaderboard/read_evals.py
CHANGED
@@ -11,7 +11,6 @@ from huggingface_hub import ModelCard
|
|
11 |
|
12 |
from src.display.formatting import make_clickable_model
|
13 |
from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
|
14 |
-
from src.submission.check_validity import is_model_on_hub, check_model_card
|
15 |
|
16 |
|
17 |
@dataclass
|
@@ -34,6 +33,7 @@ class EvalResult:
|
|
34 |
still_on_hub: bool = False
|
35 |
is_merge: bool = False
|
36 |
flagged: bool = False
|
|
|
37 |
|
38 |
@classmethod
|
39 |
def init_from_json_file(self, json_filepath):
|
@@ -42,13 +42,13 @@ class EvalResult:
|
|
42 |
data = json.load(fp)
|
43 |
|
44 |
# We manage the legacy config format
|
45 |
-
config = data.get("
|
46 |
|
47 |
# Precision
|
48 |
precision = Precision.from_str(config.get("model_dtype"))
|
49 |
|
50 |
# Get model and org
|
51 |
-
org_and_model = config.get("model_name"
|
52 |
org_and_model = org_and_model.split("/", 1)
|
53 |
|
54 |
if len(org_and_model) == 1:
|
@@ -61,37 +61,6 @@ class EvalResult:
|
|
61 |
result_key = f"{org}_{model}_{precision.value.name}"
|
62 |
full_model = "/".join(org_and_model)
|
63 |
|
64 |
-
still_on_hub, error, model_config = is_model_on_hub(
|
65 |
-
full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
|
66 |
-
)
|
67 |
-
architecture = "?"
|
68 |
-
if model_config is not None:
|
69 |
-
architectures = getattr(model_config, "architectures", None)
|
70 |
-
if architectures:
|
71 |
-
architecture = ";".join(architectures)
|
72 |
-
|
73 |
-
# If the model doesn't have a model card or a license, we consider it's deleted
|
74 |
-
if still_on_hub:
|
75 |
-
try:
|
76 |
-
if check_model_card(full_model)[0] is False:
|
77 |
-
still_on_hub = False
|
78 |
-
except Exception:
|
79 |
-
still_on_hub = False
|
80 |
-
|
81 |
-
# Check if the model is a merge
|
82 |
-
is_merge_from_metadata = False
|
83 |
-
flagged = False
|
84 |
-
if still_on_hub:
|
85 |
-
model_card = ModelCard.load(full_model)
|
86 |
-
|
87 |
-
if model_card.data.tags:
|
88 |
-
is_merge_from_metadata = "merge" in model_card.data.tags
|
89 |
-
merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
|
90 |
-
# If the model is a merge but not saying it in the metadata, we flag it
|
91 |
-
is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
|
92 |
-
flagged = is_merge_from_model_card and not is_merge_from_metadata
|
93 |
-
|
94 |
-
|
95 |
# Extract results available in this file (some results are split in several files)
|
96 |
results = {}
|
97 |
for task in Tasks:
|
@@ -128,10 +97,6 @@ class EvalResult:
|
|
128 |
results=results,
|
129 |
precision=precision,
|
130 |
revision= config.get("model_sha", ""),
|
131 |
-
still_on_hub=still_on_hub,
|
132 |
-
architecture=architecture,
|
133 |
-
is_merge=is_merge_from_metadata,
|
134 |
-
flagged=flagged,
|
135 |
)
|
136 |
|
137 |
def update_with_request_file(self, requests_path):
|
@@ -143,13 +108,21 @@ class EvalResult:
|
|
143 |
request = json.load(f)
|
144 |
self.model_type = ModelType.from_str(request.get("model_type", ""))
|
145 |
self.weight_type = WeightType[request.get("weight_type", "Original")]
|
146 |
-
self.license = request.get("license", "?")
|
147 |
-
self.likes = request.get("likes", 0)
|
148 |
self.num_params = request.get("params", 0)
|
149 |
self.date = request.get("submitted_time", "")
|
|
|
150 |
except Exception:
|
151 |
print(f"Could not find request file for {self.org}/{self.model}")
|
152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
def to_dict(self):
|
154 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
155 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
@@ -158,7 +131,7 @@ class EvalResult:
|
|
158 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
159 |
AutoEvalColumn.model_type.name: self.model_type.value.name,
|
160 |
AutoEvalColumn.merged.name: self.is_merge,
|
161 |
-
AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
162 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
163 |
AutoEvalColumn.architecture.name: self.architecture,
|
164 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
@@ -170,7 +143,6 @@ class EvalResult:
|
|
170 |
AutoEvalColumn.params.name: self.num_params,
|
171 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
172 |
AutoEvalColumn.flagged.name: self.flagged
|
173 |
-
|
174 |
}
|
175 |
|
176 |
for task in Tasks:
|
@@ -201,7 +173,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
|
|
201 |
return request_file
|
202 |
|
203 |
|
204 |
-
def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
|
205 |
"""From the path of the results folder root, extract all needed info for results"""
|
206 |
model_result_filepaths = []
|
207 |
|
@@ -219,11 +191,15 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
219 |
for file in files:
|
220 |
model_result_filepaths.append(os.path.join(root, file))
|
221 |
|
|
|
|
|
|
|
222 |
eval_results = {}
|
223 |
for model_result_filepath in model_result_filepaths:
|
224 |
# Creation of result
|
225 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
226 |
eval_result.update_with_request_file(requests_path)
|
|
|
227 |
|
228 |
# Store results of same eval together
|
229 |
eval_name = eval_result.eval_name
|
|
|
11 |
|
12 |
from src.display.formatting import make_clickable_model
|
13 |
from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
|
|
|
14 |
|
15 |
|
16 |
@dataclass
|
|
|
33 |
still_on_hub: bool = False
|
34 |
is_merge: bool = False
|
35 |
flagged: bool = False
|
36 |
+
tags: list = None
|
37 |
|
38 |
@classmethod
|
39 |
def init_from_json_file(self, json_filepath):
|
|
|
42 |
data = json.load(fp)
|
43 |
|
44 |
# We manage the legacy config format
|
45 |
+
config = data.get("config_general")
|
46 |
|
47 |
# Precision
|
48 |
precision = Precision.from_str(config.get("model_dtype"))
|
49 |
|
50 |
# Get model and org
|
51 |
+
org_and_model = config.get("model_name")
|
52 |
org_and_model = org_and_model.split("/", 1)
|
53 |
|
54 |
if len(org_and_model) == 1:
|
|
|
61 |
result_key = f"{org}_{model}_{precision.value.name}"
|
62 |
full_model = "/".join(org_and_model)
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
# Extract results available in this file (some results are split in several files)
|
65 |
results = {}
|
66 |
for task in Tasks:
|
|
|
97 |
results=results,
|
98 |
precision=precision,
|
99 |
revision= config.get("model_sha", ""),
|
|
|
|
|
|
|
|
|
100 |
)
|
101 |
|
102 |
def update_with_request_file(self, requests_path):
|
|
|
108 |
request = json.load(f)
|
109 |
self.model_type = ModelType.from_str(request.get("model_type", ""))
|
110 |
self.weight_type = WeightType[request.get("weight_type", "Original")]
|
|
|
|
|
111 |
self.num_params = request.get("params", 0)
|
112 |
self.date = request.get("submitted_time", "")
|
113 |
+
self.architecture = request["architectures"]
|
114 |
except Exception:
|
115 |
print(f"Could not find request file for {self.org}/{self.model}")
|
116 |
|
117 |
+
def update_with_dynamic_file_dict(self, file_dict):
    """Copy the dynamic (Hub-side) information of a model onto this result.

    Args:
        file_dict: the entry for this model read from the dynamic-info JSON file.
            ``still_on_hub`` is required; ``license``, ``likes`` and ``tags`` are
            optional.

    Raises:
        KeyError: if ``still_on_hub`` is missing from ``file_dict``.
    """
    self.license = file_dict.get("license", "?")
    self.likes = file_dict.get("likes", 0)
    self.still_on_hub = file_dict["still_on_hub"]

    # An entry may carry no "tags" key, or a null one: treat both as "no tags"
    # instead of failing on the membership tests below.
    tags = file_dict.get("tags") or []
    self.flagged = any("flagged" in tag for tag in tags)
    self.is_merge = "merge" in tags
    self.tags = tags
|
124 |
+
|
125 |
+
|
126 |
def to_dict(self):
|
127 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
128 |
average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
|
|
131 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
132 |
AutoEvalColumn.model_type.name: self.model_type.value.name,
|
133 |
AutoEvalColumn.merged.name: self.is_merge,
|
134 |
+
AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
135 |
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
136 |
AutoEvalColumn.architecture.name: self.architecture,
|
137 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
|
|
143 |
AutoEvalColumn.params.name: self.num_params,
|
144 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
145 |
AutoEvalColumn.flagged.name: self.flagged
|
|
|
146 |
}
|
147 |
|
148 |
for task in Tasks:
|
|
|
173 |
return request_file
|
174 |
|
175 |
|
176 |
+
def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
|
177 |
"""From the path of the results folder root, extract all needed info for results"""
|
178 |
model_result_filepaths = []
|
179 |
|
|
|
191 |
for file in files:
|
192 |
model_result_filepaths.append(os.path.join(root, file))
|
193 |
|
194 |
+
with open(dynamic_path) as f:
|
195 |
+
dynamic_data = json.load(f)
|
196 |
+
|
197 |
eval_results = {}
|
198 |
for model_result_filepath in model_result_filepaths:
|
199 |
# Creation of result
|
200 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
201 |
eval_result.update_with_request_file(requests_path)
|
202 |
+
eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
|
203 |
|
204 |
# Store results of same eval together
|
205 |
eval_name = eval_result.eval_name
|
src/populate.py
CHANGED
@@ -9,8 +9,8 @@ from src.leaderboard.filter_models import filter_models
|
|
9 |
from src.leaderboard.read_evals import get_raw_eval_results
|
10 |
|
11 |
|
12 |
-
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
|
13 |
-
raw_data = get_raw_eval_results(results_path, requests_path)
|
14 |
all_data_json = [v.to_dict() for v in raw_data]
|
15 |
all_data_json.append(baseline_row)
|
16 |
filter_models(all_data_json)
|
|
|
9 |
from src.leaderboard.read_evals import get_raw_eval_results
|
10 |
|
11 |
|
12 |
+
def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
|
13 |
+
raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path)
|
14 |
all_data_json = [v.to_dict() for v in raw_data]
|
15 |
all_data_json.append(baseline_row)
|
16 |
filter_models(all_data_json)
|
{scripts → src/scripts}/create_request_file.py
RENAMED
@@ -1,36 +1,21 @@
|
|
1 |
import json
|
2 |
import os
|
3 |
import pprint
|
4 |
-
import re
|
5 |
from datetime import datetime, timezone
|
6 |
|
7 |
import click
|
8 |
from colorama import Fore
|
9 |
from huggingface_hub import HfApi, snapshot_download
|
10 |
|
|
|
|
|
|
|
11 |
EVAL_REQUESTS_PATH = "eval-queue"
|
12 |
QUEUE_REPO = "open-llm-leaderboard/requests"
|
13 |
|
14 |
precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
|
15 |
-
model_types =
|
16 |
-
weight_types =
|
17 |
-
|
18 |
-
|
19 |
-
def get_model_size(model_info, precision: str):
|
20 |
-
size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
|
21 |
-
try:
|
22 |
-
model_size = round(model_info.safetensors["total"] / 1e9, 3)
|
23 |
-
except (AttributeError, TypeError):
|
24 |
-
try:
|
25 |
-
size_match = re.search(size_pattern, model_info.modelId.lower())
|
26 |
-
model_size = size_match.group(0)
|
27 |
-
model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
|
28 |
-
except AttributeError:
|
29 |
-
return 0 # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
|
30 |
-
|
31 |
-
size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
|
32 |
-
model_size = size_factor * model_size
|
33 |
-
return model_size
|
34 |
|
35 |
|
36 |
def main():
|
|
|
1 |
import json
|
2 |
import os
|
3 |
import pprint
|
|
|
4 |
from datetime import datetime, timezone
|
5 |
|
6 |
import click
|
7 |
from colorama import Fore
|
8 |
from huggingface_hub import HfApi, snapshot_download
|
9 |
|
10 |
+
from src.submission.check_validity import get_model_size
|
11 |
+
from src.display.utils import ModelType, WeightType
|
12 |
+
|
13 |
EVAL_REQUESTS_PATH = "eval-queue"
|
14 |
QUEUE_REPO = "open-llm-leaderboard/requests"
|
15 |
|
16 |
precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
|
17 |
+
model_types = [e.name for e in ModelType]
|
18 |
+
weight_types = [e.name for e in WeightType]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
|
21 |
def main():
|
src/scripts/update_all_request_files.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import HfApi, ModelFilter, snapshot_download
|
2 |
+
from huggingface_hub import ModelCard
|
3 |
+
|
4 |
+
import json
|
5 |
+
import os
|
6 |
+
import time
|
7 |
+
import shutil
|
8 |
+
from src.submission.check_validity import is_model_on_hub, check_model_card, get_model_size
|
9 |
+
from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, API
|
10 |
+
|
11 |
+
# Token read from the environment (None when the variable is unset).
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Local folder receiving the snapshot of the dynamic-info dataset; it is removed
# at the end of the script.
TMP_FOLDER = "tmp_requests"
snapshot_download(
    repo_id=DYNAMIC_INFO_REPO,
    local_dir=TMP_FOLDER,
    repo_type="dataset",
    tqdm_class=None,
    etag_timeout=30,
)

# Get models
start = time.time()

# Materialise the listing once so that it can be reused after the timing print.
models = list(
    API.list_models(
        filter=ModelFilter(task="text-generation"),
        full=False,
        cardData=True,
        fetch_config=True,
    )
)

print(f"Downloaded list of models in {time.time() - start:.2f} seconds")
|
29 |
+
|
30 |
+
def update_models(file_path, models):
    """
    Refresh the dynamic information stored for each model in one JSON file.

    The file at `file_path` maps a model id to an info dict. For every id that is
    also present in `models`, the `likes`, `license`, `still_on_hub` and `tags`
    entries are recomputed, then the whole file is rewritten in place.

    Args:
        file_path: path of the JSON file to read and overwrite.
        models: either a dict mapping model id to a model-info object, or an
            iterable of model-info objects exposing `.id`. Each object must also
            expose `.likes` and `.card_data`.

    Returns:
        The list of model ids whose entry was refreshed (empty when none matched).
    """
    if not isinstance(models, dict):
        # A plain list of model infos cannot be looked up by id: index it first.
        models = {model_cfg.id: model_cfg for model_cfg in models}

    with open(file_path, "r") as f:
        model_infos = json.load(f)

    merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
    updated_ids = []

    for model_id, data in model_infos.items():
        if model_id not in models:
            continue

        model_cfg = models[model_id]
        data["likes"] = model_cfg.likes
        # NOTE: "params" is deliberately left untouched (the size refresh was
        # commented out in the original script).
        data["license"] = model_cfg.card_data.license if model_cfg.card_data is not None else ""

        # Is the model still on the hub
        still_on_hub, _error, _model_config = is_model_on_hub(
            model_name=model_id, revision=data.get("revision"), trust_remote_code=True, test_tokenizer=False
        )
        # If the model doesn't have a model card or a license, we consider it's deleted
        if still_on_hub:
            try:
                if check_model_card(model_id)[0] is False:
                    still_on_hub = False
            except Exception:
                still_on_hub = False
        data["still_on_hub"] = still_on_hub

        if still_on_hub:
            model_card = ModelCard.load(model_id)
            # The card metadata may define no tags at all.
            card_tags = model_card.data.tags or []

            # Check if the model is a merge
            is_merge_from_metadata = "merge" in card_tags
            is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)

            # Storing the model metadata
            tags = []
            if is_merge_from_model_card:
                tags.append("merge")
                # If the model is a merge but not saying it in the metadata, we flag it
                if not is_merge_from_metadata:
                    tags.append("flagged:undisclosed_merge")
            if "moe" in card_tags:
                tags.append("moe")
            data["tags"] = tags
        else:
            # Without a card there is nothing to recompute: keep the stored tags
            # and only make sure the key exists.
            data.setdefault("tags", [])

        updated_ids.append(model_id)

    with open(file_path, "w") as f:
        json.dump(model_infos, f, indent=2)

    return updated_ids
|
82 |
+
|
83 |
+
start = time.time()

# update_models looks each model up by its repo id, so hand it an id-keyed
# mapping rather than the raw listing.
models_by_id = models if isinstance(models, dict) else {model.id: model for model in models}

# Fall back to an empty list when update_models returns None, so that len() is safe.
updated_ids = update_models(DYNAMIC_INFO_FILE_PATH, models_by_id) or []

print(f"updated in {time.time() - start:.2f} seconds, updated ids: {len(updated_ids)}")

# Push the rewritten file back to the dataset repo under its bare file name.
API.upload_file(
    path_or_fileobj=DYNAMIC_INFO_FILE_PATH,
    path_in_repo=os.path.basename(DYNAMIC_INFO_FILE_PATH),
    repo_id=DYNAMIC_INFO_REPO,
    repo_type="dataset",
    commit_message="Daily request file update.",
)

# Drop the snapshot downloaded at the start of the script.
shutil.rmtree(TMP_FOLDER)
|
src/submission/check_validity.py
CHANGED
@@ -6,7 +6,7 @@ from datetime import datetime, timedelta, timezone
|
|
6 |
|
7 |
import huggingface_hub
|
8 |
from huggingface_hub import ModelCard
|
9 |
-
from huggingface_hub.hf_api import ModelInfo
|
10 |
from transformers import AutoConfig, AutoTokenizer
|
11 |
|
12 |
from src.envs import HAS_HIGHER_RATE_LIMIT
|
@@ -36,7 +36,7 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
|
|
36 |
return True, ""
|
37 |
|
38 |
|
39 |
-
def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
|
40 |
try:
|
41 |
config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) #, force_download=True)
|
42 |
if test_tokenizer:
|
@@ -65,17 +65,23 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
|
|
65 |
|
66 |
def get_model_size(model_info: ModelInfo, precision: str):
|
67 |
size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
|
|
|
68 |
try:
|
69 |
-
|
70 |
-
except
|
|
|
|
|
|
|
|
|
|
|
71 |
try:
|
72 |
-
size_match = re.search(size_pattern, model_info.
|
73 |
model_size = size_match.group(0)
|
74 |
model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
|
75 |
-
except AttributeError:
|
76 |
return 0 # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
|
77 |
|
78 |
-
size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.
|
79 |
model_size = size_factor * model_size
|
80 |
return model_size
|
81 |
|
|
|
6 |
|
7 |
import huggingface_hub
|
8 |
from huggingface_hub import ModelCard
|
9 |
+
from huggingface_hub.hf_api import ModelInfo, get_safetensors_metadata
|
10 |
from transformers import AutoConfig, AutoTokenizer
|
11 |
|
12 |
from src.envs import HAS_HIGHER_RATE_LIMIT
|
|
|
36 |
return True, ""
|
37 |
|
38 |
|
39 |
+
def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
|
40 |
try:
|
41 |
config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) #, force_download=True)
|
42 |
if test_tokenizer:
|
|
|
65 |
|
66 |
def get_model_size(model_info: ModelInfo, precision: str):
    """Return the size of a model, in billions of parameters.

    The parameter count is taken from the safetensors metadata when it can be
    fetched; otherwise the size is parsed from the model id (e.g. ``7b``,
    ``350m``). The result is multiplied by 8 when ``precision`` is ``"GPTQ"`` or
    the id contains ``gptq``.

    Returns:
        The size rounded to 3 decimals (before the GPTQ factor), or ``0`` when
        it cannot be determined.
    """
    metadata = None
    try:
        metadata = get_safetensors_metadata(model_info.id)
    except Exception as err:
        # Best effort: report the failure and fall back to parsing the id.
        print(err)

    if metadata is None:
        try:
            found = re.compile(r"(\d+\.)?\d+(b|m)").search(model_info.id.lower())
            token = found.group(0)
            number, unit = float(token[:-1]), token[-1]
        except AttributeError:
            return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
        # "m" sizes are expressed in millions: convert them to billions.
        billions = round(number if unit == "b" else number / 1e3, 3)
    else:
        billions = round(sum(metadata.parameter_count.values()) / 1e9, 3)

    multiplier = 8 if precision == "GPTQ" or "gptq" in model_info.id.lower() else 1
    return multiplier * billions
|
87 |
|
src/submission/submit.py
CHANGED
@@ -2,8 +2,10 @@ import json
|
|
2 |
import os
|
3 |
from datetime import datetime, timezone
|
4 |
|
|
|
|
|
5 |
from src.display.formatting import styled_error, styled_message, styled_warning
|
6 |
-
from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
|
7 |
from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
|
8 |
from src.submission.check_validity import (
|
9 |
already_submitted_models,
|
@@ -65,9 +67,15 @@ def add_new_eval(
|
|
65 |
return styled_error(f'Base model "{base_model}" {error}')
|
66 |
|
67 |
if not weight_type == "Adapter":
|
68 |
-
model_on_hub, error,
|
69 |
if not model_on_hub:
|
70 |
return styled_error(f'Model "{model}" {error}')
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
# Is the model info correctly filled?
|
73 |
try:
|
@@ -86,6 +94,22 @@ def add_new_eval(
|
|
86 |
modelcard_OK, error_msg = check_model_card(model)
|
87 |
if not modelcard_OK:
|
88 |
return styled_error(error_msg)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
# Seems good, creating the eval
|
91 |
print("Adding new eval")
|
@@ -96,13 +120,21 @@ def add_new_eval(
|
|
96 |
"revision": revision,
|
97 |
"private": private,
|
98 |
"precision": precision,
|
|
|
|
|
99 |
"weight_type": weight_type,
|
100 |
"status": "PENDING",
|
101 |
"submitted_time": current_time,
|
102 |
"model_type": model_type,
|
|
|
|
|
|
|
|
|
|
|
103 |
"likes": model_info.likes,
|
104 |
-
"params": model_size,
|
105 |
"license": license,
|
|
|
|
|
106 |
}
|
107 |
|
108 |
# Check for duplicate submission
|
@@ -126,6 +158,23 @@ def add_new_eval(
|
|
126 |
commit_message=f"Add {model} to eval queue",
|
127 |
)
|
128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
# Remove the local file
|
130 |
os.remove(out_path)
|
131 |
|
|
|
2 |
import os
|
3 |
from datetime import datetime, timezone
|
4 |
|
5 |
+
from huggingface_hub import ModelCard
|
6 |
+
|
7 |
from src.display.formatting import styled_error, styled_message, styled_warning
|
8 |
+
from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_REPO, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
|
9 |
from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
|
10 |
from src.submission.check_validity import (
|
11 |
already_submitted_models,
|
|
|
67 |
return styled_error(f'Base model "{base_model}" {error}')
|
68 |
|
69 |
if not weight_type == "Adapter":
|
70 |
+
model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
|
71 |
if not model_on_hub:
|
72 |
return styled_error(f'Model "{model}" {error}')
|
73 |
+
architecture = "?"
|
74 |
+
if model_config is not None:
|
75 |
+
architectures = getattr(model_config, "architectures", None)
|
76 |
+
if architectures:
|
77 |
+
architecture = ";".join(architectures)
|
78 |
+
|
79 |
|
80 |
# Is the model info correctly filled?
|
81 |
try:
|
|
|
94 |
modelcard_OK, error_msg = check_model_card(model)
|
95 |
if not modelcard_OK:
|
96 |
return styled_error(error_msg)
|
97 |
+
|
98 |
+
# Storing the model tags
|
99 |
+
tags = []
|
100 |
+
|
101 |
+
model_card = ModelCard.load(model)
|
102 |
+
is_merge_from_metadata = "merge" in model_card.data.tags if model_card.data.tags else False
|
103 |
+
merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
|
104 |
+
# If the model is a merge but not saying it in the metadata, we flag it
|
105 |
+
is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
|
106 |
+
if is_merge_from_model_card:
|
107 |
+
tags.append("merge")
|
108 |
+
if not is_merge_from_metadata:
|
109 |
+
tags.append("flagged:undisclosed_merge")
|
110 |
+
if "moe" in model_card.data.tags:
|
111 |
+
tags.append("moe")
|
112 |
+
|
113 |
|
114 |
# Seems good, creating the eval
|
115 |
print("Adding new eval")
|
|
|
120 |
"revision": revision,
|
121 |
"private": private,
|
122 |
"precision": precision,
|
123 |
+
"params": model_size,
|
124 |
+
"architectures": architecture,
|
125 |
"weight_type": weight_type,
|
126 |
"status": "PENDING",
|
127 |
"submitted_time": current_time,
|
128 |
"model_type": model_type,
|
129 |
+
"job_id": -1,
|
130 |
+
"job_start_time": None,
|
131 |
+
}
|
132 |
+
|
133 |
+
supplementary_info = {
|
134 |
"likes": model_info.likes,
|
|
|
135 |
"license": license,
|
136 |
+
"still_on_hub": True,
|
137 |
+
"tags": tags,
|
138 |
}
|
139 |
|
140 |
# Check for duplicate submission
|
|
|
158 |
commit_message=f"Add {model} to eval queue",
|
159 |
)
|
160 |
|
161 |
+
with open(DYNAMIC_INFO_FILE_PATH) as f:
|
162 |
+
all_supplementary_info = json.load(f)
|
163 |
+
|
164 |
+
all_supplementary_info[model] = supplementary_info
|
165 |
+
with open(DYNAMIC_INFO_FILE_PATH, "w") as f:
|
166 |
+
json.dump(all_supplementary_info, f, indent=2)
|
167 |
+
|
168 |
+
API.upload_file(
|
169 |
+
path_or_fileobj=DYNAMIC_INFO_FILE_PATH,
|
170 |
+
path_in_repo=DYNAMIC_INFO_FILE_PATH.split("/")[-1],
|
171 |
+
repo_id=DYNAMIC_INFO_REPO,
|
172 |
+
repo_type="dataset",
|
173 |
+
commit_message=f"Add {model} to dynamic info queue",
|
174 |
+
)
|
175 |
+
|
176 |
+
|
177 |
+
|
178 |
# Remove the local file
|
179 |
os.remove(out_path)
|
180 |
|