Update submission functionality
Files changed:

- src/display/utils.py  +11 -0
- src/envs.py  +3 -2
- src/submission/check_validity.py  +21 -32
- src/submission/submit.py  +43 -46
src/display/utils.py
CHANGED

@@ -81,6 +81,17 @@ class EvalQueueColumn:  # Queue column
     status = ColumnContent("status", "str", True)
 
 
+# This class is used to store the model data in the queue
+@dataclass(frozen=True)
+class EvalQueuedModel:
+    model: str
+    revision: str
+    precision: str
+    add_special_tokens: str
+    llm_jp_eval_version: str
+    vllm_version: str
+
+
 ## All the model information that we might need
 @dataclass
 class ModelDetails:
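Why `frozen=True` rather than a plain `@dataclass`: freezing makes instances immutable, and together with the generated `__eq__` it gives them a value-based `__hash__`, so queue entries can be stored in a `set` and duplicate submissions collapse automatically. A minimal standalone sketch of that behaviour (the field values below are made up for illustration):

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class EvalQueuedModel:
        model: str
        revision: str
        precision: str
        add_special_tokens: str
        llm_jp_eval_version: str
        vllm_version: str

    # Two entries describing the same submission are equal and hash alike,
    # so a set keeps only one of them.
    a = EvalQueuedModel("org/model", "main", "bfloat16", "False", "v1.4.1", "v0.6.3")
    b = EvalQueuedModel("org/model", "main", "bfloat16", "False", "v1.4.1", "v0.6.3")
    assert a == b
    assert len({a, b}) == 1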
src/envs.py
CHANGED

@@ -1,4 +1,5 @@
 import os
+import pathlib
 
 from huggingface_hub import HfApi
 
@@ -14,9 +15,9 @@ QUEUE_REPO = f"{OWNER}/leaderboard-requests"
 CONTENTS_REPO = f"{OWNER}/leaderboard-contents"
 
 # If you setup a cache later, just change HF_HOME
-CACHE_PATH = os.getenv("HF_HOME", ".")
+CACHE_PATH = pathlib.Path(os.getenv("HF_HOME", "."))
 
 # Local caches
-EVAL_REQUESTS_PATH =
+EVAL_REQUESTS_PATH = CACHE_PATH / "eval-queue"
 
 API = HfApi(token=TOKEN)
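Wrapping the environment variable in `pathlib.Path` is what enables the `/` joins used below in check_validity.py and submit.py. A small sketch of the same fallback logic (output shown for the default case where `HF_HOME` is unset):

    import os
    import pathlib

    # Mirror of CACHE_PATH / EVAL_REQUESTS_PATH above.
    cache_path = pathlib.Path(os.getenv("HF_HOME", "."))
    eval_requests_path = cache_path / "eval-queue"

    print(eval_requests_path)               # eval-queue
    print(eval_requests_path / "some-org")  # eval-queue/some-org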
src/submission/check_validity.py
CHANGED

@@ -1,6 +1,6 @@
 import json
 import os
-
+import pathlib
 
 import huggingface_hub
 import requests
@@ -9,7 +9,7 @@ from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
 from transformers.models.auto.tokenization_auto import AutoTokenizer
 
-from src.display.utils import
+from src.display.utils import EvalQueuedModel
 
 
 def check_model_card(repo_id: str) -> tuple[bool, str]:
@@ -92,34 +92,23 @@
     return model_info.config.get("architectures", "Unknown")
 
 
-def already_submitted_models(requested_models_dir:
+def already_submitted_models(requested_models_dir: pathlib.Path) -> set[EvalQueuedModel]:
     """Gather a list of already submitted models to avoid duplicates"""
-        file_names.append(
-            f"{info['model']}_{info['precision']}_{info['add_special_tokens']}_{version}_{vllm_version}"
-        )
-
-        # Select organisation
-        if info["model"].count("/") == 0 or "submitted_time" not in info:
-            continue
-        organisation, _ = info["model"].split("/")
-        users_to_submission_dates[organisation].append(info["submitted_time"])
-
-    return set(file_names), users_to_submission_dates
+    queued_models = set()
+    for json_path in requested_models_dir.glob("*/*.json"):
+        with json_path.open() as f:
+            info = json.load(f)
+        # Allow failed submissions to be re-submitted
+        if info["status"] == "FAILED":
+            continue
+        queued_models.add(
+            EvalQueuedModel(
+                model=info["model"],
+                revision=info["revision"],
+                precision=info["precision"],
+                add_special_tokens=info["add_special_tokens"],
+                llm_jp_eval_version=info["llm_jp_eval_version"],
+                vllm_version=info["vllm_version"],
+            )
+        )
+    return queued_models
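A note on the glob pattern: `requested_models_dir.glob("*/*.json")` matches JSON files exactly one directory level deep, which corresponds to the `<user-or-org>/<request-file>.json` layout that submit.py writes; top-level or more deeply nested JSON files are ignored. A small sketch of the matching behaviour (directory and file names are illustrative):

    import pathlib

    queue_dir = pathlib.Path("eval-queue")

    # Matches  eval-queue/llm-jp/request.json
    # Ignores  eval-queue/top-level.json  and  eval-queue/a/b/nested.json
    for json_path in queue_dir.glob("*/*.json"):
        org = json_path.parent.name  # the user/org directory
        print(org, json_path.name)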
src/submission/submit.py
CHANGED

@@ -1,63 +1,60 @@
 import json
-import os
 from datetime import datetime, timezone
 
 from src.display.formatting import styled_error, styled_message, styled_warning
-from src.display.utils import LLMJpEvalVersion, VllmVersion
+from src.display.utils import EvalQueuedModel, LLMJpEvalVersion, VllmVersion
 from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
 from src.submission.check_validity import already_submitted_models, check_model_card, is_model_on_hub
 
-REQUESTED_MODELS =
+REQUESTED_MODELS: set[EvalQueuedModel] = set()
+
+LLM_JP_EVAL_VERSION = LLMJpEvalVersion.current.value.name
+VLLM_VERSION = VllmVersion.current.value.name
 
 
 def add_new_eval(
+    model_id: str,
     revision: str,
     precision: str,
     model_type: str,
     add_special_tokens: str,
 ):
     global REQUESTED_MODELS
-    global USERS_TO_SUBMISSION_DATES
     if not REQUESTED_MODELS:
-        REQUESTED_MODELS
+        REQUESTED_MODELS = already_submitted_models(EVAL_REQUESTS_PATH)
+
+    revision = revision or "main"
+
+    model_data = EvalQueuedModel(
+        model=model_id,
+        revision=revision,
+        precision=precision,
+        add_special_tokens=add_special_tokens,
+        llm_jp_eval_version=LLM_JP_EVAL_VERSION,
+        vllm_version=VLLM_VERSION,
+    )
+
+    if model_data in REQUESTED_MODELS:
+        return styled_warning("This model has already been submitted with the same configuration.")
+
+    if "/" in model_id:
+        user_or_org, model_name = model_id.split("/")
+    else:
+        user_or_org, model_name = "", model_id
+
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
 
     if model_type is None or model_type == "":
         return styled_error("Please select a model type.")
 
-    # Does the model actually exist?
-    if revision == "":
-        revision = "main"
-
     # Is the model on the hub?
-    model_on_hub, error, _ = is_model_on_hub(model_name=
+    model_on_hub, error, _ = is_model_on_hub(model_name=model_id, revision=revision, token=TOKEN, test_tokenizer=True)
     if not model_on_hub:
-        return styled_error(f'Model "{
+        return styled_error(f'Model "{model_id}" {error}')
 
     # Is the model info correctly filled?
     try:
-        model_info = API.model_info(repo_id=
+        model_info = API.model_info(repo_id=model_id, revision=revision)
     except Exception:
         return styled_error("Could not get your model information. Please fill it up properly.")
 
@@ -67,7 +64,7 @@ def add_new_eval(
     except Exception:
         return styled_error("Please select a license for your model")
 
-    modelcard_OK, error_msg = check_model_card(
+    modelcard_OK, error_msg = check_model_card(model_id)
     if not modelcard_OK:
         return styled_error(error_msg)
 
@@ -76,37 +73,37 @@
 
     eval_entry = {
         "model_type": model_type,
-        "model":
+        "model": model_id,
         "precision": precision,
         "revision": revision,
         "add_special_tokens": add_special_tokens,
-        "llm_jp_eval_version":
-        "vllm_version":
+        "llm_jp_eval_version": LLM_JP_EVAL_VERSION,
+        "vllm_version": VLLM_VERSION,
         "status": "PENDING",
        "submitted_time": current_time,
     }
 
     print("Creating eval file")
-    OUT_DIR =
-    )
+    OUT_DIR = EVAL_REQUESTS_PATH / user_or_org
+    OUT_DIR.mkdir(parents=True, exist_ok=True)
+    out_file_name = f"{model_name}_eval_request_False_{precision}_{add_special_tokens}_{VLLM_VERSION}.json"
+    out_path = OUT_DIR / out_file_name
 
-    with open(
+    with out_path.open("w") as f:
         f.write(json.dumps(eval_entry))
 
     print("Uploading eval file")
     API.upload_file(
         path_or_fileobj=out_path,
-        path_in_repo=out_path.
+        path_in_repo=out_path.relative_to(EVAL_REQUESTS_PATH).as_posix(),
         repo_id=QUEUE_REPO,
         repo_type="dataset",
-        commit_message=f"Add {
+        commit_message=f"Add {model_id} to eval queue",
     )
+    REQUESTED_MODELS.add(model_data)
 
     # Remove the local file
-
+    out_path.unlink()
 
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."