Spaces:
Runtime error
Runtime error
lixuejing
committed on
Commit
·
a28724a
1
Parent(s):
8545ff9
update
Browse files- src/envs.py +6 -0
- src/submission/check_validity.py +53 -0
src/envs.py
CHANGED
@@ -31,4 +31,10 @@ DYNAMIC_INFO_PATH = os.path.join(CACHE_PATH, "dynamic-info")
|
|
31 |
DYNAMIC_INFO_FILE_PATH = os.path.join(DYNAMIC_INFO_PATH, "model_infos.json")
|
32 |
|
33 |
PATH_TO_COLLECTION = "open-cn-llm-leaderboard/flageval-vlm-leaderboard-best-models-677e51cdc44f8123e02cbda1"
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
API = HfApi(token=TOKEN)
|
|
|
31 |
DYNAMIC_INFO_FILE_PATH = os.path.join(DYNAMIC_INFO_PATH, "model_infos.json")
|
32 |
|
33 |
PATH_TO_COLLECTION = "open-cn-llm-leaderboard/flageval-vlm-leaderboard-best-models-677e51cdc44f8123e02cbda1"
|
34 |
+
|
35 |
+
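# Rate limit variables for model submissions.
# NOTE(review): RATE_LIMIT_PERIOD looks like a window length in days and RATE_LIMIT_QUOTA a
# per-window submission cap - confirm where they are passed to user_submission_permission.
# Names listed in HAS_HIGHER_RATE_LIMIT get twice the quota (see src/submission/check_validity.py).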
|
36 |
+
RATE_LIMIT_PERIOD = 7
|
37 |
+
RATE_LIMIT_QUOTA = 5
|
38 |
+
HAS_HIGHER_RATE_LIMIT = ["TheBloke"]
|
39 |
+
|
40 |
API = HfApi(token=TOKEN)
|
src/submission/check_validity.py
CHANGED
@@ -10,6 +10,8 @@ from huggingface_hub.hf_api import ModelInfo
|
|
10 |
from transformers import AutoConfig
|
11 |
from transformers.models.auto.tokenization_auto import AutoTokenizer
|
12 |
|
|
|
|
|
13 |
def check_model_card(repo_id: str) -> tuple[bool, str]:
|
14 |
"""Checks if the model card and license exist and have been filled"""
|
15 |
try:
|
@@ -97,3 +99,54 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
|
|
97 |
users_to_submission_dates[organisation].append(info["submitted_time"])
|
98 |
|
99 |
return set(file_names), users_to_submission_dates
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
from transformers import AutoConfig
|
11 |
from transformers.models.auto.tokenization_auto import AutoTokenizer
|
12 |
|
13 |
+
from src.envs import HAS_HIGHER_RATE_LIMIT
|
14 |
+
|
15 |
def check_model_card(repo_id: str) -> tuple[bool, str]:
|
16 |
"""Checks if the model card and license exist and have been filled"""
|
17 |
try:
|
|
|
99 |
users_to_submission_dates[organisation].append(info["submitted_time"])
|
100 |
|
101 |
return set(file_names), users_to_submission_dates
|
102 |
+
|
103 |
+
def user_submission_permission(
    org_or_user: str,
    users_to_submission_dates: dict[str, list[str]],
    rate_limit_period: int,
    rate_limit_quota: int,
) -> tuple[bool, str]:
    """Decide whether ``org_or_user`` may submit another model right now.

    Args:
        org_or_user: Name of the submitting user or organisation.
        users_to_submission_dates: Mapping from user/organisation name to the
            timestamps of their earlier submissions.
        rate_limit_period: Length of the look-back window, in days.
        rate_limit_quota: Number of submissions tolerated inside the window.
            Doubled for names listed in ``HAS_HIGHER_RATE_LIMIT``.

    Returns:
        ``(True, "")`` when the submission is allowed, otherwise
        ``(False, error_message)`` with a user-facing explanation.
    """
    if org_or_user not in users_to_submission_dates:
        return True, ""

    # Timestamps are compared as plain strings, which only orders correctly if
    # the stored values use the same zero-padded UTC layout as `time_limit`.
    # NOTE(review): presumably they do - verify against the code that writes
    # `submitted_time`.
    time_limit = (datetime.now(timezone.utc) - timedelta(days=rate_limit_period)).strftime("%Y-%m-%dT%H:%M:%SZ")
    num_models_submitted_in_period = sum(
        1 for submitted_at in users_to_submission_dates[org_or_user] if submitted_at > time_limit
    )

    if org_or_user in HAS_HIGHER_RATE_LIMIT:
        rate_limit_quota = 2 * rate_limit_quota

    # NOTE(review): the comparison is strictly greater-than, so a submitter
    # sitting exactly at the quota is still let through - confirm that this is
    # the intended policy.
    if num_models_submitted_in_period > rate_limit_quota:
        error_msg = (
            f"Organisation or user `{org_or_user}` "
            f"already has {num_models_submitted_in_period} model requests submitted to the leaderboard "
            f"in the last {rate_limit_period} days.\n"
            "Please wait a couple of days before resubmitting, so that everybody can enjoy using the leaderboard 🤗"
        )
        return False, error_msg
    return True, ""
|
124 |
+
|
125 |
+
def get_model_tags(model_card, model: str) -> list[str]:
    """Derive the ``merge`` / ``moe`` tags for a model.

    Args:
        model_card: Object exposing ``.data.tags`` (declared metadata tags,
            may be empty or ``None``) and ``.text`` (the card body), or
            ``None`` when no card is available. Presumably a
            ``huggingface_hub.ModelCard`` - confirm at the call site.
        model: Model identifier, e.g. ``"org/name"``.

    Returns:
        A list containing any of ``"merge"``, ``"flagged:undisclosed_merge"``
        and ``"moe"``, in that order. Empty when ``model_card`` is ``None``.
    """
    tags = []
    if model_card is None:
        return tags

    declared_tags = model_card.data.tags or []
    is_merge_from_metadata = "merge" in declared_tags
    is_moe_from_metadata = "moe" in declared_tags

    # Lower-case the card body once; every keyword probe below reuses it.
    card_text = model_card.text.lower()

    merge_keywords = ["merged model", "merge model"]
    # If the model is a merge but not saying it in the metadata, we flag it
    is_merge_from_model_card = any(keyword in card_text for keyword in merge_keywords)
    if is_merge_from_model_card or is_merge_from_metadata:
        tags.append("merge")
        if not is_merge_from_metadata:
            tags.append("flagged:undisclosed_merge")

    # NOTE(review): these are substring matches on the card body, so "moe"
    # also fires inside longer words; the name check below is token-based.
    moe_keywords = ["moe", "mixtral"]
    is_moe_from_model_card = any(keyword in card_text for keyword in moe_keywords)
    name_tokens = model.lower().replace("/", "-").replace("_", "-").split("-")
    is_moe_from_name = "moe" in name_tokens
    if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
        # We no longer tag undisclosed MoEs
        tags.append("moe")

    return tags
|