lixuejing committed on
Commit
a28724a
·
1 Parent(s): 8545ff9
Files changed (2) hide show
  1. src/envs.py +6 -0
  2. src/submission/check_validity.py +53 -0
src/envs.py CHANGED
@@ -31,4 +31,10 @@ DYNAMIC_INFO_PATH = os.path.join(CACHE_PATH, "dynamic-info")
31
  DYNAMIC_INFO_FILE_PATH = os.path.join(DYNAMIC_INFO_PATH, "model_infos.json")
32
 
33
  PATH_TO_COLLECTION = "open-cn-llm-leaderboard/flageval-vlm-leaderboard-best-models-677e51cdc44f8123e02cbda1"
 
 
 
 
 
 
34
  API = HfApi(token=TOKEN)
 
31
  DYNAMIC_INFO_FILE_PATH = os.path.join(DYNAMIC_INFO_PATH, "model_infos.json")
32
 
33
  PATH_TO_COLLECTION = "open-cn-llm-leaderboard/flageval-vlm-leaderboard-best-models-677e51cdc44f8123e02cbda1"
34
+
35
+ # Rate limit variables
36
+ RATE_LIMIT_PERIOD = 7
37
+ RATE_LIMIT_QUOTA = 5
38
+ HAS_HIGHER_RATE_LIMIT = ["TheBloke"]
39
+
40
  API = HfApi(token=TOKEN)
src/submission/check_validity.py CHANGED
@@ -10,6 +10,8 @@ from huggingface_hub.hf_api import ModelInfo
10
  from transformers import AutoConfig
11
  from transformers.models.auto.tokenization_auto import AutoTokenizer
12
 
 
 
13
  def check_model_card(repo_id: str) -> tuple[bool, str]:
14
  """Checks if the model card and license exist and have been filled"""
15
  try:
@@ -97,3 +99,54 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
97
  users_to_submission_dates[organisation].append(info["submitted_time"])
98
 
99
  return set(file_names), users_to_submission_dates
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  from transformers import AutoConfig
11
  from transformers.models.auto.tokenization_auto import AutoTokenizer
12
 
13
+ from src.envs import HAS_HIGHER_RATE_LIMIT
14
+
15
  def check_model_card(repo_id: str) -> tuple[bool, str]:
16
  """Checks if the model card and license exist and have been filled"""
17
  try:
 
99
  users_to_submission_dates[organisation].append(info["submitted_time"])
100
 
101
  return set(file_names), users_to_submission_dates
102
+
103
def user_submission_permission(
    org_or_user: str,
    users_to_submission_dates: dict[str, list[str]],
    rate_limit_period: int,
    rate_limit_quota: int,
) -> tuple[bool, str]:
    """Decide whether `org_or_user` may submit another model right now.

    Args:
        org_or_user: Name of the submitting organisation or user.
        users_to_submission_dates: Mapping from organisation/user name to that
            submitter's past submission timestamps. The timestamps are compared
            as strings against a "%Y-%m-%dT%H:%M:%SZ" cutoff, so they are
            assumed to be stored in that same UTC format -- TODO confirm
            against the code that writes `submitted_time`.
        rate_limit_period: Length of the look-back window, in days.
        rate_limit_quota: Number of submissions tolerated inside the window.
            The value is doubled for names listed in `HAS_HIGHER_RATE_LIMIT`.

    Returns:
        An `(allowed, error_message)` tuple. `error_message` is the empty
        string when the submission is allowed.
    """
    if org_or_user not in users_to_submission_dates:
        return True, ""

    # Fixed-width ISO-like timestamps sort lexicographically in chronological
    # order, so a plain string comparison against the cutoff is sufficient and
    # no sorting of the history is needed just to count recent entries.
    time_limit = (datetime.now(timezone.utc) - timedelta(days=rate_limit_period)).strftime("%Y-%m-%dT%H:%M:%SZ")
    num_models_submitted_in_period = sum(
        1 for submitted_at in users_to_submission_dates[org_or_user] if submitted_at > time_limit
    )

    if org_or_user in HAS_HIGHER_RATE_LIMIT:
        rate_limit_quota = 2 * rate_limit_quota

    # NOTE(review): the strict `>` only rejects once the window already holds
    # more than `rate_limit_quota` requests, i.e. quota + 1 submissions get
    # through. Kept as-is to preserve existing behavior -- confirm the intent.
    if num_models_submitted_in_period > rate_limit_quota:
        error_msg = (
            f"Organisation or user `{org_or_user}` "
            f"already has {num_models_submitted_in_period} model requests submitted to the leaderboard "
            f"in the last {rate_limit_period} days.\n"
            "Please wait a couple of days before resubmitting, so that everybody can enjoy using the leaderboard 🤗"
        )
        return False, error_msg
    return True, ""
124
+
125
def get_model_tags(model_card, model: str):
    """Build the list of leaderboard tags for a model.

    Args:
        model_card: Object exposing `data.tags` (declared metadata tags, may be
            empty or None) and `text` (the card body), or None when no card is
            available.
        model: Model identifier; it is split on "/", "_" and "-" to look for a
            standalone "moe" component.

    Returns:
        A list that may contain "merge", "flagged:undisclosed_merge" and "moe",
        in that order. An empty list is returned when `model_card` is None.
    """
    if model_card is None:
        return []

    declared_tags = model_card.data.tags or ()
    declared_merge = "merge" in declared_tags
    declared_moe = "moe" in declared_tags

    # Lower-case the card body once; every keyword probe below reuses it.
    card_text = model_card.text.lower()

    result = []

    # A merge is reported when either the metadata or the card text says so;
    # a merge that only the text reveals is additionally flagged.
    mentions_merge = "merged model" in card_text or "merge model" in card_text
    if mentions_merge or declared_merge:
        result.append("merge")
        if not declared_merge:
            result.append("flagged:undisclosed_merge")

    mentions_moe = "moe" in card_text or "mixtral" in card_text
    name_parts = model.lower().replace("/", "-").replace("_", "-").split("-")
    name_has_moe = "moe" in name_parts
    # Undisclosed MoEs are deliberately no longer flagged, only tagged.
    if mentions_moe or name_has_moe or declared_moe:
        result.append("moe")

    return result