Clémentine committed on
Commit
3777786
·
1 Parent(s): 77c51de

Added rate limiting system to the leaderboard to prevent abuse

Browse files
Files changed (3) hide show
  1. app.py +14 -2
  2. src/load_from_hub.py +11 -3
  3. src/rate_limiting.py +16 -0
app.py CHANGED
@@ -26,6 +26,7 @@ from src.display_models.utils import (
26
  styled_warning,
27
  )
28
  from src.load_from_hub import get_evaluation_queue_df, get_leaderboard_df, is_model_on_hub, load_all_info_from_hub
 
29
 
30
  pd.set_option("display.precision", 1)
31
 
@@ -52,6 +53,9 @@ api = HfApi(token=H4_TOKEN)
52
  def restart_space():
53
  api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN)
54
 
 
 
 
55
 
56
  # Column selection
57
  COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
@@ -77,12 +81,12 @@ BENCHMARK_COLS = [
77
  ]
78
 
79
  ## LOAD INFO FROM HUB
80
- eval_queue, requested_models, eval_results = load_all_info_from_hub(
81
  QUEUE_REPO, RESULTS_REPO, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
82
  )
83
 
84
  if not IS_PUBLIC:
85
- (eval_queue_private, requested_models_private, eval_results_private,) = load_all_info_from_hub(
86
  PRIVATE_QUEUE_REPO,
87
  PRIVATE_RESULTS_REPO,
88
  EVAL_REQUESTS_PATH_PRIVATE,
@@ -122,6 +126,14 @@ def add_new_eval(
122
  precision = precision.split(" ")[0]
123
  current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
124
 
 
 
 
 
 
 
 
 
125
  if model_type is None or model_type == "":
126
  return styled_error("Please select a model type.")
127
 
 
26
  styled_warning,
27
  )
28
  from src.load_from_hub import get_evaluation_queue_df, get_leaderboard_df, is_model_on_hub, load_all_info_from_hub
29
+ from src.rate_limiting import user_submission_permission
30
 
31
  pd.set_option("display.precision", 1)
32
 
 
53
  def restart_space():
54
  api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN)
55
 
56
+ # Rate limit variables
57
+ RATE_LIMIT_PERIOD = 7
58
+ RATE_LIMIT_QUOTA = 5
59
 
60
  # Column selection
61
  COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 
81
  ]
82
 
83
  ## LOAD INFO FROM HUB
84
+ eval_queue, requested_models, eval_results, users_to_submission_dates = load_all_info_from_hub(
85
  QUEUE_REPO, RESULTS_REPO, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
86
  )
87
 
88
  if not IS_PUBLIC:
89
+ (eval_queue_private, requested_models_private, eval_results_private, _) = load_all_info_from_hub(
90
  PRIVATE_QUEUE_REPO,
91
  PRIVATE_RESULTS_REPO,
92
  EVAL_REQUESTS_PATH_PRIVATE,
 
126
  precision = precision.split(" ")[0]
127
  current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
128
 
129
+ num_models_submitted_in_period = user_submission_permission(model, users_to_submission_dates, RATE_LIMIT_PERIOD)
130
+ if num_models_submitted_in_period > RATE_LIMIT_QUOTA:
131
+ error_msg = f"Organisation or user `{model.split('/')[0]}`"
132
+ error_msg += f"already has {num_models_submitted_in_period} model requests submitted to the leaderboard "
133
+ error_msg += f"in the last {RATE_LIMIT_PERIOD} days.\n"
134
+ error_msg += "Please wait a couple of days before resubmitting, so that everybody can enjoy using the leaderboard 🤗"
135
+ return styled_error(error_msg)
136
+
137
  if model_type is None or model_type == "":
138
  return styled_error("Please select a model type.")
139
 
src/load_from_hub.py CHANGED
@@ -4,6 +4,7 @@ import os
4
  import pandas as pd
5
  from huggingface_hub import Repository
6
  from transformers import AutoConfig
 
7
 
8
  from src.assets.hardcoded_evals import baseline, gpt4_values, gpt35_values
9
  from src.display_models.get_model_metadata import apply_metadata
@@ -16,6 +17,7 @@ IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
16
  def get_all_requested_models(requested_models_dir: str) -> set[str]:
17
  depth = 1
18
  file_names = []
 
19
 
20
  for root, _, files in os.walk(requested_models_dir):
21
  current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
@@ -26,7 +28,13 @@ def get_all_requested_models(requested_models_dir: str) -> set[str]:
26
  info = json.load(f)
27
  file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
28
 
29
- return set(file_names)
 
 
 
 
 
 
30
 
31
 
32
  def load_all_info_from_hub(QUEUE_REPO: str, RESULTS_REPO: str, QUEUE_PATH: str, RESULTS_PATH: str) -> list[Repository]:
@@ -50,9 +58,9 @@ def load_all_info_from_hub(QUEUE_REPO: str, RESULTS_REPO: str, QUEUE_PATH: str,
50
  )
51
  eval_results_repo.git_pull()
52
 
53
- requested_models = get_all_requested_models("eval-queue")
54
 
55
- return eval_queue_repo, requested_models, eval_results_repo
56
 
57
 
58
  def get_leaderboard_df(
 
4
  import pandas as pd
5
  from huggingface_hub import Repository
6
  from transformers import AutoConfig
7
+ from collections import defaultdict
8
 
9
  from src.assets.hardcoded_evals import baseline, gpt4_values, gpt35_values
10
  from src.display_models.get_model_metadata import apply_metadata
 
17
  def get_all_requested_models(requested_models_dir: str) -> set[str]:
18
  depth = 1
19
  file_names = []
20
+ users_to_submission_dates = defaultdict(list)
21
 
22
  for root, _, files in os.walk(requested_models_dir):
23
  current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
 
28
  info = json.load(f)
29
  file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
30
 
31
+ # Select organisation
32
+ if info["model"].count("/") == 0 or "submitted_time" not in info:
33
+ continue
34
+ organisation, _ = info["model"].split("/")
35
+ users_to_submission_dates[organisation].append(info["submitted_time"])
36
+
37
+ return set(file_names), users_to_submission_dates
38
 
39
 
40
  def load_all_info_from_hub(QUEUE_REPO: str, RESULTS_REPO: str, QUEUE_PATH: str, RESULTS_PATH: str) -> list[Repository]:
 
58
  )
59
  eval_results_repo.git_pull()
60
 
61
+ requested_models, users_to_submission_dates = get_all_requested_models("eval-queue")
62
 
63
+ return eval_queue_repo, requested_models, eval_results_repo, users_to_submission_dates
64
 
65
 
66
  def get_leaderboard_df(
src/rate_limiting.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from datetime import datetime, timezone, timedelta
3
+
4
+
5
def user_submission_permission(submission_name, users_to_submission_dates, rate_limit_period):
    """Count recent submissions made by the owner of ``submission_name``.

    Args:
        submission_name: Model identifier, normally ``"<org_or_user>/<model>"``.
        users_to_submission_dates: Mapping from organisation/user name to a
            list of submission timestamps stored as strings.
        rate_limit_period: Size of the look-back window, in days.

    Returns:
        The number of timestamps recorded for the owner that are strictly
        later than ``now - rate_limit_period`` days. Returns 0 when the name
        has no ``"/"`` separator or the owner has no recorded submissions.
    """
    # `partition` never raises: a name without "/" (or with several) used to
    # blow up with ValueError when unpacking the result of `split("/")`.
    org_or_user, separator, _ = submission_name.partition("/")
    if not separator:
        # No owner prefix, so there is nothing to look up in the mapping.
        return 0

    # `.get` avoids inserting an empty entry when the mapping is a defaultdict.
    submission_dates = users_to_submission_dates.get(org_or_user)
    if not submission_dates:
        return 0

    # Timestamps are compared as strings, which is only chronologically
    # correct if the stored dates use this same zero-padded UTC format.
    # NOTE(review): assumes every stored date follows "%Y-%m-%dT%H:%M:%SZ" - confirm.
    time_limit = (datetime.now(timezone.utc) - timedelta(days=rate_limit_period)).strftime("%Y-%m-%dT%H:%M:%SZ")

    return sum(1 for submitted_at in submission_dates if submitted_at > time_limit)
15
+
16
+