Alvinn-aai committed on
Commit
b3d34ad
·
1 Parent(s): 9d4d10e

slight refactor, doc

Browse files
Files changed (1) hide show
  1. src/submission/submit.py +10 -8
src/submission/submit.py CHANGED
@@ -35,10 +35,6 @@ def add_new_solutions(
35
  is_warmup_dataset: bool,
36
  ensure_all_present: bool = False,
37
  ):
38
- # Rate limits:
39
- # 1. Users must wait MIN_WAIT_TIME_PER_USER_HRS hours between submissions.
40
- # 2. No more than MAX_SUBMISSIONS_PER_WINDOW submissions RATE_LIMIT_WINDOW_HRS hours overall.
41
-
42
  try:
43
  submitted_ids = get_dataset_config_names(SUBMISSIONS_REPO, token=TOKEN)
44
  except (DatasetNotFoundError, FileNotFoundError):
@@ -46,14 +42,19 @@ def add_new_solutions(
46
 
47
  logger.info(f"Found {len(submitted_ids)} submissions")
48
 
 
 
 
 
49
  sub_df = pd.DataFrame.from_dict(
50
  {
51
  "submission_id": submitted_ids,
52
- "user_id": map(submission_id_to_user_id, submitted_ids),
53
- "timestamp": map(submission_id_to_timestamp, submitted_ids),
54
  }
55
  )
56
 
 
57
  now = datetime.now(timezone.utc)
58
  cutoff_user = now - timedelta(hours=MIN_WAIT_TIME_PER_USER_HRS)
59
  user_last_submission_ts = sub_df[sub_df.user_id == user_id].timestamp.max()
@@ -66,6 +67,7 @@ def add_new_solutions(
66
  f"Remaining wait time: {remaining_hrs:.2f} hours"
67
  )
68
 
 
69
  cutoff_overall = now - timedelta(hours=RATE_LIMIT_WINDOW_HRS)
70
  if len(sub_df.timestamp > cutoff_overall) >= MAX_SUBMISSIONS_PER_WINDOW:
71
  logger.info(
@@ -174,14 +176,14 @@ def _validate_all_submissions_present(
174
  return ValueError("Duplicate problem IDs exist in uploaded file")
175
 
176
 
177
- def submission_id_to_user_id(submission_id: str) -> str:
178
  """
179
  Extracts the user ID from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
180
  """
181
  return submission_id.rsplit("_", 1)[-1]
182
 
183
 
184
- def submission_id_to_timestamp(submission_id: str) -> datetime:
185
  """
186
  Extracts the timestamp from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
187
  """
 
35
  is_warmup_dataset: bool,
36
  ensure_all_present: bool = False,
37
  ):
 
 
 
 
38
  try:
39
  submitted_ids = get_dataset_config_names(SUBMISSIONS_REPO, token=TOKEN)
40
  except (DatasetNotFoundError, FileNotFoundError):
 
42
 
43
  logger.info(f"Found {len(submitted_ids)} submissions")
44
 
45
+ # Rate limits:
46
+ # 1. Users must wait MIN_WAIT_TIME_PER_USER_HRS hours between submissions.
47
+ # 2. No more than MAX_SUBMISSIONS_PER_WINDOW submissions RATE_LIMIT_WINDOW_HRS hours overall.
48
+
49
  sub_df = pd.DataFrame.from_dict(
50
  {
51
  "submission_id": submitted_ids,
52
+ "user_id": map(_submission_id_to_user_id, submitted_ids),
53
+ "timestamp": map(_submission_id_to_timestamp, submitted_ids),
54
  }
55
  )
56
 
57
+ # Per user limit
58
  now = datetime.now(timezone.utc)
59
  cutoff_user = now - timedelta(hours=MIN_WAIT_TIME_PER_USER_HRS)
60
  user_last_submission_ts = sub_df[sub_df.user_id == user_id].timestamp.max()
 
67
  f"Remaining wait time: {remaining_hrs:.2f} hours"
68
  )
69
 
70
+ # Overall limit
71
  cutoff_overall = now - timedelta(hours=RATE_LIMIT_WINDOW_HRS)
72
  if len(sub_df.timestamp > cutoff_overall) >= MAX_SUBMISSIONS_PER_WINDOW:
73
  logger.info(
 
176
  return ValueError("Duplicate problem IDs exist in uploaded file")
177
 
178
 
179
+ def _submission_id_to_user_id(submission_id: str) -> str:
180
  """
181
  Extracts the user ID from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
182
  """
183
  return submission_id.rsplit("_", 1)[-1]
184
 
185
 
186
+ def _submission_id_to_timestamp(submission_id: str) -> datetime:
187
  """
188
  Extracts the timestamp from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
189
  """