Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
b3d34ad
1
Parent(s):
9d4d10e
slight refactor, doc
Browse files- src/submission/submit.py +10 -8
src/submission/submit.py
CHANGED
@@ -35,10 +35,6 @@ def add_new_solutions(
|
|
35 |
is_warmup_dataset: bool,
|
36 |
ensure_all_present: bool = False,
|
37 |
):
|
38 |
-
# Rate limits:
|
39 |
-
# 1. Users must wait MIN_WAIT_TIME_PER_USER_HRS hours between submissions.
|
40 |
-
# 2. No more than MAX_SUBMISSIONS_PER_WINDOW submissions per RATE_LIMIT_WINDOW_HRS hours overall.
|
41 |
-
|
42 |
try:
|
43 |
submitted_ids = get_dataset_config_names(SUBMISSIONS_REPO, token=TOKEN)
|
44 |
except (DatasetNotFoundError, FileNotFoundError):
|
@@ -46,14 +42,19 @@ def add_new_solutions(
|
|
46 |
|
47 |
logger.info(f"Found {len(submitted_ids)} submissions")
|
48 |
|
|
|
|
|
|
|
|
|
49 |
sub_df = pd.DataFrame.from_dict(
|
50 |
{
|
51 |
"submission_id": submitted_ids,
|
52 |
-
"user_id": map(
|
53 |
-
"timestamp": map(
|
54 |
}
|
55 |
)
|
56 |
|
|
|
57 |
now = datetime.now(timezone.utc)
|
58 |
cutoff_user = now - timedelta(hours=MIN_WAIT_TIME_PER_USER_HRS)
|
59 |
user_last_submission_ts = sub_df[sub_df.user_id == user_id].timestamp.max()
|
@@ -66,6 +67,7 @@ def add_new_solutions(
|
|
66 |
f"Remaining wait time: {remaining_hrs:.2f} hours"
|
67 |
)
|
68 |
|
|
|
69 |
cutoff_overall = now - timedelta(hours=RATE_LIMIT_WINDOW_HRS)
|
70 |
if len(sub_df.timestamp > cutoff_overall) >= MAX_SUBMISSIONS_PER_WINDOW:
|
71 |
logger.info(
|
@@ -174,14 +176,14 @@ def _validate_all_submissions_present(
|
|
174 |
return ValueError("Duplicate problem IDs exist in uploaded file")
|
175 |
|
176 |
|
177 |
-
def
|
178 |
"""
|
179 |
Extracts the user ID from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
|
180 |
"""
|
181 |
return submission_id.rsplit("_", 1)[-1]
|
182 |
|
183 |
|
184 |
-
def
|
185 |
"""
|
186 |
Extracts the timestamp from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
|
187 |
"""
|
|
|
35 |
is_warmup_dataset: bool,
|
36 |
ensure_all_present: bool = False,
|
37 |
):
|
|
|
|
|
|
|
|
|
38 |
try:
|
39 |
submitted_ids = get_dataset_config_names(SUBMISSIONS_REPO, token=TOKEN)
|
40 |
except (DatasetNotFoundError, FileNotFoundError):
|
|
|
42 |
|
43 |
logger.info(f"Found {len(submitted_ids)} submissions")
|
44 |
|
45 |
+
# Rate limits:
|
46 |
+
# 1. Users must wait MIN_WAIT_TIME_PER_USER_HRS hours between submissions.
|
47 |
+
# 2. No more than MAX_SUBMISSIONS_PER_WINDOW submissions per RATE_LIMIT_WINDOW_HRS hours overall.
|
48 |
+
|
49 |
sub_df = pd.DataFrame.from_dict(
|
50 |
{
|
51 |
"submission_id": submitted_ids,
|
52 |
+
"user_id": map(_submission_id_to_user_id, submitted_ids),
|
53 |
+
"timestamp": map(_submission_id_to_timestamp, submitted_ids),
|
54 |
}
|
55 |
)
|
56 |
|
57 |
+
# Per user limit
|
58 |
now = datetime.now(timezone.utc)
|
59 |
cutoff_user = now - timedelta(hours=MIN_WAIT_TIME_PER_USER_HRS)
|
60 |
user_last_submission_ts = sub_df[sub_df.user_id == user_id].timestamp.max()
|
|
|
67 |
f"Remaining wait time: {remaining_hrs:.2f} hours"
|
68 |
)
|
69 |
|
70 |
+
# Overall limit
|
71 |
cutoff_overall = now - timedelta(hours=RATE_LIMIT_WINDOW_HRS)
|
72 |
if len(sub_df.timestamp > cutoff_overall) >= MAX_SUBMISSIONS_PER_WINDOW:
|
73 |
logger.info(
|
|
|
176 |
return ValueError("Duplicate problem IDs exist in uploaded file")
|
177 |
|
178 |
|
179 |
+
def _submission_id_to_user_id(submission_id: str) -> str:
|
180 |
"""
|
181 |
Extracts the user ID from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
|
182 |
"""
|
183 |
return submission_id.rsplit("_", 1)[-1]
|
184 |
|
185 |
|
186 |
+
def _submission_id_to_timestamp(submission_id: str) -> datetime:
|
187 |
"""
|
188 |
Extracts the timestamp from the submission ID: "YYYYMMDD_HHMMSS_username_userid"
|
189 |
"""
|