Commit · b74992f
Parent(s): 80fb2c0

improve validation, testing wip

Files changed:
- app.py +8 -6
- src/envs.py +1 -1
- src/submission/submit.py +19 -8
app.py
CHANGED
@@ -38,12 +38,15 @@ from src.submission.submit import add_new_solutions

logger = get_logger(__name__)

+SPLIT = "warmup"  # TODO temp
+SKIP_VALIDATION = True  # TODO temp
+

def restart_space():
    API.restart_space(repo_id=REPO_ID)


-lbdb = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO)
+lbdb = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO, split=SPLIT)

logger.info("Initialized LBDB")

@@ -165,7 +168,6 @@ with demo:
interactive=True,
)

-
# with gr.Column():
submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
# precision = gr.Dropdown(

@@ -188,10 +190,10 @@ with demo:
submit_button = gr.Button("Submit")
submission_result = gr.Markdown()

-def add_solution_cbk(system_name,
-
-sys_type, submission_path
-
+def add_solution_cbk(system_name, org, sys_type, submission_path):
+    return add_new_solutions(
+        lbdb, system_name, org, sys_type, submission_path, skip_validation=SKIP_VALIDATION
+    )

submit_button.click(
add_solution_cbk,
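Note: the hunk above ends inside the submit_button.click(...) call, so the full wiring is not visible in this diff. A minimal sketch of how the new callback is presumably hooked up, assuming Gradio input components named system_name_textbox, org_textbox and sys_type_dropdown exist elsewhere in the Blocks (those names are illustrative, not from this commit):

def add_solution_cbk(system_name, org, sys_type, submission_path):
    # Forward the UI values to the submission pipeline; SKIP_VALIDATION is the
    # temporary module-level flag introduced in this commit.
    return add_new_solutions(
        lbdb, system_name, org, sys_type, submission_path, skip_validation=SKIP_VALIDATION
    )

submit_button.click(
    add_solution_cbk,
    inputs=[system_name_textbox, org_textbox, sys_type_dropdown, submission_file],  # assumed component names
    outputs=submission_result,
)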
src/envs.py
CHANGED
@@ -14,7 +14,7 @@ SUBMISSIONS_REPO = f"{OWNER}/dev-f1-leaderboard-submissions"
RESULTS_REPO = f"{OWNER}/dev-f1-leaderboard-results"

# If you setup a cache later, just change HF_HOME
-CACHE_PATH=os.getenv("HF_HOME", ".")
+CACHE_PATH = os.getenv("HF_HOME", ".")

print(f"{TOKEN=}")
print(f"{REPO_ID=}")
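Note on the comment above: HF_HOME is the root cache directory used by huggingface_hub and datasets, so pointing it at persistent storage is enough to relocate downloads. A minimal sketch, using a hypothetical mount path:

import os

# Hypothetical persistent-storage location; set HF_HOME before the HF libraries
# are first used so dataset and hub caches land there.
os.environ.setdefault("HF_HOME", "/data/.huggingface")

# Same fallback as src/envs.py: default to the working directory when unset.
CACHE_PATH = os.getenv("HF_HOME", ".")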
src/submission/submit.py
CHANGED
@@ -3,8 +3,9 @@ import os
from datetime import datetime, timezone
import time

-from datasets import Dataset
+from datasets import Dataset, DatasetDict
import pandas as pd
+from pandas.api.types import is_integer_dtype, is_string_dtype

from src.datamodel.data import F1Data
from src.display.formatting import styled_error, styled_message, styled_warning

@@ -25,19 +26,24 @@ logger = get_logger(__name__)
def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
    logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
    expected_cols = ["problem_id", "solution"]
+
    if set(pd_ds.columns) != set(expected_cols):
        return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
-
-
+
+    if not is_integer_dtype(pd_ds["problem_id"]):
+        return "problem_id must be str convertible to int"
+
    if any(type(v) != str for v in pd_ds["solution"]):
        return "solution must be of type str"
-
+
+    submitted_ids = set(pd_ds.problem_id.astype(str))
    if submitted_ids != lbdb.code_problem_ids:
        missing = lbdb.code_problem_ids - submitted_ids
        unknown = submitted_ids - lbdb.code_problem_ids
        return f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown"
    if len(pd_ds) > len(lbdb.code_problem_ids):
        return "Duplicate problem IDs exist in uploaded file"
+
    return None


@@ -47,6 +53,7 @@ def add_new_solutions(
    org: str,
    sys_type: str,
    submission_path: str,
+    skip_validation: bool = False,
):
    logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
    if not system_name:

@@ -67,9 +74,10 @@ def add_new_solutions(
    except Exception as e:
        return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")

-
-
-
+    if not skip_validation:
+        validation_error = validate_submission(lbdb, submission_df)
+        if validation_error:
+            return styled_error(validation_error)

    submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"

@@ -86,6 +94,9 @@ def add_new_solutions(

    ds = Dataset.from_pandas(submission_df).map(add_info)

+    # dsdict = DatasetDict({submission_id: ds})
+    # dsdict.push_to_hub(SUBMISSIONS_REPO, private=True)
+
    ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
    # print("Creating eval file")
    # OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"

@@ -108,5 +119,5 @@ def add_new_solutions(
    # os.remove(out_path)

    return styled_message(
-        "Your request has been submitted to the evaluation queue!\
+        "Your request has been submitted to the evaluation queue!\nResults may take up to 24 hours to be processed and shown in the leaderboard."
    )
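Note: with SKIP_VALIDATION = True in app.py these checks are currently bypassed, but a payload that passes the tightened validate_submission would presumably look like the sketch below: exactly the columns problem_id (integer dtype) and solution (strings), one row per problem in lbdb.code_problem_ids, and no duplicate IDs. The IDs shown are illustrative only.

import io
import pandas as pd

# Two example rows; real problem_id values must match lbdb.code_problem_ids exactly.
jsonl = io.StringIO(
    '{"problem_id": 101, "solution": "def solve(x):\\n    return x"}\n'
    '{"problem_id": 102, "solution": "def solve(x):\\n    return x * 2"}\n'
)
submission_df = pd.read_json(jsonl, lines=True)

# validate_submission(lbdb, submission_df) returns None when the file is valid,
# otherwise an error string that add_new_solutions wraps in styled_error.

An accepted submission is then pushed to SUBMISSIONS_REPO under a submission_id of the form {system_name}_{org}_{sys_type}_{YYYYMMDD_HHMMSS}.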