Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
a3d4fda
1
Parent(s):
37caa62
wip
Browse files- src/submission/submit.py +22 -9
src/submission/submit.py
CHANGED
@@ -3,6 +3,7 @@ import os
|
|
3 |
from datetime import datetime, timezone
|
4 |
import time
|
5 |
|
|
|
6 |
import pandas as pd
|
7 |
|
8 |
from src.datamodel.data import F1Data
|
@@ -18,6 +19,22 @@ from src.logger import get_logger
|
|
18 |
|
19 |
logger = get_logger(__name__)
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def add_new_solutions(
|
22 |
lbdb: F1Data,
|
23 |
submitter: str,
|
@@ -31,17 +48,13 @@ def add_new_solutions(
|
|
31 |
return styled_error("Please upload JSONL solutions file")
|
32 |
|
33 |
try:
|
34 |
-
|
35 |
except Exception as e:
|
36 |
return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
|
37 |
|
38 |
-
|
39 |
-
if
|
40 |
-
|
41 |
-
unknown = submitted_formulas - lbdb.code_problem_formulas
|
42 |
-
return styled_error(f"Mismatched formula names: missing {len(missing)} unknown {len(unknown)}")
|
43 |
-
if len(ds) > len(lbdb.code_problem_formulas):
|
44 |
-
return styled_error("Duplicate formula solutions exist in uploaded file")
|
45 |
|
46 |
submission_id = datetime.now().strftime("%Y%m%d%H%M%S")
|
47 |
|
@@ -54,7 +67,7 @@ def add_new_solutions(
|
|
54 |
row["submission_id"] = submission_id
|
55 |
row["submission_ts"] = submission_ts
|
56 |
|
57 |
-
ds =
|
58 |
|
59 |
ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
|
60 |
# print("Creating eval file")
|
|
|
3 |
from datetime import datetime, timezone
|
4 |
import time
|
5 |
|
6 |
+
from datasets import Dataset
|
7 |
import pandas as pd
|
8 |
|
9 |
from src.datamodel.data import F1Data
|
|
|
19 |
|
20 |
logger = get_logger(__name__)
|
21 |
|
22 |
+
def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
|
23 |
+
if set(pd_ds.columns) != set(["formula_name", "solution"]):
|
24 |
+
return "Bad format of submission"
|
25 |
+
if any(type(v) != str for v in pd_ds["formula_name"]):
|
26 |
+
return "Not all formula_name values are of type str"
|
27 |
+
if any(type(v) != str for v in pd_ds["solution"]):
|
28 |
+
return "Not all solution values are of type str"
|
29 |
+
submitted_formulas = set(pd_ds["formula_name"])
|
30 |
+
if submitted_formulas != lbdb.code_problem_formulas:
|
31 |
+
missing = lbdb.code_problem_formulas - submitted_formulas
|
32 |
+
unknown = submitted_formulas - lbdb.code_problem_formulas
|
33 |
+
return f"Mismatched formula names: missing {len(missing)} unknown {len(unknown)}"
|
34 |
+
if len(pd_ds) > len(lbdb.code_problem_formulas):
|
35 |
+
return "Duplicate formula solutions exist in uploaded file"
|
36 |
+
return None
|
37 |
+
|
38 |
def add_new_solutions(
|
39 |
lbdb: F1Data,
|
40 |
submitter: str,
|
|
|
48 |
return styled_error("Please upload JSONL solutions file")
|
49 |
|
50 |
try:
|
51 |
+
pd_ds = pd.read_json(submission_path, lines=True)
|
52 |
except Exception as e:
|
53 |
return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
|
54 |
|
55 |
+
validation_error = validate_submission(lbdb, pd_ds)
|
56 |
+
if validation_error:
|
57 |
+
return styled_error(validation_error)
|
|
|
|
|
|
|
|
|
58 |
|
59 |
submission_id = datetime.now().strftime("%Y%m%d%H%M%S")
|
60 |
|
|
|
67 |
row["submission_id"] = submission_id
|
68 |
row["submission_ts"] = submission_ts
|
69 |
|
70 |
+
ds = Dataset.from_pandas(pd_ds).map(add_info)
|
71 |
|
72 |
ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
|
73 |
# print("Creating eval file")
|