tomerz-aai committed on
Commit
a3d4fda
·
1 Parent(s): 37caa62
Files changed (1) hide show
  1. src/submission/submit.py +22 -9
src/submission/submit.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  from datetime import datetime, timezone
4
  import time
5
 
 
6
  import pandas as pd
7
 
8
  from src.datamodel.data import F1Data
@@ -18,6 +19,22 @@ from src.logger import get_logger
18
 
19
  logger = get_logger(__name__)
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def add_new_solutions(
22
  lbdb: F1Data,
23
  submitter: str,
@@ -31,17 +48,13 @@ def add_new_solutions(
31
  return styled_error("Please upload JSONL solutions file")
32
 
33
  try:
34
- ds = pd.read_json(submission_path, lines=True)
35
  except Exception as e:
36
  return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
37
 
38
- submitted_formulas = set(ds["formula_name"])
39
- if submitted_formulas != lbdb.code_problem_formulas:
40
- missing = lbdb.code_problem_formulas - submitted_formulas
41
- unknown = submitted_formulas - lbdb.code_problem_formulas
42
- return styled_error(f"Mismatched formula names: missing {len(missing)} unknown {len(unknown)}")
43
- if len(ds) > len(lbdb.code_problem_formulas):
44
- return styled_error("Duplicate formula solutions exist in uploaded file")
45
 
46
  submission_id = datetime.now().strftime("%Y%m%d%H%M%S")
47
 
@@ -54,7 +67,7 @@ def add_new_solutions(
54
  row["submission_id"] = submission_id
55
  row["submission_ts"] = submission_ts
56
 
57
- ds = ds.map(add_info)
58
 
59
  ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
60
  # print("Creating eval file")
 
3
  from datetime import datetime, timezone
4
  import time
5
 
6
+ from datasets import Dataset
7
  import pandas as pd
8
 
9
  from src.datamodel.data import F1Data
 
19
 
20
  logger = get_logger(__name__)
21
 
22
def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
    """Check an uploaded submission table and describe the first problem found.

    The checks run in order and stop at the first failure:

    1. The columns must be exactly ``formula_name`` and ``solution``.
    2. Every value in both columns must be a string.
    3. The set of submitted formula names must equal
       ``lbdb.code_problem_formulas``.
    4. No formula name may appear more than once.

    Args:
        lbdb: Object exposing ``code_problem_formulas``, the set of formula
            names the submission is compared against.
        pd_ds: The submission loaded as a DataFrame, one row per solution.

    Returns:
        A human-readable error message for the first failed check, or
        ``None`` when the submission passes every check.
    """
    required_columns = {"formula_name", "solution"}
    if set(pd_ds.columns) != required_columns:
        return "Bad format of submission"

    # Column order is fixed so the reported error is deterministic when both
    # columns contain bad values.
    for column in ("formula_name", "solution"):
        # isinstance (rather than an exact type comparison) also accepts
        # str subclasses, which behave as strings everywhere downstream.
        if not all(isinstance(value, str) for value in pd_ds[column]):
            return f"Not all {column} values are of type str"

    expected_formulas = lbdb.code_problem_formulas
    submitted_formulas = set(pd_ds["formula_name"])
    if submitted_formulas != expected_formulas:
        missing = expected_formulas - submitted_formulas
        unknown = submitted_formulas - expected_formulas
        return f"Mismatched formula names: missing {len(missing)} unknown {len(unknown)}"

    # The name sets are equal at this point, so any surplus rows can only be
    # repeated formula names.
    if len(pd_ds) > len(expected_formulas):
        return "Duplicate formula solutions exist in uploaded file"

    return None
37
+
38
  def add_new_solutions(
39
  lbdb: F1Data,
40
  submitter: str,
 
48
  return styled_error("Please upload JSONL solutions file")
49
 
50
  try:
51
+ pd_ds = pd.read_json(submission_path, lines=True)
52
  except Exception as e:
53
  return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
54
 
55
+ validation_error = validate_submission(lbdb, pd_ds)
56
+ if validation_error:
57
+ return styled_error(validation_error)
 
 
 
 
58
 
59
  submission_id = datetime.now().strftime("%Y%m%d%H%M%S")
60
 
 
67
  row["submission_id"] = submission_id
68
  row["submission_ts"] = submission_ts
69
 
70
+ ds = Dataset.from_pandas(pd_ds).map(add_info)
71
 
72
  ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
73
  # print("Creating eval file")