Alvinn-aai committed on
Commit
b74992f
·
1 Parent(s): 80fb2c0

improve validation, testing wip

Browse files
Files changed (3) hide show
  1. app.py +8 -6
  2. src/envs.py +1 -1
  3. src/submission/submit.py +19 -8
app.py CHANGED
@@ -38,12 +38,15 @@ from src.submission.submit import add_new_solutions
38
 
39
  logger = get_logger(__name__)
40
 
 
 
 
41
 
42
  def restart_space():
43
  API.restart_space(repo_id=REPO_ID)
44
 
45
 
46
- lbdb = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO)
47
 
48
  logger.info("Initialized LBDB")
49
 
@@ -165,7 +168,6 @@ with demo:
165
  interactive=True,
166
  )
167
 
168
-
169
  # with gr.Column():
170
  submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
171
  # precision = gr.Dropdown(
@@ -188,10 +190,10 @@ with demo:
188
  submit_button = gr.Button("Submit")
189
  submission_result = gr.Markdown()
190
 
191
- def add_solution_cbk(system_name,
192
- org,
193
- sys_type, submission_path):
194
- return add_new_solutions(lbdb, system_name, org, sys_type, submission_path)
195
 
196
  submit_button.click(
197
  add_solution_cbk,
 
38
 
39
  logger = get_logger(__name__)
40
 
41
+ SPLIT = "warmup" # TODO temp
42
+ SKIP_VALIDATION = True # TODO temp
43
+
44
 
45
  def restart_space():
46
  API.restart_space(repo_id=REPO_ID)
47
 
48
 
49
+ lbdb = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO, split=SPLIT)
50
 
51
  logger.info("Initialized LBDB")
52
 
 
168
  interactive=True,
169
  )
170
 
 
171
  # with gr.Column():
172
  submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
173
  # precision = gr.Dropdown(
 
190
  submit_button = gr.Button("Submit")
191
  submission_result = gr.Markdown()
192
 
193
+ def add_solution_cbk(system_name, org, sys_type, submission_path):
194
+ return add_new_solutions(
195
+ lbdb, system_name, org, sys_type, submission_path, skip_validation=SKIP_VALIDATION
196
+ )
197
 
198
  submit_button.click(
199
  add_solution_cbk,
src/envs.py CHANGED
@@ -14,7 +14,7 @@ SUBMISSIONS_REPO = f"{OWNER}/dev-f1-leaderboard-submissions"
14
  RESULTS_REPO = f"{OWNER}/dev-f1-leaderboard-results"
15
 
16
  # If you setup a cache later, just change HF_HOME
17
- CACHE_PATH=os.getenv("HF_HOME", ".")
18
 
19
  print(f"{TOKEN=}")
20
  print(f"{REPO_ID=}")
 
14
  RESULTS_REPO = f"{OWNER}/dev-f1-leaderboard-results"
15
 
16
  # If you setup a cache later, just change HF_HOME
17
+ CACHE_PATH = os.getenv("HF_HOME", ".")
18
 
19
  print(f"{TOKEN=}")
20
  print(f"{REPO_ID=}")
src/submission/submit.py CHANGED
@@ -3,8 +3,9 @@ import os
3
  from datetime import datetime, timezone
4
  import time
5
 
6
- from datasets import Dataset
7
  import pandas as pd
 
8
 
9
  from src.datamodel.data import F1Data
10
  from src.display.formatting import styled_error, styled_message, styled_warning
@@ -25,19 +26,24 @@ logger = get_logger(__name__)
25
  def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
26
  logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
27
  expected_cols = ["problem_id", "solution"]
 
28
  if set(pd_ds.columns) != set(expected_cols):
29
  return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
30
- if any(type(v) != str for v in pd_ds["problem_id"]):
31
- return "problem_id must be of type str"
 
 
32
  if any(type(v) != str for v in pd_ds["solution"]):
33
  return "solution must be of type str"
34
- submitted_ids = set(pd_ds["problem_id"])
 
35
  if submitted_ids != lbdb.code_problem_ids:
36
  missing = lbdb.code_problem_ids - submitted_ids
37
  unknown = submitted_ids - lbdb.code_problem_ids
38
  return f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown"
39
  if len(pd_ds) > len(lbdb.code_problem_ids):
40
  return "Duplicate problem IDs exist in uploaded file"
 
41
  return None
42
 
43
 
@@ -47,6 +53,7 @@ def add_new_solutions(
47
  org: str,
48
  sys_type: str,
49
  submission_path: str,
 
50
  ):
51
  logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
52
  if not system_name:
@@ -67,9 +74,10 @@ def add_new_solutions(
67
  except Exception as e:
68
  return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
69
 
70
- validation_error = validate_submission(lbdb, submission_df)
71
- if validation_error:
72
- return styled_error(validation_error)
 
73
 
74
  submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
75
 
@@ -86,6 +94,9 @@ def add_new_solutions(
86
 
87
  ds = Dataset.from_pandas(submission_df).map(add_info)
88
 
 
 
 
89
  ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
90
  # print("Creating eval file")
91
  # OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
@@ -108,5 +119,5 @@ def add_new_solutions(
108
  # os.remove(out_path)
109
 
110
  return styled_message(
111
- "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
112
  )
 
3
  from datetime import datetime, timezone
4
  import time
5
 
6
+ from datasets import Dataset, DatasetDict
7
  import pandas as pd
8
+ from pandas.api.types import is_integer_dtype, is_string_dtype
9
 
10
  from src.datamodel.data import F1Data
11
  from src.display.formatting import styled_error, styled_message, styled_warning
 
26
  def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
27
  logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
28
  expected_cols = ["problem_id", "solution"]
29
+
30
  if set(pd_ds.columns) != set(expected_cols):
31
  return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
32
+
33
+ if not is_integer_dtype(pd_ds["problem_id"]):
34
+ return "problem_id must be str convertible to int"
35
+
36
  if any(type(v) != str for v in pd_ds["solution"]):
37
  return "solution must be of type str"
38
+
39
+ submitted_ids = set(pd_ds.problem_id.astype(str))
40
  if submitted_ids != lbdb.code_problem_ids:
41
  missing = lbdb.code_problem_ids - submitted_ids
42
  unknown = submitted_ids - lbdb.code_problem_ids
43
  return f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown"
44
  if len(pd_ds) > len(lbdb.code_problem_ids):
45
  return "Duplicate problem IDs exist in uploaded file"
46
+
47
  return None
48
 
49
 
 
53
  org: str,
54
  sys_type: str,
55
  submission_path: str,
56
+ skip_validation: bool = False,
57
  ):
58
  logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
59
  if not system_name:
 
74
  except Exception as e:
75
  return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
76
 
77
+ if not skip_validation:
78
+ validation_error = validate_submission(lbdb, submission_df)
79
+ if validation_error:
80
+ return styled_error(validation_error)
81
 
82
  submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
83
 
 
94
 
95
  ds = Dataset.from_pandas(submission_df).map(add_info)
96
 
97
+ # dsdict = DatasetDict({submission_id: ds})
98
+ # dsdict.push_to_hub(SUBMISSIONS_REPO, private=True)
99
+
100
  ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
101
  # print("Creating eval file")
102
  # OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
 
119
  # os.remove(out_path)
120
 
121
  return styled_message(
122
+ "Your request has been submitted to the evaluation queue!\nResults may take up to 24 hours to be processed and shown in the leaderboard."
123
  )