Commit · b74992f
Parent(s): 80fb2c0

improve validation, testing wip

Files changed:
- app.py +8 -6
- src/envs.py +1 -1
- src/submission/submit.py +19 -8
app.py
CHANGED
@@ -38,12 +38,15 @@ from src.submission.submit import add_new_solutions

logger = get_logger(__name__)

+SPLIT = "warmup"  # TODO temp
+SKIP_VALIDATION = True  # TODO temp
+

def restart_space():
    API.restart_space(repo_id=REPO_ID)


-lbdb = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO)
+lbdb = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO, split=SPLIT)

logger.info("Initialized LBDB")

@@ -165,7 +168,6 @@ with demo:
interactive=True,
)

-
# with gr.Column():
submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
# precision = gr.Dropdown(

@@ -188,10 +190,10 @@ with demo:
submit_button = gr.Button("Submit")
submission_result = gr.Markdown()

-def add_solution_cbk(system_name,
-
-sys_type, submission_path
-
+def add_solution_cbk(system_name, org, sys_type, submission_path):
+    return add_new_solutions(
+        lbdb, system_name, org, sys_type, submission_path, skip_validation=SKIP_VALIDATION
+    )

submit_button.click(
add_solution_cbk,
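Note: the hunk above ends inside the submit_button.click(...) call, so the full wiring is not visible in this diff. A minimal sketch of how the new callback is presumably hooked up, assuming Gradio input components named system_name_textbox, org_textbox and sys_type_dropdown exist elsewhere in the Blocks (those names are illustrative, not from this commit):

def add_solution_cbk(system_name, org, sys_type, submission_path):
    # Forward the UI values to the submission pipeline; SKIP_VALIDATION is the
    # temporary module-level flag introduced in this commit.
    return add_new_solutions(
        lbdb, system_name, org, sys_type, submission_path, skip_validation=SKIP_VALIDATION
    )

submit_button.click(
    add_solution_cbk,
    inputs=[system_name_textbox, org_textbox, sys_type_dropdown, submission_file],  # assumed component names
    outputs=submission_result,
)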
src/envs.py
CHANGED
@@ -14,7 +14,7 @@ SUBMISSIONS_REPO = f"{OWNER}/dev-f1-leaderboard-submissions"
RESULTS_REPO = f"{OWNER}/dev-f1-leaderboard-results"

# If you setup a cache later, just change HF_HOME
-CACHE_PATH=os.getenv("HF_HOME", ".")
+CACHE_PATH = os.getenv("HF_HOME", ".")

print(f"{TOKEN=}")
print(f"{REPO_ID=}")
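Note on the comment above: HF_HOME is the root cache directory used by huggingface_hub and datasets, so pointing it at persistent storage is enough to relocate downloads. A minimal sketch, using a hypothetical mount path:

import os

# Hypothetical persistent-storage location; set HF_HOME before the HF libraries
# are first used so dataset and hub caches land there.
os.environ.setdefault("HF_HOME", "/data/.huggingface")

# Same fallback as src/envs.py: default to the working directory when unset.
CACHE_PATH = os.getenv("HF_HOME", ".")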
src/submission/submit.py
CHANGED
@@ -3,8 +3,9 @@ import os
from datetime import datetime, timezone
import time

-from datasets import Dataset
+from datasets import Dataset, DatasetDict
import pandas as pd
+from pandas.api.types import is_integer_dtype, is_string_dtype

from src.datamodel.data import F1Data
from src.display.formatting import styled_error, styled_message, styled_warning

@@ -25,19 +26,24 @@ logger = get_logger(__name__)
def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
    logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
    expected_cols = ["problem_id", "solution"]
+
    if set(pd_ds.columns) != set(expected_cols):
        return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
-
-
+
+    if not is_integer_dtype(pd_ds["problem_id"]):
+        return "problem_id must be str convertible to int"
+
    if any(type(v) != str for v in pd_ds["solution"]):
        return "solution must be of type str"
-
+
+    submitted_ids = set(pd_ds.problem_id.astype(str))
    if submitted_ids != lbdb.code_problem_ids:
        missing = lbdb.code_problem_ids - submitted_ids
        unknown = submitted_ids - lbdb.code_problem_ids
        return f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown"
    if len(pd_ds) > len(lbdb.code_problem_ids):
        return "Duplicate problem IDs exist in uploaded file"
+
    return None


@@ -47,6 +53,7 @@ def add_new_solutions(
    org: str,
    sys_type: str,
    submission_path: str,
+    skip_validation: bool = False,
):
    logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
    if not system_name:

@@ -67,9 +74,10 @@ def add_new_solutions(
    except Exception as e:
        return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")

-
-
-
+    if not skip_validation:
+        validation_error = validate_submission(lbdb, submission_df)
+        if validation_error:
+            return styled_error(validation_error)

    submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"

@@ -86,6 +94,9 @@ def add_new_solutions(

    ds = Dataset.from_pandas(submission_df).map(add_info)

+    # dsdict = DatasetDict({submission_id: ds})
+    # dsdict.push_to_hub(SUBMISSIONS_REPO, private=True)
+
    ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
    # print("Creating eval file")
    # OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"

@@ -108,5 +119,5 @@ def add_new_solutions(
    # os.remove(out_path)

    return styled_message(
-        "Your request has been submitted to the evaluation queue!\
+        "Your request has been submitted to the evaluation queue!\nResults may take up to 24 hours to be processed and shown in the leaderboard."
    )
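Note: with SKIP_VALIDATION = True in app.py these checks are currently bypassed, but a payload that passes the tightened validate_submission would presumably look like the sketch below: exactly the columns problem_id (integer dtype) and solution (strings), one row per problem in lbdb.code_problem_ids, and no duplicate IDs. The IDs shown are illustrative only.

import io
import pandas as pd

# Two example rows; real problem_id values must match lbdb.code_problem_ids exactly.
jsonl = io.StringIO(
    '{"problem_id": 101, "solution": "def solve(x):\\n    return x"}\n'
    '{"problem_id": 102, "solution": "def solve(x):\\n    return x * 2"}\n'
)
submission_df = pd.read_json(jsonl, lines=True)

# validate_submission(lbdb, submission_df) returns None when the file is valid,
# otherwise an error string that add_new_solutions wraps in styled_error.

An accepted submission is then pushed to SUBMISSIONS_REPO under a submission_id of the form {system_name}_{org}_{sys_type}_{YYYYMMDD_HHMMSS}.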