Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
8cfcd49
1
Parent(s):
61885ca
data upload script, support both splits
Browse files
Files changed:
- app.py +2 -2
- scripts/upload_f1_dataset.py +46 -0
- src/datamodel/data.py +25 -11
- src/submission/submit.py +1 -1
app.py
CHANGED
@@ -158,10 +158,10 @@ with demo:
|
|
158 |
org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
|
159 |
# revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
|
160 |
sys_type_dropdown = gr.Dropdown(
|
161 |
-
choices=[t.to_str("
|
162 |
label=AutoEvalColumn.system_type.name,
|
163 |
multiselect=False,
|
164 |
-
value=ModelType.LLM.to_str("
|
165 |
interactive=True,
|
166 |
)
|
167 |
|
|
|
158 |
org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
|
159 |
# revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
|
160 |
sys_type_dropdown = gr.Dropdown(
|
161 |
+
choices=[t.to_str(" ") for t in ModelType],
|
162 |
label=AutoEvalColumn.system_type.name,
|
163 |
multiselect=False,
|
164 |
+
value=ModelType.LLM.to_str(" "),
|
165 |
interactive=True,
|
166 |
)
|
167 |
|
scripts/upload_f1_dataset.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import fnmatch
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
|
6 |
+
from datasets import Dataset
|
7 |
+
|
8 |
+
from src.envs import CODE_PROBLEMS_REPO
|
9 |
+
from src.logger import get_logger
|
10 |
+
|
11 |
+
logger = get_logger(__name__)
|
12 |
+
|
13 |
+
|
14 |
+
def get_args() -> argparse.Namespace:
    """Parse command-line arguments for the F1 dataset upload script.

    Returns:
        Namespace with ``input_dir``, ``dataset_name`` and ``split``.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--input_dir", type=str, help="Dir with .json files", required=True)
    # CODE_PROBLEMS_REPO is already a string; the f-string wrapper was a no-op.
    parser.add_argument("--dataset_name", type=str, default=CODE_PROBLEMS_REPO)
    parser.add_argument("--split", type=str, choices=["hard", "warmup"], default="hard")
    return parser.parse_args()
|
20 |
+
|
21 |
+
|
22 |
+
def main(args: argparse.Namespace) -> None:
    """Read every ``*.json`` code-problem file in ``args.input_dir`` and push
    them to the Hub dataset ``args.dataset_name`` under split ``args.split``.

    Raises:
        ValueError: if the input directory contains no ``.json`` files.
    """
    logger.info("Reading problem files from %s", args.input_dir)
    input_files = fnmatch.filter(os.listdir(args.input_dir), "*.json")
    if not input_files:  # idiomatic emptiness check instead of len(...) == 0
        raise ValueError(f"No .json files in input dir {args.input_dir}")
    logger.info("Found %d code problems in %s", len(input_files), args.input_dir)

    def ds_generator():
        # One record per file: {"id": ..., "code_problem": <full JSON payload>}.
        # sorted() gives a deterministic record order across runs.
        for fname in sorted(input_files):
            formula_name = os.path.splitext(fname)[0]
            cp_path = os.path.join(args.input_dir, fname)
            with open(cp_path, "r", encoding="utf-8") as f:
                code_problem = json.load(f)
            logger.info("Read code problem for formula %s from %s", formula_name, cp_path)
            yield dict(id=code_problem["id"], code_problem=code_problem)

    ds = Dataset.from_generator(ds_generator)
    logger.info("Created dataset")

    # private=True keeps the uploaded dataset repo non-public.
    ds.push_to_hub(args.dataset_name, split=args.split, private=True)
    logger.info("Saved dataset to repo %s", args.dataset_name)
|
43 |
+
|
44 |
+
|
45 |
+
if __name__ == "__main__":
    # CLI entry point: parse arguments, then run the upload.
    cli_args = get_args()
    main(cli_args)
|
src/datamodel/data.py
CHANGED
@@ -3,26 +3,40 @@ import time
|
|
3 |
|
4 |
from datasets import load_dataset
|
5 |
|
6 |
-
from src.envs import TOKEN
|
7 |
from src.logger import get_logger
|
8 |
|
9 |
logger = get_logger(__name__)
|
10 |
|
|
|
11 |
class F1Data:
|
12 |
-
def __init__(self, cp_ds_name: str, sub_ds_name: str, res_ds_name: str):
|
13 |
self.cp_dataset_name = cp_ds_name
|
14 |
self.submissions_dataset_name = sub_ds_name
|
15 |
self.results_dataset_name = res_ds_name
|
16 |
-
self.
|
17 |
-
|
18 |
-
|
19 |
-
def code_problem_formulas(self) -> set[str]:
|
20 |
-
return set(self.code_problems.keys())
|
21 |
|
22 |
-
def
|
23 |
logger.info("Initialize F1Data TOMEN='%s'", TOKEN)
|
24 |
start_time = time.monotonic()
|
25 |
-
cp_ds = load_dataset(self.cp_dataset_name, split=
|
26 |
-
logger.info(
|
27 |
-
|
|
|
|
|
|
|
|
|
28 |
logger.info("Code problems info: %s", self.code_problems)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
from datasets import load_dataset
|
5 |
|
6 |
+
from src.envs import TOKEN, CODE_PROBLEMS_REPO, RESULTS_REPO, SUBMISSIONS_REPO
|
7 |
from src.logger import get_logger
|
8 |
|
9 |
logger = get_logger(__name__)
|
10 |
|
11 |
+
|
12 |
class F1Data:
    """Loads the code-problems dataset for the F1 leaderboard and exposes the
    set of problem ids (formula names) contained in the chosen split."""

    def __init__(self, cp_ds_name: str, sub_ds_name: str, res_ds_name: str, split: str = "hard"):
        """
        Args:
            cp_ds_name: Hub repo id of the code-problems dataset.
            sub_ds_name: Hub repo id of the submissions dataset.
            res_ds_name: Hub repo id of the results dataset.
            split: dataset split to load ("hard" or "warmup").
        """
        self.cp_dataset_name = cp_ds_name
        self.submissions_dataset_name = sub_ds_name
        self.results_dataset_name = res_ds_name
        self.split = split
        # Populated by _initialize(): maps problem id -> problem description.
        self.code_problems: dict[str, str] | None = None
        self._initialize()

    def _initialize(self) -> None:
        """Download the code-problems split and index descriptions by id."""
        # Fixed log typo ("TOMEN") and stopped logging the raw TOKEN value —
        # it is an auth secret and must not appear in logs.
        logger.info("Initialize F1Data (token is %s)", "set" if TOKEN else "unset")
        start_time = time.monotonic()
        cp_ds = load_dataset(self.cp_dataset_name, split=self.split, token=TOKEN)
        logger.info(
            "Loaded code-problems dataset from %s in %f sec",
            self.cp_dataset_name,
            time.monotonic() - start_time,
        )
        self.code_problems = {r["id"]: r["code_problem"]["problem_description"] for r in cp_ds}
        # Log only the count — the full mapping can be very large.
        logger.info("Loaded %d code problems", len(self.code_problems))

    # NOTE(review): requires `import functools` at module top — the import hunk
    # above this class is outside the visible diff; confirm it exists.
    @functools.cached_property
    def code_problem_formulas(self) -> set[str]:
        """Set of problem ids in the loaded split (cached after first access)."""
        return set(self.code_problems.keys())
|
36 |
+
|
37 |
+
|
38 |
+
if __name__ == "__main__":
    # Smoke test: load the "hard" split and report how many problems it holds.
    split_name = "hard"
    f1_data = F1Data(
        cp_ds_name=CODE_PROBLEMS_REPO,
        sub_ds_name=SUBMISSIONS_REPO,
        res_ds_name=RESULTS_REPO,
        split=split_name,
    )
    print(f"Found {len(f1_data.code_problem_formulas)} code problems in {split_name} split of {f1_data.cp_dataset_name}")
|
src/submission/submit.py
CHANGED
@@ -33,7 +33,7 @@ def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
|
|
33 |
if submitted_formulas != lbdb.code_problem_formulas:
|
34 |
missing = lbdb.code_problem_formulas - submitted_formulas
|
35 |
unknown = submitted_formulas - lbdb.code_problem_formulas
|
36 |
-
return f"Mismatched formula names:
|
37 |
if len(pd_ds) > len(lbdb.code_problem_formulas):
|
38 |
return "Duplicate formula solutions exist in uploaded file"
|
39 |
return None
|
|
|
33 |
if submitted_formulas != lbdb.code_problem_formulas:
|
34 |
missing = lbdb.code_problem_formulas - submitted_formulas
|
35 |
unknown = submitted_formulas - lbdb.code_problem_formulas
|
36 |
+
return f"Mismatched formula names: {len(missing)} missing, {len(unknown)} unknown"
|
37 |
if len(pd_ds) > len(lbdb.code_problem_formulas):
|
38 |
return "Duplicate formula solutions exist in uploaded file"
|
39 |
return None
|