tomerz-aai committed
Commit c887522 · 1 Parent(s): a4be848

initial submit

app.py CHANGED
@@ -12,6 +12,8 @@ from src.about import (
12
  LLM_BENCHMARKS_TEXT,
13
  TITLE,
14
  )
 
 
15
  from src.display.css_html_js import custom_css
16
  from src.display.utils import (
17
  BENCHMARK_COLS,
@@ -24,69 +26,53 @@ from src.display.utils import (
24
  WeightType,
25
  Precision
26
  )
27
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
- from src.submission.submit import add_new_eval
30
 
31
 
32
  def restart_space():
33
  API.restart_space(repo_id=REPO_ID)
34
 
35
- ### Space initialisation
36
- try:
37
- print(EVAL_REQUESTS_PATH)
38
- snapshot_download(
39
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
- )
41
- except Exception:
42
- restart_space()
43
- try:
44
- print(EVAL_RESULTS_PATH)
45
- snapshot_download(
46
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
- )
48
- except Exception:
49
- restart_space()
50
-
51
-
52
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
-
54
- (
55
- finished_eval_queue_df,
56
- running_eval_queue_df,
57
- pending_eval_queue_df,
58
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
-
60
- def init_leaderboard(dataframe):
61
- if dataframe is None or dataframe.empty:
62
- raise ValueError("Leaderboard DataFrame is empty or None.")
63
- return Leaderboard(
64
- value=dataframe,
65
- datatype=[c.type for c in fields(AutoEvalColumn)],
66
- select_columns=SelectColumns(
67
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
68
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
69
- label="Select Columns to Display:",
70
- ),
71
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
72
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
73
- filter_columns=[
74
- ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
75
- ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
76
- ColumnFilter(
77
- AutoEvalColumn.params.name,
78
- type="slider",
79
- min=0.01,
80
- max=150,
81
- label="Select the number of parameters (B)",
82
- ),
83
- ColumnFilter(
84
- AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
85
- ),
86
- ],
87
- bool_checkboxgroup_label="Hide models",
88
- interactive=False,
89
- )
90
 
91
 
92
  demo = gr.Blocks(css=custom_css)
@@ -95,8 +81,8 @@ with demo:
95
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
96
 
97
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
- leaderboard = init_leaderboard(LEADERBOARD_DF)
100
 
101
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
@@ -106,84 +92,82 @@ with demo:
106
  with gr.Row():
107
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
108
 
109
- with gr.Column():
110
- with gr.Accordion(
111
- f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
112
- open=False,
113
- ):
114
- with gr.Row():
115
- finished_eval_table = gr.components.Dataframe(
116
- value=finished_eval_queue_df,
117
- headers=EVAL_COLS,
118
- datatype=EVAL_TYPES,
119
- row_count=5,
120
- )
121
- with gr.Accordion(
122
- f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
123
- open=False,
124
- ):
125
- with gr.Row():
126
- running_eval_table = gr.components.Dataframe(
127
- value=running_eval_queue_df,
128
- headers=EVAL_COLS,
129
- datatype=EVAL_TYPES,
130
- row_count=5,
131
- )
132
-
133
- with gr.Accordion(
134
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
135
- open=False,
136
- ):
137
- with gr.Row():
138
- pending_eval_table = gr.components.Dataframe(
139
- value=pending_eval_queue_df,
140
- headers=EVAL_COLS,
141
- datatype=EVAL_TYPES,
142
- row_count=5,
143
- )
144
  with gr.Row():
145
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
146
 
147
  with gr.Row():
148
  with gr.Column():
149
- model_name_textbox = gr.Textbox(label="Model name")
150
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
151
- model_type = gr.Dropdown(
152
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
153
- label="Model type",
154
- multiselect=False,
155
- value=None,
156
- interactive=True,
157
- )
158
 
159
  with gr.Column():
160
- precision = gr.Dropdown(
161
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
162
- label="Precision",
163
- multiselect=False,
164
- value="float16",
165
- interactive=True,
166
- )
167
- weight_type = gr.Dropdown(
168
- choices=[i.value.name for i in WeightType],
169
- label="Weights type",
170
- multiselect=False,
171
- value="Original",
172
- interactive=True,
173
- )
174
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
 
175
 
176
  submit_button = gr.Button("Submit Eval")
177
  submission_result = gr.Markdown()
178
  submit_button.click(
179
- add_new_eval,
180
  [
181
- model_name_textbox,
182
- base_model_name_textbox,
183
- revision_name_textbox,
184
- precision,
185
- weight_type,
186
- model_type,
187
  ],
188
  submission_result,
189
  )
@@ -201,4 +185,4 @@ with demo:
201
  scheduler = BackgroundScheduler()
202
  scheduler.add_job(restart_space, "interval", seconds=1800)
203
  scheduler.start()
204
- demo.queue(default_concurrency_limit=40).launch()
 
12
  LLM_BENCHMARKS_TEXT,
13
  TITLE,
14
  )
15
+ from src.datamodel.data import F1Data
16
+
17
  from src.display.css_html_js import custom_css
18
  from src.display.utils import (
19
  BENCHMARK_COLS,
 
26
  WeightType,
27
  Precision
28
  )
29
+ from src.envs import API, REPO_ID, TOKEN, CODE_PROBLEMS_REPO, SUBMISSIONS_REPO, RESULTS_REPO
30
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
31
+ from src.submission.submit import add_new_solutions
32
 
33
 
34
  def restart_space():
35
  API.restart_space(repo_id=REPO_ID)
36
 
37
+ lbdb = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO)
38
+
39
+
40
+ # (
41
+ # finished_eval_queue_df,
42
+ # running_eval_queue_df,
43
+ # pending_eval_queue_df,
44
+ # ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
45
+
46
+ # def init_leaderboard(dataframe):
47
+ # if dataframe is None or dataframe.empty:
48
+ # raise ValueError("Leaderboard DataFrame is empty or None.")
49
+ # return Leaderboard(
50
+ # value=dataframe,
51
+ # datatype=[c.type for c in fields(AutoEvalColumn)],
52
+ # select_columns=SelectColumns(
53
+ # default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
54
+ # cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
55
+ # label="Select Columns to Display:",
56
+ # ),
57
+ # search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
58
+ # hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
59
+ # filter_columns=[
60
+ # ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
61
+ # ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
62
+ # ColumnFilter(
63
+ # AutoEvalColumn.params.name,
64
+ # type="slider",
65
+ # min=0.01,
66
+ # max=150,
67
+ # label="Select the number of parameters (B)",
68
+ # ),
69
+ # ColumnFilter(
70
+ # AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
71
+ # ),
72
+ # ],
73
+ # bool_checkboxgroup_label="Hide models",
74
+ # interactive=False,
75
+ # )
76
 
77
 
78
  demo = gr.Blocks(css=custom_css)
 
81
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
82
 
83
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
84
+ # with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
85
+ # leaderboard = init_leaderboard(LEADERBOARD_DF)
86
 
87
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
88
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
92
  with gr.Row():
93
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
94
 
95
+ # with gr.Column():
96
+ # with gr.Accordion(
97
+ # f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
98
+ # open=False,
99
+ # ):
100
+ # with gr.Row():
101
+ # finished_eval_table = gr.components.Dataframe(
102
+ # value=finished_eval_queue_df,
103
+ # headers=EVAL_COLS,
104
+ # datatype=EVAL_TYPES,
105
+ # row_count=5,
106
+ # )
107
+ # with gr.Accordion(
108
+ # f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
109
+ # open=False,
110
+ # ):
111
+ # with gr.Row():
112
+ # running_eval_table = gr.components.Dataframe(
113
+ # value=running_eval_queue_df,
114
+ # headers=EVAL_COLS,
115
+ # datatype=EVAL_TYPES,
116
+ # row_count=5,
117
+ # )
118
+
119
+ # with gr.Accordion(
120
+ # f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
121
+ # open=False,
122
+ # ):
123
+ # with gr.Row():
124
+ # pending_eval_table = gr.components.Dataframe(
125
+ # value=pending_eval_queue_df,
126
+ # headers=EVAL_COLS,
127
+ # datatype=EVAL_TYPES,
128
+ # row_count=5,
129
+ # )
130
  with gr.Row():
131
+ gr.Markdown("# ✉️✨ Submit your solutions here!", elem_classes="markdown-text")
132
 
133
  with gr.Row():
134
  with gr.Column():
135
+ submitter_textbox = gr.Textbox(label="Submitter")
136
+ # revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
137
+ # model_type = gr.Dropdown(
138
+ # choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
139
+ # label="Model type",
140
+ # multiselect=False,
141
+ # value=None,
142
+ # interactive=True,
143
+ # )
144
 
145
  with gr.Column():
146
+ submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
147
+ # precision = gr.Dropdown(
148
+ # choices=[i.value.name for i in Precision if i != Precision.Unknown],
149
+ # label="Precision",
150
+ # multiselect=False,
151
+ # value="float16",
152
+ # interactive=True,
153
+ # )
154
+ # weight_type = gr.Dropdown(
155
+ # choices=[i.value.name for i in WeightType],
156
+ # label="Weights type",
157
+ # multiselect=False,
158
+ # value="Original",
159
+ # interactive=True,
160
+ # )
161
+ # base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
162
 
163
  submit_button = gr.Button("Submit Eval")
164
  submission_result = gr.Markdown()
165
  submit_button.click(
166
+ add_new_solutions,
167
  [
168
+ gr.State(lbdb),  # Gradio inputs must be components; wrap the F1Data instance in gr.State
169
+ submitter_textbox,
170
+ submission_file,
171
  ],
172
  submission_result,
173
  )
 
185
  scheduler = BackgroundScheduler()
186
  scheduler.add_job(restart_space, "interval", seconds=1800)
187
  scheduler.start()
188
+ demo.queue(default_concurrency_limit=40).launch()
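For orientation, a condensed sketch of how the submission wiring above fits together, assuming the gr.State wrapper noted in the inputs list (Gradio only accepts components as event inputs, so the F1Data instance cannot be passed directly):

    lbdb_state = gr.State(lbdb)  # holds the F1Data instance so the callback receives it as a plain value

    submit_button.click(
        add_new_solutions,
        [lbdb_state, submitter_textbox, submission_file],
        submission_result,
    )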
src/about.py CHANGED
@@ -21,7 +21,7 @@ NUM_FEWSHOT = 0 # Change with your few shot
21
 
22
 
23
  # Your leaderboard name
24
- TITLE = """<h1 align="center" id="space-title">Demo leaderboard</h1>"""
25
 
26
  # What does your leaderboard evaluate?
27
  INTRODUCTION_TEXT = """
 
21
 
22
 
23
  # Your leaderboard name
24
+ TITLE = """<h1 align="center" id="space-title">AAI FormulaOne Leaderboard</h1>"""
25
 
26
  # What does your leaderboard evaluate?
27
  INTRODUCTION_TEXT = """
src/datamodel/__init__.py ADDED
File without changes
src/datamodel/data.py ADDED
@@ -0,0 +1,21 @@
1
+ import functools
2
+
3
+ from datasets import load_dataset
4
+
5
+ class F1Data:
6
+ def __init__(self, cp_ds_name: str, sub_ds_name: str, res_ds_name: str):
7
+ self.cp_dataset_name = cp_ds_name
8
+ self.submissions_dataset_name = sub_ds_name
9
+ self.results_dataset_name = res_ds_name
10
+ self.initialize()
11
+
12
+ @functools.cached_property
13
+ def code_problem_formulas(self) -> set[str]:
14
+ return set(self.code_problems.keys())
15
+
16
+ def initialize(self):
17
+ cp_ds = load_dataset(self.cp_dataset_name, split="hard")
18
+ self.code_problems: dict[str, str] = {r["formula_name"]: r["code_problem"]["problem_description"] for r in cp_ds}
19
+
20
+ def add_submission(self, submitter: str, submission_path: str):
21
+ pass
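As a quick orientation, a minimal usage sketch of the new F1Data class (the repository names come from src/envs.py below; the print is purely illustrative):

    from src.datamodel.data import F1Data
    from src.envs import CODE_PROBLEMS_REPO, SUBMISSIONS_REPO, RESULTS_REPO

    # Loads the "hard" split of the code-problems dataset and indexes it by formula name.
    lbdb = F1Data(
        cp_ds_name=CODE_PROBLEMS_REPO,
        sub_ds_name=SUBMISSIONS_REPO,
        res_ds_name=RESULTS_REPO,
    )

    # The cached set of formula names is what uploaded submissions are validated against.
    print(len(lbdb.code_problem_formulas))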
src/envs.py CHANGED
@@ -2,24 +2,18 @@ import os
2
 
3
  from huggingface_hub import HfApi
4
 
5
- # Info to change for your repository
6
- # ----------------------------------
7
- TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
8
 
9
- OWNER = "demo-leaderboard-backend" # Change to your org - don't forget to create a results and request dataset, with the correct format!
10
- # ----------------------------------
11
 
12
- REPO_ID = f"{OWNER}/leaderboard"
13
- QUEUE_REPO = f"{OWNER}/requests"
14
- RESULTS_REPO = f"{OWNER}/results"
15
 
16
  # If you setup a cache later, just change HF_HOME
17
  CACHE_PATH=os.getenv("HF_HOME", ".")
18
 
19
- # Local caches
20
- EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
21
- EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
22
- EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
23
- EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
24
-
25
  API = HfApi(token=TOKEN)
 
2
 
3
  from huggingface_hub import HfApi
4
 
5
+ TOKEN = os.environ.get("HF_TOKEN")
 
 
6
 
7
+ OWNER = "double-ai"
 
8
 
9
+ REPO_ID = f"{OWNER}/FormulaOne-Leaderboard"
10
+
11
+ # Datasets
12
+ CODE_PROBLEMS_REPO = f"{OWNER}/dev-f1-dataset"
13
+ SUBMISSIONS_REPO = f"{OWNER}/dev-f1-leaderboard-submissions"
14
+ RESULTS_REPO = f"{OWNER}/dev-f1-leaderboard-results"
15
 
16
  # If you setup a cache later, just change HF_HOME
17
  CACHE_PATH=os.getenv("HF_HOME", ".")
18
 
 
 
 
 
 
 
19
  API = HfApi(token=TOKEN)
src/submission/check_validity.py CHANGED
@@ -4,12 +4,15 @@ import re
4
  from collections import defaultdict
5
  from datetime import datetime, timedelta, timezone
6
 
 
7
  import huggingface_hub
8
  from huggingface_hub import ModelCard
9
  from huggingface_hub.hf_api import ModelInfo
10
  from transformers import AutoConfig
11
  from transformers.models.auto.tokenization_auto import AutoTokenizer
12
 
 
 
13
  def check_model_card(repo_id: str) -> tuple[bool, str]:
14
  """Checks if the model card and license exist and have been filled"""
15
  try:
 
4
  from collections import defaultdict
5
  from datetime import datetime, timedelta, timezone
6
 
7
+ from datasets import get_dataset_config_names
8
  import huggingface_hub
9
  from huggingface_hub import ModelCard
10
  from huggingface_hub.hf_api import ModelInfo
11
  from transformers import AutoConfig
12
  from transformers.models.auto.tokenization_auto import AutoTokenizer
13
 
14
+ from src.envs import SUBMISSIONS_REPO
15
+
16
  def check_model_card(repo_id: str) -> tuple[bool, str]:
17
  """Checks if the model card and license exist and have been filled"""
18
  try:
src/submission/submit.py CHANGED
@@ -1,118 +1,77 @@
1
  import json
2
  import os
3
  from datetime import datetime, timezone
 
4
 
5
- from src.display.formatting import styled_error, styled_message, styled_warning
6
- from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
7
- from src.submission.check_validity import (
8
- already_submitted_models,
9
- check_model_card,
10
- get_model_size,
11
- is_model_on_hub,
12
- )
13
-
14
- REQUESTED_MODELS = None
15
- USERS_TO_SUBMISSION_DATES = None
16
 
17
- def add_new_eval(
18
- model: str,
19
- base_model: str,
20
- revision: str,
21
- precision: str,
22
- weight_type: str,
23
- model_type: str,
 
 
 
 
 
 
 
24
  ):
25
- global REQUESTED_MODELS
26
- global USERS_TO_SUBMISSION_DATES
27
- if not REQUESTED_MODELS:
28
- REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
29
-
30
- user_name = ""
31
- model_path = model
32
- if "/" in model:
33
- user_name = model.split("/")[0]
34
- model_path = model.split("/")[1]
35
-
36
- precision = precision.split(" ")[0]
37
- current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
38
 
39
- if model_type is None or model_type == "":
40
- return styled_error("Please select a model type.")
41
 
42
- # Does the model actually exist?
43
- if revision == "":
44
- revision = "main"
45
-
46
- # Is the model on the hub?
47
- if weight_type in ["Delta", "Adapter"]:
48
- base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
49
- if not base_model_on_hub:
50
- return styled_error(f'Base model "{base_model}" {error}')
51
-
52
- if not weight_type == "Adapter":
53
- model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
54
- if not model_on_hub:
55
- return styled_error(f'Model "{model}" {error}')
56
-
57
- # Is the model info correctly filled?
58
  try:
59
- model_info = API.model_info(repo_id=model, revision=revision)
60
- except Exception:
61
- return styled_error("Could not get your model information. Please fill it up properly.")
62
 
63
- model_size = get_model_size(model_info=model_info, precision=precision)
 
 
 
 
 
 
64
 
65
- # Were the model card and license filled?
66
- try:
67
- license = model_info.cardData["license"]
68
- except Exception:
69
- return styled_error("Please select a license for your model")
70
-
71
- modelcard_OK, error_msg = check_model_card(model)
72
- if not modelcard_OK:
73
- return styled_error(error_msg)
74
 
75
  # Seems good, creating the eval
76
- print("Adding new eval")
77
-
78
- eval_entry = {
79
- "model": model,
80
- "base_model": base_model,
81
- "revision": revision,
82
- "precision": precision,
83
- "weight_type": weight_type,
84
- "status": "PENDING",
85
- "submitted_time": current_time,
86
- "model_type": model_type,
87
- "likes": model_info.likes,
88
- "params": model_size,
89
- "license": license,
90
- "private": False,
91
- }
92
-
93
- # Check for duplicate submission
94
- if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
95
- return styled_warning("This model has been already submitted.")
96
-
97
- print("Creating eval file")
98
- OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
99
- os.makedirs(OUT_DIR, exist_ok=True)
100
- out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
101
-
102
- with open(out_path, "w") as f:
103
- f.write(json.dumps(eval_entry))
104
-
105
- print("Uploading eval file")
106
- API.upload_file(
107
- path_or_fileobj=out_path,
108
- path_in_repo=out_path.split("eval-queue/")[1],
109
- repo_id=QUEUE_REPO,
110
- repo_type="dataset",
111
- commit_message=f"Add {model} to eval queue",
112
- )
113
-
114
- # Remove the local file
115
- os.remove(out_path)
116
 
117
  return styled_message(
118
  "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
 
1
  import json
2
  import os
3
  from datetime import datetime, timezone
4
+ import time
5
 
6
+ import pandas as pd
+ from datasets import Dataset  # needed below to push the submission DataFrame to the Hub
7
 
8
+ from src.datamodel.data import F1Data
9
+ from src.display.formatting import styled_error, styled_message, styled_warning
10
+ from src.envs import API, SUBMISSIONS_REPO, TOKEN
11
+ # from src.submission.check_validity import (
12
+ # already_submitted_models,
13
+ # check_model_card,
14
+ # get_model_size,
15
+ # is_model_on_hub,
16
+ # )
17
+
18
+ def add_new_solutions(
19
+ lbdb: F1Data,
20
+ submitter: str,
21
+ submission_path: str,
22
  ):
23
+ if not submitter:
24
+ return styled_error("Please fill in the submitter name")
25
 
26
+ if not submission_path:
27
+ return styled_error("Please upload a JSONL solutions file")
28

29
  try:
30
+ ds = pd.read_json(submission_path, lines=True)
31
+ except Exception as e:
32
+ return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
33
 
34
+ submitted_formulas = set(ds["formula_name"])
35
+ if submitted_formulas != lbdb.code_problem_formulas:
36
+ missing = lbdb.code_problem_formulas - submitted_formulas
37
+ unknown = submitted_formulas - lbdb.code_problem_formulas
38
+ return styled_error(f"Mismatched formula names: missing {len(missing)} unknown {len(unknown)}")
39
+ if len(ds) > len(lbdb.code_problem_formulas):
40
+ return styled_error("Duplicate formula solutions exist in uploaded file")
41
 
42
+ submission_id = datetime.now().strftime("%Y%m%d%H%M%S")
43
 
44
  # Seems good, creating the eval
45
+ print(f"Adding new submission {submission_id} from {submitter}")
46
+ submission_ts = time.time_ns()
47
+
48
+ def add_info(row):
49
+ row["submitter"] = submitter
50
+ row["submission_id"] = submission_id
51
+ row["submission_ts"] = submission_ts
+ return row  # return the mutated row so DataFrame.apply keeps the added columns
52
+
53
+ ds = ds.apply(add_info, axis=1)  # row-wise apply; DataFrame.map would operate element-wise
54
+
55
+ Dataset.from_pandas(ds).push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)  # a DataFrame has no push_to_hub; convert to a datasets.Dataset first
56
+ # print("Creating eval file")
57
+ # OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
58
+ # os.makedirs(OUT_DIR, exist_ok=True)
59
+ # out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
60
+
61
+ # with open(out_path, "w") as f:
62
+ # f.write(json.dumps(eval_entry))
63
+
64
+ # print("Uploading eval file")
65
+ # API.upload_file(
66
+ # path_or_fileobj=out_path,
67
+ # path_in_repo=out_path.split("eval-queue/")[1],
68
+ # repo_id=QUEUE_REPO,
69
+ # repo_type="dataset",
70
+ # commit_message=f"Add {model} to eval queue",
71
+ # )
72
+
73
+ # # Remove the local file
74
+ # os.remove(out_path)
75
 
76
  return styled_message(
77
  "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."