Alvinn-aai committed on
Commit 61885ca · 1 Parent(s): 7d20cd0

update submit mechanism

app.py CHANGED
@@ -154,15 +154,17 @@ with demo:
 
         with gr.Row():
             with gr.Column():
-                submitter_textbox = gr.Textbox(label="Submitter Name")
+                system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
+                org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
                 # revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                # model_type = gr.Dropdown(
-                #     choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                #     label="Model type",
-                #     multiselect=False,
-                #     value=None,
-                #     interactive=True,
-                # )
+                sys_type_dropdown = gr.Dropdown(
+                    choices=[t.to_str(" : ") for t in ModelType],
+                    label=AutoEvalColumn.system_type.name,
+                    multiselect=False,
+                    value=ModelType.LLM.to_str(" : "),
+                    interactive=True,
+                )
+
 
             # with gr.Column():
             submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
@@ -182,17 +184,21 @@ with demo:
             # )
             # base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
 
-        logger.info("Submut button")
+        logger.info("Submit button")
         submit_button = gr.Button("Submit")
         submission_result = gr.Markdown()
 
-        def add_solution_cbk(submitter, submission_path):
-            return add_new_solutions(lbdb, submitter, submission_path)
+        def add_solution_cbk(system_name,
+                             org,
+                             sys_type, submission_path):
+            return add_new_solutions(lbdb, system_name, org, sys_type, submission_path)
 
         submit_button.click(
            add_solution_cbk,
            [
-                submitter_textbox,
+                system_name_textbox,
+                org_textbox,
+                sys_type_dropdown,
                submission_file,
            ],
            submission_result,
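
For context: Gradio passes the current values of the `inputs` components to the callback positionally, so the list order in `submit_button.click` must match the new four-argument signature of `add_solution_cbk`. A minimal standalone sketch of the same wiring, with a hypothetical stub in place of `add_new_solutions` (gr.File delivers the uploaded file as a path string by default in recent Gradio versions):

    import gradio as gr

    # Hypothetical stub standing in for add_new_solutions(lbdb, ...).
    def add_solution_cbk(system_name, org, sys_type, submission_path):
        return f"Got {system_name} / {org} / {sys_type} -> {submission_path}"

    with gr.Blocks() as demo:
        system_name_textbox = gr.Textbox(label="System")
        org_textbox = gr.Textbox(label="Organization")
        sys_type_dropdown = gr.Dropdown(choices=["LLM", "AgenticLLM", "Other"])
        submission_file = gr.File(file_types=[".jsonl"])
        submit_button = gr.Button("Submit")
        submission_result = gr.Markdown()
        # The order of this inputs list fixes the positional argument order.
        submit_button.click(
            add_solution_cbk,
            [system_name_textbox, org_textbox, sys_type_dropdown, submission_file],
            submission_result,
        )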
src/display/utils.py CHANGED
@@ -86,26 +86,26 @@ class ModelDetails:
 
 
 class ModelType(Enum):
-    PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    RL = ModelDetails(name="RL-tuned", symbol="🟦")
-    Unknown = ModelDetails(name="", symbol="?")
+    LLM = ModelDetails(name="LLM", symbol="🟢")
+    AgenticLLM = ModelDetails(name="AgenticLLM", symbol="🔶")
+    # IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
+    # RL = ModelDetails(name="RL-tuned", symbol="🟦")
+    Other = ModelDetails(name="Other", symbol="?")
 
     def to_str(self, separator=" "):
         return f"{self.value.symbol}{separator}{self.value.name}"
 
     @staticmethod
     def from_str(type):
-        if "fine-tuned" in type or "🔶" in type:
-            return ModelType.FT
-        if "pretrained" in type or "🟢" in type:
-            return ModelType.PT
-        if "RL-tuned" in type or "🟦" in type:
-            return ModelType.RL
-        if "instruction-tuned" in type or "⭕" in type:
-            return ModelType.IFT
-        return ModelType.Unknown
+        if "AgenticLLM" in type or "🔶" in type:
+            return ModelType.AgenticLLM
+        if "LLM" in type or "🟢" in type:
+            return ModelType.LLM
+        # if "RL-tuned" in type or "🟦" in type:
+        #     return ModelType.RL
+        # if "instruction-tuned" in type or "⭕" in type:
+        #     return ModelType.IFT
+        return ModelType.Other
 
 
 class WeightType(Enum):
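
Note the branch order in `from_str`: "AgenticLLM" must be tested before "LLM", because "LLM" is a substring of "AgenticLLM" and reversing the checks would misclassify agentic entries. A quick round-trip sketch (interpreter session, assuming the enum above is importable):

    >>> from src.display.utils import ModelType
    >>> ModelType.LLM.to_str(" : ")
    '🟢 : LLM'
    >>> ModelType.from_str("🔶 : AgenticLLM") is ModelType.AgenticLLM
    True
    >>> ModelType.from_str("no such type") is ModelType.Other
    True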
src/leaderboard/read_evals.py CHANGED
@@ -23,7 +23,7 @@ class EvalResult:
     revision: str  # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
+    model_type: ModelType = ModelType.LLM  # Pretrained, fine tuned, ...
     weight_type: WeightType = WeightType.Original  # Original or Adapter
     architecture: str = "Unknown"
     license: str = "?"
src/populate.py CHANGED
@@ -14,7 +14,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
-    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    df = df.sort_values(by=[AutoEvalColumn.success_rate.name], ascending=False)
     df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
src/submission/submit.py CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
 
 from src.datamodel.data import F1Data
 from src.display.formatting import styled_error, styled_message, styled_warning
+from src.display.utils import ModelType
 from src.envs import API, SUBMISSIONS_REPO, TOKEN
 from src.logger import get_logger
 # from src.submission.check_validity import (
@@ -21,8 +22,9 @@ logger = get_logger(__name__)
 
 def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
     logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
-    if set(pd_ds.columns) != set(["formula_name", "solution"]):
-        return "Bad format of submission"
+    expected_cols = ["formula_name", "solution"]
+    if set(pd_ds.columns) != set(expected_cols):
+        return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
     if any(type(v) != str for v in pd_ds["formula_name"]):
         return "Not all formula_name values are of type str"
     if any(type(v) != str for v in pd_ds["solution"]):
@@ -38,37 +40,49 @@ def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
 
 def add_new_solutions(
     lbdb: F1Data,
-    submitter: str,
+    system_name : str,
+    org: str,
+    sys_type: str,
     submission_path: str,
 ):
-    logger.info("ADD SUBMISSION! submitter %s path %s", submitter, submission_path)
-    if not submitter:
-        return styled_error("Please fill submitter name")
+    logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
+    if not system_name:
+        return styled_error("Please fill system name")
+
+    if not org:
+        return styled_error("Please fill organization name")
+
+    if not sys_type:
+        return styled_error("Please select system type")
+    sys_type = ModelType.from_str(sys_type).name
 
     if not submission_path:
         return styled_error("Please upload JSONL solutions file")
 
     try:
-        pd_ds = pd.read_json(submission_path, lines=True)
+        submission_df = pd.read_json(submission_path, lines=True)
     except Exception as e:
         return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
 
-    validation_error = validate_submission(lbdb, pd_ds)
+    validation_error = validate_submission(lbdb, submission_df)
     if validation_error:
         return styled_error(validation_error)
 
-    submission_id = datetime.now().strftime("%Y%m%d%H%M%S")
+
+    submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
 
     # Seems good, creating the eval
-    print(f"Adding new submission {submission_id} from {submitter}")
+    print(f"Adding new submission: {submission_id}")
     submission_ts = time.time_ns()
 
     def add_info(row):
-        row["submitter"] = submitter
+        row["system_name"] = system_name
+        row["organization"] = org
+        row["system_type"] = sys_type
        row["submission_id"] = submission_id
        row["submission_ts"] = submission_ts
 
-    ds = Dataset.from_pandas(pd_ds).map(add_info)
+    ds = Dataset.from_pandas(submission_df).map(add_info)
 
     ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
     # print("Creating eval file")