Commit · 61885ca
1 Parent(s): 7d20cd0
update submit mechanism
Files changed:
- app.py +18 -12
- src/display/utils.py +14 -14
- src/leaderboard/read_evals.py +1 -1
- src/populate.py +1 -1
- src/submission/submit.py +26 -12
app.py
CHANGED
@@ -154,15 +154,17 @@ with demo:
 
         with gr.Row():
             with gr.Column():
-
+                system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
+                org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
                 # revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-
-
-
-
-
-
-
+                sys_type_dropdown = gr.Dropdown(
+                    choices=[t.to_str(" : ") for t in ModelType],
+                    label=AutoEvalColumn.system_type.name,
+                    multiselect=False,
+                    value=ModelType.LLM.to_str(" : "),
+                    interactive=True,
+                )
+
 
             # with gr.Column():
                 submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
@@ -182,17 +184,21 @@ with demo:
             # )
             # base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
 
-        logger.info("
+        logger.info("Submit button")
         submit_button = gr.Button("Submit")
         submission_result = gr.Markdown()
 
-        def add_solution_cbk(
-
+        def add_solution_cbk(system_name,
+                             org,
+                             sys_type, submission_path):
+            return add_new_solutions(lbdb, system_name, org, sys_type, submission_path)
 
         submit_button.click(
             add_solution_cbk,
             [
-
+                system_name_textbox,
+                org_textbox,
+                sys_type_dropdown,
                 submission_file,
             ],
             submission_result,
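Note on the wiring above: Gradio passes each component's current value to the callback positionally, in the order of the inputs list, so add_solution_cbk(system_name, org, sys_type, submission_path) lines up with [system_name_textbox, org_textbox, sys_type_dropdown, submission_file]. A minimal self-contained sketch of the same pattern (names here are illustrative, not from this repo):

import gradio as gr

def on_submit(system_name, org, sys_type, path):
    # Values arrive positionally, matching the inputs list below;
    # a gr.File value typically arrives as a path to the uploaded file.
    return f"Got {system_name} / {org} / {sys_type}: {path}"

with gr.Blocks() as demo:
    name_tb = gr.Textbox(label="System name")
    org_tb = gr.Textbox(label="Organization")
    type_dd = gr.Dropdown(choices=["🟢 : LLM", "🔶 : AgenticLLM"], label="System type")
    file_in = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
    result = gr.Markdown()
    gr.Button("Submit").click(on_submit, [name_tb, org_tb, type_dd, file_in], result)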
src/display/utils.py
CHANGED
@@ -86,26 +86,26 @@ class ModelDetails:
 
 
 class ModelType(Enum):
-    PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    RL = ModelDetails(name="RL-tuned", symbol="🟦")
-    Unknown = ModelDetails(name="", symbol="?")
+    LLM = ModelDetails(name="LLM", symbol="🟢")
+    AgenticLLM = ModelDetails(name="AgenticLLM", symbol="🔶")
+    # IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
+    # RL = ModelDetails(name="RL-tuned", symbol="🟦")
+    Other = ModelDetails(name="Other", symbol="?")
 
     def to_str(self, separator=" "):
         return f"{self.value.symbol}{separator}{self.value.name}"
 
     @staticmethod
     def from_str(type):
-        if "fine-tuned" in type or "🔶" in type:
-            return ModelType.FT
-        if "pretrained" in type or "🟢" in type:
-            return ModelType.PT
-        if "RL-tuned" in type or "🟦" in type:
-            return ModelType.RL
-        if "instruction-tuned" in type or "⭕" in type:
-            return ModelType.IFT
-        return ModelType.Unknown
+        if "AgenticLLM" in type or "🔶" in type:
+            return ModelType.AgenticLLM
+        if "LLM" in type or "🟢" in type:
+            return ModelType.LLM
+        # if "RL-tuned" in type or "🟦" in type:
+        #     return ModelType.RL
+        # if "instruction-tuned" in type or "⭕" in type:
+        #     return ModelType.IFT
+        return ModelType.Other
 
 
 class WeightType(Enum):
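The dropdown in app.py renders each entry with to_str(" : ") and submit.py maps the selected string back with from_str, so the two methods must stay in sync. Note the check order in from_str: "AgenticLLM" must be tested before "LLM", because "LLM" is a substring of "AgenticLLM". A quick round-trip check (a sketch, assuming the enum above is importable as in submit.py):

from src.display.utils import ModelType

label = ModelType.AgenticLLM.to_str(" : ")   # "🔶 : AgenticLLM"
assert ModelType.from_str(label) is ModelType.AgenticLLM
assert ModelType.from_str(ModelType.LLM.to_str(" : ")) is ModelType.LLM
# Anything unrecognized falls through to Other.
assert ModelType.from_str("something else") is ModelType.Other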
src/leaderboard/read_evals.py
CHANGED
@@ -23,7 +23,7 @@ class EvalResult:
     revision: str # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
+    model_type: ModelType = ModelType.LLM # Pretrained, fine tuned, ...
     weight_type: WeightType = WeightType.Original # Original or Adapter
     architecture: str = "Unknown"
     license: str = "?"
src/populate.py
CHANGED
@@ -14,7 +14,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
-    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    df = df.sort_values(by=[AutoEvalColumn.success_rate.name], ascending=False)
     df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
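With this change the leaderboard is ranked by the success_rate column, best system first; sort_values with ascending=False handles the ordering. A toy illustration with made-up data:

import pandas as pd

df = pd.DataFrame({
    "system": ["a", "b", "c"],
    "success_rate": [41.7, 90.2, 73.5],  # fabricated example values
})
df = df.sort_values(by=["success_rate"], ascending=False)
print(df)  # rows ordered b, c, a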
src/submission/submit.py
CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
 
 from src.datamodel.data import F1Data
 from src.display.formatting import styled_error, styled_message, styled_warning
+from src.display.utils import ModelType
 from src.envs import API, SUBMISSIONS_REPO, TOKEN
 from src.logger import get_logger
 # from src.submission.check_validity import (
@@ -21,8 +22,9 @@ logger = get_logger(__name__)
 
 def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
     logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
-
-
+    expected_cols = ["formula_name", "solution"]
+    if set(pd_ds.columns) != set(expected_cols):
+        return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
     if any(type(v) != str for v in pd_ds["formula_name"]):
         return "Not all formula_name values are of type str"
     if any(type(v) != str for v in pd_ds["solution"]):
@@ -38,37 +40,49 @@
 
 def add_new_solutions(
     lbdb: F1Data,
-
+    system_name : str,
+    org: str,
+    sys_type: str,
     submission_path: str,
 ):
-    logger.info("ADD SUBMISSION!
-    if not
-        return styled_error("Please fill
+    logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
+    if not system_name:
+        return styled_error("Please fill system name")
+
+    if not org:
+        return styled_error("Please fill organization name")
+
+    if not sys_type:
+        return styled_error("Please select system type")
+    sys_type = ModelType.from_str(sys_type).name
 
     if not submission_path:
         return styled_error("Please upload JSONL solutions file")
 
     try:
-
+        submission_df = pd.read_json(submission_path, lines=True)
     except Exception as e:
         return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
 
-    validation_error = validate_submission(lbdb,
+    validation_error = validate_submission(lbdb, submission_df)
     if validation_error:
         return styled_error(validation_error)
 
-
+
+    submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
 
     # Seems good, creating the eval
-    print(f"Adding new submission {submission_id}
+    print(f"Adding new submission: {submission_id}")
     submission_ts = time.time_ns()
 
     def add_info(row):
-        row["
+        row["system_name"] = system_name
+        row["organization"] = org
+        row["system_type"] = sys_type
         row["submission_id"] = submission_id
         row["submission_ts"] = submission_ts
 
-    ds = Dataset.from_pandas(
+    ds = Dataset.from_pandas(submission_df).map(add_info)
 
     ds.push_to_hub(SUBMISSIONS_REPO, submission_id, private=True)
     # print("Creating eval file")
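Taken together, a submission now goes through only if the uploaded file is line-delimited JSON whose records carry exactly the string attributes formula_name and solution; the tagged dataset is then pushed to SUBMISSIONS_REPO under a config id of the form {system_name}_{org}_{sys_type}_{UTC timestamp}. A sketch of a conforming file and the same read/validate steps (file name and contents invented for illustration):

import pandas as pd

# One JSON object per line, with exactly the two expected attributes.
with open("solutions.jsonl", "w") as f:
    f.write('{"formula_name": "f1", "solution": "x + y"}\n')
    f.write('{"formula_name": "f2", "solution": "x * y"}\n')

df = pd.read_json("solutions.jsonl", lines=True)
assert set(df.columns) == {"formula_name", "solution"}
assert all(isinstance(v, str) for v in df["formula_name"])
assert all(isinstance(v, str) for v in df["solution"])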