Spaces: Running on CPU Upgrade
Sean Cho committed
Commit • 8a7d698 • 1 Parent(s): 7e4da4b

Remove model size restriction
Files changed:
- app.py +2 -16
- src/assets/text_content.py +0 -1
app.py
CHANGED
@@ -120,16 +120,6 @@ leaderboard_df = original_df.copy()
 ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
 
 ## INTERACTION FUNCTIONS
-def is_model_acceptable(model: str) -> bool:
-    # regex for something like '13b'
-    pattern = r'(\d+)[bB]'
-    values = re.findall(pattern, model)
-    for val in values:
-        if int(val) <= 13:
-            return True
-
-    return values == []
-
 def add_new_eval(
     model: str,
     base_model: str,
@@ -139,9 +129,6 @@ def add_new_eval(
     weight_type: str,
     model_type: str,
 ):
-    if not is_model_acceptable(model):
-        return styled_error("Please submit a model with less than 13B parameters.")
-
     precision = precision.split(" ")[0]
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
 
@@ -261,8 +248,8 @@ NUMERIC_INTERVALS = {
     "~3B": pd.Interval(1.5, 5, closed="right"),
     "~7B": pd.Interval(6, 11, closed="right"),
     "~13B": pd.Interval(12, 15, closed="right"),
-
-
+    "~35B": pd.Interval(16, 55, closed="right"),
+    "60B+": pd.Interval(55, 10000, closed="right"),
 }
 
 def filter_models(
@@ -571,7 +558,6 @@ with demo:
     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
 
    submit_button = gr.Button("Submit Evalulation!")
-    gr.Markdown("_We accept models with less than or equal to 13B parameters now._")
     submission_result = gr.Markdown()
     submit_button.click(
         add_new_eval,
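For reference, the gate this commit deletes is small enough to read in isolation. The sketch below reproduces the removed `is_model_acceptable` logic as it appears in the diff, with hypothetical repo ids added to show what used to pass and fail (only the sample ids are invented):

```python
import re

def is_model_acceptable(model: str) -> bool:
    # Regex for a parameter-count token such as '13b' or '70B' in a repo id.
    pattern = r'(\d+)[bB]'
    values = re.findall(pattern, model)
    for val in values:
        if int(val) <= 13:
            # Any token of 13B or less was enough to accept the model.
            return True
    # No size token at all also passed; only >13B tokens meant rejection.
    return values == []

# Hypothetical repo ids illustrating the behavior this commit removes:
assert is_model_acceptable("org/awesome-llm-7b")        # 7B  -> was accepted
assert not is_model_acceptable("org/awesome-llm-70b")   # 70B -> was rejected
assert is_model_acceptable("org/awesome-llm")           # no size token -> was accepted
```

Note that the old check accepted a name as soon as any token was 13 or less, so an id containing both "7b" and "70b" would still have passed the gate.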
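With the gate gone, parameter count only drives the leaderboard's size filter via `NUMERIC_INTERVALS`, consumed by the `filter_models` function defined just below it in app.py. A minimal sketch of how the two added buckets classify a size; the `bucket_for` helper and sample sizes are hypothetical, while the interval values are taken from the diff:

```python
import pandas as pd

NUMERIC_INTERVALS = {
    "~3B": pd.Interval(1.5, 5, closed="right"),
    "~7B": pd.Interval(6, 11, closed="right"),
    "~13B": pd.Interval(12, 15, closed="right"),
    "~35B": pd.Interval(16, 55, closed="right"),
    "60B+": pd.Interval(55, 10000, closed="right"),
}

def bucket_for(num_params_b: float) -> str:
    # Hypothetical helper: map a parameter count (in billions of
    # parameters) to its leaderboard size bucket.
    for label, interval in NUMERIC_INTERVALS.items():
        if num_params_b in interval:  # pd.Interval supports membership tests
            return label
    return "?"

print(bucket_for(13))   # ~13B
print(bucket_for(34))   # ~35B
print(bucket_for(70))   # 60B+
```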
src/assets/text_content.py
CHANGED
@@ -7,7 +7,6 @@ INTRODUCTION_TEXT = f"""
|
|
7 |
π The Open Ko-LLM Leaderboard π°π· objectively evaluates the performance of Korean Large Language Model (LLM).
|
8 |
|
9 |
When you submit a model on the "Submit here!" page, it is automatically evaluated. The GPU used for evaluation is operated with the support of KT.
|
10 |
-
We accept models with less than or equal to 13B parameters now.
|
11 |
The data used for evaluation consists of datasets to assess reasoning, language understanding, hallucination, and commonsense.
|
12 |
The evaluation dataset is exclusively private and only available for evaluation process.
|
13 |
More detailed information about the benchmark dataset is provided on the βAboutβ page.
|