Spaces: Running on CPU Upgrade
Sean Cho committed
Commit • 8a7d698 • 1 Parent(s): 7e4da4b

Remove model size restriction
Files changed:
- app.py +2 -16
- src/assets/text_content.py +0 -1
app.py
CHANGED
@@ -120,16 +120,6 @@ leaderboard_df = original_df.copy()
 ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
 
 ## INTERACTION FUNCTIONS
-def is_model_acceptable(model: str) -> bool:
-    # regex for something like '13b'
-    pattern = r'(\d+)[bB]'
-    values = re.findall(pattern, model)
-    for val in values:
-        if int(val) <= 13:
-            return True
-
-    return values == []
-
 def add_new_eval(
     model: str,
     base_model: str,
@@ -139,9 +129,6 @@ def add_new_eval(
     weight_type: str,
     model_type: str,
 ):
-    if not is_model_acceptable(model):
-        return styled_error("Please submit a model with less than 13B parameters.")
-
     precision = precision.split(" ")[0]
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
 
@@ -261,8 +248,8 @@ NUMERIC_INTERVALS = {
     "~3B": pd.Interval(1.5, 5, closed="right"),
     "~7B": pd.Interval(6, 11, closed="right"),
     "~13B": pd.Interval(12, 15, closed="right"),
-
-
+    "~35B": pd.Interval(16, 55, closed="right"),
+    "60B+": pd.Interval(55, 10000, closed="right"),
 }
 
 def filter_models(
@@ -571,7 +558,6 @@ with demo:
     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
 
    submit_button = gr.Button("Submit Evalulation!")
-    gr.Markdown("_We accept models with less than or equal to 13B parameters now._")
     submission_result = gr.Markdown()
     submit_button.click(
         add_new_eval,
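For reference, the gate this commit deletes is small enough to read in isolation. The sketch below reproduces the removed `is_model_acceptable` logic as it appears in the diff, with hypothetical repo ids added to show what used to pass and fail (only the sample ids are invented):

```python
import re

def is_model_acceptable(model: str) -> bool:
    # Regex for a parameter-count token such as '13b' or '70B' in a repo id.
    pattern = r'(\d+)[bB]'
    values = re.findall(pattern, model)
    for val in values:
        if int(val) <= 13:
            # Any token of 13B or less was enough to accept the model.
            return True
    # No size token at all also passed; only >13B tokens meant rejection.
    return values == []

# Hypothetical repo ids illustrating the behavior this commit removes:
assert is_model_acceptable("org/awesome-llm-7b")        # 7B  -> was accepted
assert not is_model_acceptable("org/awesome-llm-70b")   # 70B -> was rejected
assert is_model_acceptable("org/awesome-llm")           # no size token -> was accepted
```

Note that the old check accepted a name as soon as any token was 13 or less, so an id containing both "7b" and "70b" would still have passed the gate.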
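With the gate gone, parameter count only drives the leaderboard's size filter via `NUMERIC_INTERVALS`, consumed by the `filter_models` function defined just below it in app.py. A minimal sketch of how the two added buckets classify a size; the `bucket_for` helper and sample sizes are hypothetical, while the interval values are taken from the diff:

```python
import pandas as pd

NUMERIC_INTERVALS = {
    "~3B": pd.Interval(1.5, 5, closed="right"),
    "~7B": pd.Interval(6, 11, closed="right"),
    "~13B": pd.Interval(12, 15, closed="right"),
    "~35B": pd.Interval(16, 55, closed="right"),
    "60B+": pd.Interval(55, 10000, closed="right"),
}

def bucket_for(num_params_b: float) -> str:
    # Hypothetical helper: map a parameter count (in billions of
    # parameters) to its leaderboard size bucket.
    for label, interval in NUMERIC_INTERVALS.items():
        if num_params_b in interval:  # pd.Interval supports membership tests
            return label
    return "?"

print(bucket_for(13))   # ~13B
print(bucket_for(34))   # ~35B
print(bucket_for(70))   # 60B+
```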
src/assets/text_content.py
CHANGED
@@ -7,7 +7,6 @@ INTRODUCTION_TEXT = f"""
|
|
7 |
π The Open Ko-LLM Leaderboard π°π· objectively evaluates the performance of Korean Large Language Model (LLM).
|
8 |
|
9 |
When you submit a model on the "Submit here!" page, it is automatically evaluated. The GPU used for evaluation is operated with the support of KT.
|
10 |
-
We accept models with less than or equal to 13B parameters now.
|
11 |
The data used for evaluation consists of datasets to assess reasoning, language understanding, hallucination, and commonsense.
|
12 |
The evaluation dataset is exclusively private and only available for evaluation process.
|
13 |
More detailed information about the benchmark dataset is provided on the βAboutβ page.
|