fdisk committed
Commit 3c4ac54 · 1 Parent(s): f1a85ad

Tab cleanup

Files changed (2)
  1. app.py +20 -108
  2. src/display/utils.py +18 -12
app.py CHANGED
@@ -7,7 +7,6 @@ from huggingface_hub import snapshot_download
 from src.about import (
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
     TITLE,
@@ -17,39 +16,38 @@ from src.display.utils import (
     BENCHMARK_COLS,
     COLS,
     EVAL_COLS,
-    EVAL_TYPES,
     NUMERIC_INTERVALS,
     TYPES,
     AutoEvalColumn,
     ModelType,
     fields,
-    WeightType,
     Precision
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
 
 
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
+
 try:
     print(EVAL_REQUESTS_PATH)
     snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
+        token=TOKEN
     )
 except Exception:
     restart_space()
 try:
     print(EVAL_RESULTS_PATH)
     snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
+        token=TOKEN
     )
 except Exception:
     restart_space()
 
-
 raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 leaderboard_df = original_df.copy()
 
@@ -62,13 +60,13 @@ leaderboard_df = original_df.copy()
 
 # Searching and filtering
 def update_table(
-    hidden_df: pd.DataFrame,
-    columns: list,
-    type_query: list,
-    precision_query: str,
-    size_query: list,
-    show_deleted: bool,
-    query: str,
+        hidden_df: pd.DataFrame,
+        columns: list,
+        type_query: list,
+        precision_query: str,
+        size_query: list,
+        show_deleted: bool,
+        query: str,
 ):
     filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
     filtered_df = filter_queries(query, filtered_df)
@@ -87,8 +85,8 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
     ]
     # We use COLS to maintain sorting
     filtered_df = df[
-        always_here_cols + [c for c in COLS if c in df.columns and c in columns]
-    ]
+            always_here_cols + [c for c in COLS if c in df.columns and c in columns]
+        ]
     return filtered_df
 
 
@@ -112,7 +110,7 @@ def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
 
 
 def filter_models(
-    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
+        df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
 ) -> pd.DataFrame:
     # Show all models
     if show_deleted:
@@ -168,7 +166,7 @@ with demo:
                             value=False, label="Show gated/private/deleted models", interactive=True
                         )
                 with gr.Column(min_width=320):
-                    #with gr.Box(elem_id="box-filter"):
+                    # with gr.Box(elem_id="box-filter"):
                     filter_columns_type = gr.CheckboxGroup(
                         label="Model types",
                         choices=[t.to_str() for t in ModelType],
@@ -195,7 +193,7 @@ with demo:
                 value=leaderboard_df[
                     [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
                     + shown_columns.value
-                ],
+                    ],
                 headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
                 datatype=TYPES,
                 elem_id="leaderboard-table",
@@ -223,7 +221,8 @@ with demo:
                 ],
                 leaderboard_table,
             )
-            for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility]:
+            for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size,
+                             deleted_models_visibility]:
                 selector.change(
                     update_table,
                     [
@@ -242,93 +241,6 @@ with demo:
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )
-
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(
@@ -342,4 +254,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+demo.queue(default_concurrency_limit=40).launch()
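
For context on the `select_columns` hunk above: the helper keeps the never-hidden columns first and then orders whatever the user selected by its position in `COLS` (hence the "We use COLS to maintain sorting" comment). A minimal, self-contained sketch of that behaviour — the column names and `ALWAYS_HERE_COLS` below are illustrative placeholders, not the leaderboard's actual columns:

```python
import pandas as pd

# Hypothetical stand-ins for the module-level COLS and the never-hidden columns.
COLS = ["T", "Model", "Average ⬆️", "ARC", "HellaSwag"]
ALWAYS_HERE_COLS = ["T", "Model"]


def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
    # COLS, not the user's selection order, decides the final column order.
    return df[ALWAYS_HERE_COLS + [c for c in COLS if c in df.columns and c in columns]]


df = pd.DataFrame({c: [0] for c in COLS})
print(select_columns(df, ["HellaSwag", "Average ⬆️"]).columns.tolist())
# -> ['T', 'Model', 'Average ⬆️', 'HellaSwag']: the selection is re-sorted to follow COLS
```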
src/display/utils.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
 
 from src.about import Tasks
 
+
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
@@ -20,13 +21,13 @@ class ColumnContent:
     hidden: bool = False
     never_hidden: bool = False
 
+
 ## Leaderboard columns
-auto_eval_column_dict = []
+auto_eval_column_dict = [["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)],
+                         ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
+                         ["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)]]
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
-auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-#Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+# Scores
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
@@ -43,6 +44,7 @@ auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sh
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
+
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn: # Queue column
@@ -53,12 +55,13 @@ class EvalQueueColumn: # Queue column
     weight_type = ColumnContent("weight_type", "str", "Original")
     status = ColumnContent("status", "str", True)
 
+
 ## All the model information that we might need
 @dataclass
 class ModelDetails:
     name: str
     display_name: str = ""
-    symbol: str = "" # emoji
+    symbol: str = ""  # emoji
 
 
 class ModelType(Enum):
@@ -83,18 +86,20 @@ class ModelType(Enum):
             return ModelType.IFT
         return ModelType.Unknown
 
+
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
     Delta = ModelDetails("Delta")
 
+
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
     float32 = ModelDetails("float32")
-    #qt_8bit = ModelDetails("8bit")
-    #qt_4bit = ModelDetails("4bit")
-    #qt_GPTQ = ModelDetails("GPTQ")
+    # qt_8bit = ModelDetails("8bit")
+    # qt_4bit = ModelDetails("4bit")
+    # qt_GPTQ = ModelDetails("GPTQ")
     Unknown = ModelDetails("?")
 
     def from_str(precision):
@@ -104,14 +109,15 @@ class Precision(Enum):
             return Precision.bfloat16
         if precision in ["float32"]:
            return Precision.float32
-        #if precision in ["8bit"]:
+        # if precision in ["8bit"]:
        #     return Precision.qt_8bit
-        #if precision in ["4bit"]:
+        # if precision in ["4bit"]:
        #     return Precision.qt_4bit
-        #if precision in ["GPTQ", "None"]:
+        # if precision in ["GPTQ", "None"]:
        #     return Precision.qt_GPTQ
         return Precision.Unknown
 
+
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
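
For context on the `auto_eval_column_dict` change above: each entry is a `(field_name, field_type, default)` triple, and `make_dataclass` turns the whole list into the frozen `AutoEvalColumn` class, which is why the score columns can still be appended dynamically from `Tasks` before the class is built. A minimal sketch of that pattern, assuming a simplified `ColumnContent` (the field names here are illustrative, and it is made frozen so its instances are valid dataclass defaults) plus one hypothetical "ARC" task column:

```python
from dataclasses import dataclass, make_dataclass


@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


# (field_name, field_type, default) triples, mirroring auto_eval_column_dict.
column_dict = [
    ["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)],
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)],
]
# Score columns are appended dynamically; "ARC" stands in for a task column.
column_dict.append(["arc", ColumnContent, ColumnContent("ARC", "number", True)])

AutoEvalColumn = make_dataclass("AutoEvalColumn", column_dict, frozen=True)

# The defaults become class attributes, so display code can read them directly.
print(AutoEvalColumn.model.name)  # Model
print(AutoEvalColumn.arc.name)    # ARC
```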