Jellyfish042 committed on
Commit
6e57415
•
1 Parent(s): 8171dbf

update 14B

Browse files
app.py CHANGED
@@ -17,6 +17,7 @@ load_dotenv()
17
  webhook_url = os.environ.get("WEBHOOK_URL")
18
 
19
  file_name_list = [
 
20
  '9b',
21
  '7b',
22
  '3b',
@@ -36,6 +37,7 @@ metric_list = [
36
  ]
37
 
38
  model_size_list = [
 
39
  '~9B',
40
  '~7B',
41
  '~3B',
@@ -49,25 +51,13 @@ metric_to_sheet = {
49
  }
50
 
51
  model_size_to_file_name = {
 
52
  '~9B': '9b',
53
  '~7B': '7b',
54
  '~3B': '3b',
55
  '~1.5B': '1b5',
56
  }
57
 
58
- css = """
59
- .gr-dataframe table {
60
- table-layout: fixed;
61
- width: 100%; /* Ensures the table fills its container */
62
- }
63
- .gr-dataframe th, .gr-dataframe td {
64
- width: 100px; /* Set the exact width of each cell */
65
- overflow: hidden; /* Ensures the content doesn't overflow */
66
- text-overflow: ellipsis; /* Adds an ellipsis (...) if the text overflows */
67
- white-space: nowrap; /* Keeps the content on a single line */
68
- }
69
- """
70
-
71
  about_md = """
72
  # Uncheatable Eval
73
 
@@ -167,61 +157,49 @@ def update_table(period: str,
167
  if 'Average (The lower the better)' in combined_data.columns:
168
  relevant_columns = [col for col in visible_columns if
169
  col not in ['Name', 'Parameters Count (B)', 'Average (The lower the better)']]
170
- combined_data['Average (The lower the better)'] = round(combined_data[relevant_columns].mean(axis=1), 3)
171
-
172
- sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
173
- sorted_data = sorted_data.rename(columns={'Average (The lower the better)': 'Average (lower=better)'})
174
- visible_columns = ['Name', 'Parameters Count (B)', 'Average (lower=better)'] + visible_columns
175
- filtered_data = sorted_data[visible_columns]
176
-
177
- filtered_data.columns = [col.replace('_', ' ') for col in filtered_data.columns]
178
-
179
- formatter = {col: "{:.3f}" for col in filtered_data.columns if
180
- filtered_data[col].dtype in ['float64', 'float32']}
181
-
182
- def color_column(s):
183
- return ['background-color: #fffdd0' if pd.notna(x) else 'default' for x in s]
184
-
185
- # color gradient
186
- colors = ["#63be7b", "#ffffff", "#f8696b"]
187
- cmap = LinearSegmentedColormap.from_list("custom_cmap", colors)
188
- target_color_columns = []
189
- if 'Average' in color_columns:
190
- target_color_columns.append('Average (lower=better)')
191
- if 'Individual Tests' in color_columns:
192
- target_color_columns.extend([col for col in filtered_data.columns if
193
- col not in ['Name', 'Parameters Count (B)', 'Average (lower=better)']])
194
-
195
- # styler = filtered_data.style.format(formatter).background_gradient(
196
- # cmap=cmap,
197
- # subset=target_color_columns,
198
- # vmin=min_value,
199
- # vmax=max_value
200
- # ).apply(color_column, subset=['Parameters Count (B)'])
201
-
202
- # for better visualization
203
- vmin = {}
204
- vmax = {}
205
- for column in filtered_data.columns:
206
- if column in ['Name', 'Parameters Count (B)']:
207
- continue
208
- col_values = filtered_data[column]
209
- second_largest = col_values.nlargest(2).iloc[-1]
210
- vmin[column] = col_values.min()
211
- vmax[column] = second_largest
212
-
213
- target_color_columns = []
214
- if 'Average' in color_columns:
215
- target_color_columns.append('Average (lower=better)')
216
- if 'Individual Tests' in color_columns:
217
- target_color_columns.extend([col for col in filtered_data.columns if
218
- col not in ['Name', 'Parameters Count (B)', 'Average (lower=better)']])
219
-
220
- styler = filtered_data.style.format(formatter).map(color_cell, subset=['Parameters Count (B)'])
221
- for column in target_color_columns:
222
- styler = styler.background_gradient(cmap=cmap, subset=[column], vmin=vmin[column], vmax=vmax[column])
223
-
224
- return styler
225
  else:
226
  return pd.DataFrame()
227
 
@@ -334,7 +312,8 @@ def create_scaling_plot(all_data, period):
334
  y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
335
 
336
  fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title='Params(B)')
337
- fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title='Compression Rate (%)', autorange='reversed')
 
338
 
339
  fig.update_layout(
340
  xaxis=dict(showgrid=True, zeroline=False),
@@ -346,20 +325,26 @@ def create_scaling_plot(all_data, period):
346
  return fig
347
 
348
 
349
- all_data = {}
350
- time_list = []
351
- for folder in get_folders_matching_format('data'):
352
- folder_name = os.path.basename(folder)
353
- time_list.append(folder_name)
354
- if all_data.get(folder) is None:
355
- all_data[folder_name] = {}
356
- for file_name in file_name_list:
357
- if all_data.get(file_name) is None:
358
- all_data[folder_name][file_name] = {}
359
- for sheet_name in sheet_name_list:
360
- final_file_name = os.path.join(folder, file_name)
361
- all_data[folder_name][file_name][sheet_name] = rename_columns(
362
- pd.read_excel(final_file_name + '.xlsx', sheet_name=sheet_name))
 
 
 
 
 
 
363
 
364
  initial_fig = create_scaling_plot(all_data, time_list[-1])
365
 
@@ -385,10 +370,12 @@ css = '''
385
 
386
  '''
387
 
 
 
 
388
  with gr.Blocks(css=css) as demo:
389
- gr.HTML('<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>')
390
- gr.HTML(
390
- "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won’t work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>")
392
  with gr.Tabs() as tabs:
393
  with gr.Tab("🏆 Leaderboard"):
394
  with gr.Row():
@@ -430,6 +417,7 @@ with gr.Blocks(css=css) as demo:
430
  with gr.Tab("📈 Scaling Law"):
431
  period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=time_list[0])
432
 
 
433
  def update_plot(period):
434
  new_fig = create_scaling_plot(all_data, period)
435
  return new_fig
 
17
  webhook_url = os.environ.get("WEBHOOK_URL")
18
 
19
  file_name_list = [
20
+ '14b',
21
  '9b',
22
  '7b',
23
  '3b',
 
37
  ]
38
 
39
  model_size_list = [
40
+ '~14B',
41
  '~9B',
42
  '~7B',
43
  '~3B',
 
51
  }
52
 
53
  model_size_to_file_name = {
54
+ '~14B': '14b',
55
  '~9B': '9b',
56
  '~7B': '7b',
57
  '~3B': '3b',
58
  '~1.5B': '1b5',
59
  }
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  about_md = """
62
  # Uncheatable Eval
63
 
 
157
  if 'Average (The lower the better)' in combined_data.columns:
158
  relevant_columns = [col for col in visible_columns if
159
  col not in ['Name', 'Parameters Count (B)', 'Average (The lower the better)']]
160
+ if len(combined_data) > 0:
161
+ combined_data['Average (The lower the better)'] = round(combined_data[relevant_columns].mean(axis=1), 3)
162
+
163
+ if len(combined_data) > 0:
164
+ sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
165
+ sorted_data = sorted_data.rename(columns={'Average (The lower the better)': 'Average (lower=better)'})
166
+ visible_columns = ['Name', 'Parameters Count (B)', 'Average (lower=better)'] + visible_columns
167
+ filtered_data = sorted_data[visible_columns]
168
+
169
+ filtered_data.columns = [col.replace('_', ' ') for col in filtered_data.columns]
170
+
171
+ formatter = {col: "{:.3f}" for col in filtered_data.columns if
172
+ filtered_data[col].dtype in ['float64', 'float32']}
173
+
174
+ # color gradient
175
+ colors = ["#63be7b", "#ffffff", "#f8696b"]
176
+ cmap = LinearSegmentedColormap.from_list("custom_cmap", colors)
177
+ vmin = {}
178
+ vmax = {}
179
+ for column in filtered_data.columns:
180
+ if column in ['Name', 'Parameters Count (B)']:
181
+ continue
182
+ col_values = filtered_data[column]
183
+ if len(col_values) > 1:
184
+ second_largest = col_values.nlargest(2).iloc[-1]
185
+ vmin[column] = col_values.min()
186
+ vmax[column] = second_largest
187
+
188
+ target_color_columns = []
189
+ if 'Average' in color_columns:
190
+ target_color_columns.append('Average (lower=better)')
191
+ if 'Individual Tests' in color_columns:
192
+ target_color_columns.extend([col for col in filtered_data.columns if
193
+ col not in ['Name', 'Parameters Count (B)', 'Average (lower=better)']])
194
+
195
+ styler = filtered_data.style.format(formatter)
196
+ for column in target_color_columns:
197
+ if column in vmin and column in vmax: # Ensure that the vmin and vmax dicts contain the column
198
+ styler = styler.background_gradient(cmap=cmap, subset=[column], vmin=vmin[column], vmax=vmax[column])
199
+
200
+ return styler
201
+ else:
202
+ return pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
203
  else:
204
  return pd.DataFrame()
205
 
 
312
  y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
313
 
314
  fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title='Params(B)')
315
+ fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title='Compression Rate (%)',
316
+ autorange='reversed')
317
 
318
  fig.update_layout(
319
  xaxis=dict(showgrid=True, zeroline=False),
 
325
  return fig
326
 
327
 
328
+ def read_all_data(folder_name):
329
+ all_data = {}
330
+ time_list = []
331
+ for folder in get_folders_matching_format(folder_name):
332
+ folder_name = os.path.basename(folder)
333
+ time_list.append(folder_name)
334
+ if all_data.get(folder) is None:
335
+ all_data[folder_name] = {}
336
+ for file_name in file_name_list:
337
+ if all_data.get(file_name) is None:
338
+ all_data[folder_name][file_name] = {}
339
+ for sheet_name in sheet_name_list:
340
+ final_file_name = os.path.join(folder, file_name)
341
+ all_data[folder_name][file_name][sheet_name] = rename_columns(
342
+ pd.read_excel(final_file_name + '.xlsx', sheet_name=sheet_name))
343
+
344
+ return all_data, time_list
345
+
346
+
347
+ all_data, time_list = read_all_data('data')
348
 
349
  initial_fig = create_scaling_plot(all_data, time_list[-1])
350
 
 
370
 
371
  '''
372
 
373
+ TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
374
+ SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won’t work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>"
375
+
376
  with gr.Blocks(css=css) as demo:
377
+ gr.HTML(TITLE_HTML)
378
+ gr.HTML(SUBTITLE_HTML)
 
379
  with gr.Tabs() as tabs:
380
  with gr.Tab("🏆 Leaderboard"):
381
  with gr.Row():
 
417
  with gr.Tab("📈 Scaling Law"):
418
  period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=time_list[0])
419
 
420
+
421
  def update_plot(period):
422
  new_fig = create_scaling_plot(all_data, period)
423
  return new_fig
data/2024-05/14b.xlsx ADDED
Binary file (10.5 kB). View file
 
data/2024-06/14b.xlsx ADDED
Binary file (10.5 kB). View file
 
data/2024-07/14b.xlsx ADDED
Binary file (11.3 kB). View file