Jellyfish042 committed
Commit c89c654 · 1 Parent(s): ca643ea
Files changed (1)
  1. app.py +177 -169
app.py CHANGED
@@ -19,45 +19,45 @@ load_dotenv()
19
  webhook_url = os.environ.get("WEBHOOK_URL")
20
 
21
  file_name_list = [
22
- '14b',
23
- '9b',
24
- '7b',
25
- '3b',
26
- '1b5',
27
  ]
28
 
29
  sheet_name_list = [
30
- 'cr',
31
- 'bpc',
32
- 'bpb',
33
  ]
34
 
35
  metric_list = [
36
- 'Compression Rate (%)',
37
- 'Bits Per Character (BPC)',
38
- 'Bits Per Byte (BPB)',
39
  ]
40
 
41
  model_size_list = [
42
- '~14B',
43
- '~9B',
44
- '~7B',
45
- '~3B',
46
- '~1.5B',
47
  ]
48
 
49
  metric_to_sheet = {
50
- 'Compression Rate (%)': 'cr',
51
- 'Bits Per Character (BPC)': 'bpc',
52
- 'Bits Per Byte (BPB)': 'bpb',
53
  }
54
 
55
  model_size_to_file_name = {
56
- '~14B': '14b',
57
- '~9B': '9b',
58
- '~7B': '7b',
59
- '~3B': '3b',
60
- '~1.5B': '1b5',
61
  }
62
 
63
  about_md = """
@@ -100,12 +100,12 @@ In fact, the model rankings obtained through Uncheatable Eval are very stable. F
100
 
101
 
102
  def rename_columns(df):
103
- df.columns = [col.rsplit('_', maxsplit=1)[0] for col in df.columns]
104
  return df
105
 
106
 
107
  def get_folders_matching_format(directory):
108
- pattern = re.compile(r'^\d{4}-\d{2}$')
109
  folders = []
110
 
111
  if not os.path.exists(directory):
@@ -131,52 +131,60 @@ def get_unique_column_names(all_data):
131
  #
132
  # return list(column_names.keys())
133
 
134
- return ['ao3_\u200benglish', 'bbc_\u200bnews', 'wikipedia_\u200benglish', 'arxiv_\u200bcomputer_\u200bscience',
135
- 'arxiv_\u200bphysics', 'github_\u200bcpp', 'github_\u200bpython', 'ao3_\u200bchinese']
136
 
137
 
138
  def color_cell(value):
139
- return 'background-color: #fffdd0' if pd.notna(value) else 'default'
140
-
141
-
142
- def update_table(period: str,
143
- models: list,
144
- metric: str,
145
- visible_columns: list,
146
- color_columns: list,
147
- size_range: list,
148
- sort_by: str = 'Average (The lower the better)',
149
- ascending: bool = True):
150
  target_data = all_data[period]
151
  target_metric = metric_to_sheet[metric]
152
 
153
  if models:
154
  target_model_size = [model_size_to_file_name[model] for model in models]
155
  combined_data = pd.concat([target_data[model][target_metric] for model in target_model_size], axis=0)
156
- combined_data['Name'] = combined_data['Name'].apply(lambda x: x.replace('.pth', ''))
157
 
158
  # Filter models based on the size range
159
- combined_data = combined_data[combined_data['Parameters Count (B)'].between(size_range[0], size_range[1])]
160
 
161
  combined_data.reset_index(drop=True, inplace=True)
162
 
163
- if 'Average (The lower the better)' in combined_data.columns:
164
- relevant_columns = [col for col in visible_columns if
165
- col not in ['Name', 'Parameters Count (B)', 'Average (The lower the better)']]
166
  if len(combined_data) > 0:
167
- combined_data['Average (The lower the better)'] = round(combined_data[relevant_columns].mean(axis=1), 3)
168
 
169
  if len(combined_data) > 0:
170
  sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
171
- sorted_data = sorted_data.rename(columns={'Average (The lower the better)': 'Average (lower=better)'})
172
- sorted_data = sorted_data.rename(columns={'Parameters Count (B)': 'Params (B)'})
173
- visible_columns = ['Name', 'Params (B)', 'Average (lower=better)'] + visible_columns
174
  filtered_data = sorted_data[visible_columns]
175
 
176
- filtered_data.columns = [col.replace('_', ' ') for col in filtered_data.columns]
177
 
178
- formatter = {col: "{:.3f}" for col in filtered_data.columns if
179
- filtered_data[col].dtype in ['float64', 'float32']}
180
 
181
  # color gradient
182
  colors = ["#63be7b", "#ffffff", "#f8696b"]
@@ -184,7 +192,7 @@ def update_table(period: str,
184
  vmin = {}
185
  vmax = {}
186
  for column in filtered_data.columns:
187
- if column in ['Name', 'Params (B)']:
188
  continue
189
  col_values = filtered_data[column]
190
  if len(col_values) > 1:
@@ -193,14 +201,12 @@ def update_table(period: str,
193
  vmax[column] = second_largest
194
 
195
  target_color_columns = []
196
- if 'Average' in color_columns:
197
- target_color_columns.append('Average (lower=better)')
198
- if 'Individual Tests' in color_columns:
199
- target_color_columns.extend([col for col in filtered_data.columns if
200
- col not in ['Name', 'Params (B)', 'Average (lower=better)']])
201
-
202
 
203
- styler = filtered_data.style.format(formatter).applymap(color_cell, subset=['Params (B)'])
204
 
205
  for column in target_color_columns:
206
  if column in vmin and column in vmax: # Ensure that the vmin and vmax dicts contain the column
@@ -212,30 +218,35 @@ def update_table(period: str,
212
  else:
213
  return pd.DataFrame()
214
 
 
215
  def create_world_languages_gdp_chart():
216
- languages = ['English', 'Chinese', 'Spanish', 'Japanese', 'German', 'French', 'Arabic', 'Italian', 'Portuguese', 'Korean', 'Other']
217
  shares = [27, 18, 8, 6, 5, 4, 3, 2, 2, 2, 23]
218
- colors = ['#FF7F7F', '#FFA07A', '#FFDB58', '#90EE90', '#98FB98', '#87CEFA', '#B0C4DE', '#DDA0DD', '#D8BFD8', '#F0E68C', '#E0FFFF']
219
-
220
- fig = go.Figure(data=[go.Pie(
221
- labels=languages,
222
- values=shares,
223
- hole=0.3,
224
- marker=dict(colors=colors, line=dict(color='#FFFFFF', width=2)),
225
- textinfo='label+percent',
226
- textposition='outside',
227
- insidetextorientation='radial',
228
- textfont=dict(size=12),
229
- )])
230
 
231
  fig.update_layout(
232
  title={
233
- 'text': "World Languages by Share of Global GDP",
234
- 'y':0.95,
235
- 'x':0.5,
236
- 'xanchor': 'center',
237
- 'yanchor': 'top',
238
- 'font': dict(size=20, color='black')
239
  },
240
  showlegend=False,
241
  width=700,
@@ -245,6 +256,7 @@ def create_world_languages_gdp_chart():
245
 
246
  return fig
247
 
 
248
  def check_model_exists(model_id):
249
  api = HfApi()
250
  try:
@@ -260,14 +272,14 @@ def check_model_exists(model_id):
260
 
261
 
262
  def submit_model(name):
263
- if 'Exists' not in check_model_exists(name):
264
  return f"# ERROR: Model {name} does not exist on Hugging Face!"
265
 
266
  try:
267
  response = requests.post(webhook_url, json={"content": name})
268
  if response.status_code == 200:
269
  response_data = response.json()
270
- if response_data.get('status') == 'success':
271
  return "# SUCCESS: We will check the model as soon as possible. Thank you for your submission!"
272
  else:
273
  return f"# ERROR: {response_data.get('message', 'Unknown error')}"
@@ -281,54 +293,59 @@ def submit_model(name):
281
 
282
 
283
  def create_scaling_plot(all_data, period):
284
- selected_columns = ['Name', 'Parameters Count (B)', 'Average (The lower the better)']
285
  target_data = all_data[period]
286
  new_df = pd.DataFrame()
287
 
288
  for size in target_data.keys():
289
- new_df = pd.concat([new_df, target_data[size]['cr'].loc[:, selected_columns]], axis=0)
290
 
291
- new_df.rename(columns={
292
- 'Parameters Count (B)': 'Params(B)',
293
- 'Average (The lower the better)': 'Compression Rate (%)'
294
- }, inplace=True)
295
 
296
- new_df['Log Params(B)'] = np.log(new_df['Params(B)'])
297
- new_df['Log Compression Rate (%)'] = np.log(new_df['Compression Rate (%)'])
298
 
299
- fig = px.scatter(new_df,
300
- x='Log Params(B)',
301
- y='Log Compression Rate (%)',
302
- title='Compression Rate Scaling Law',
303
- hover_name='Name',
304
- custom_data=['Params(B)', 'Compression Rate (%)']
305
- )
 
306
 
307
  fig.update_traces(
308
  hovertemplate="<b>%{hovertext}</b><br>Params(B): %{customdata[0]:.2f} B<br>Compression Rate (%): %{customdata[1]:.2f}<extra></extra>"
309
  )
310
-
311
  names_to_connect_dict = {
312
- '2024-05': ['Meta-Llama-3-8B', 'stablelm-3b-4e1t', 'Qwen2-1.5B', 'TinyLlama-1.1B-intermediate-step-1431k-3T', 'Mistral-Nemo-Base-2407'],
313
- '2024-06': ['Meta-Llama-3-8B', 'stablelm-3b-4e1t', 'Qwen2-1.5B', 'TinyLlama-1.1B-intermediate-step-1431k-3T', 'Mistral-Nemo-Base-2407'],
314
- '2024-07': ['Meta-Llama-3.1-8B', 'stablelm-3b-4e1t', 'Qwen2-1.5B', 'TinyLlama-1.1B-intermediate-step-1431k-3T', 'Mistral-Nemo-Base-2407'],
315
- '2024-08': ['Meta-Llama-3.1-8B', 'Rene-v0.1-1.3b-pytorch', 'stablelm-3b-4e1t', 'Qwen2-1.5B', 'TinyLlama-1.1B-intermediate-step-1431k-3T', 'Mistral-Nemo-Base-2407'],
316
  }
317
 
318
- names_to_connect = names_to_connect_dict.get(period, names_to_connect_dict['2024-08'])
319
-
320
- connection_points = new_df[new_df['Name'].isin(names_to_connect)]
321
 
322
- new_df['Color'] = new_df['Name'].apply(lambda name: '#39C5BB' if name in names_to_connect else '#636efa')
323
 
324
- fig.update_traces(marker=dict(color=new_df['Color']))
325
 
326
- X = connection_points['Log Params(B)'].values.reshape(-1, 1)
327
- y = connection_points['Log Compression Rate (%)'].values
328
  model = LinearRegression().fit(X, y)
329
 
330
- x_min = connection_points['Log Params(B)'].min()
331
- x_max = connection_points['Log Params(B)'].max()
332
  extended_x = np.linspace(x_min, x_max * 1.5, 100)
333
  extended_x_original = np.exp(extended_x)
334
  trend_line_y = model.predict(extended_x.reshape(-1, 1))
@@ -337,34 +354,29 @@ def create_scaling_plot(all_data, period):
337
  trend_line = go.Scatter(
338
  x=extended_x,
339
  y=trend_line_y,
340
- mode='lines',
341
- line=dict(color='skyblue', dash='dash'),
342
- name='Trend Line',
343
- hovertemplate='<b>Params(B):</b> %{customdata[0]:.2f}<br>' +
344
- '<b>Compression Rate (%):</b> %{customdata[1]:.2f}<extra></extra>',
345
- customdata=np.stack((extended_x_original, trend_line_y_original), axis=-1)
346
  )
347
 
348
  fig.add_trace(trend_line)
349
 
350
- x_min = new_df['Params(B)'].min()
351
- x_max = new_df['Params(B)'].max()
352
  x_tick_vals = np.geomspace(x_min, x_max, num=5)
353
  x_tick_text = [f"{val:.1f}" for val in x_tick_vals]
354
 
355
- y_min = new_df['Compression Rate (%)'].min()
356
- y_max = new_df['Compression Rate (%)'].max()
357
  y_tick_vals = np.geomspace(y_min, y_max, num=5)
358
  y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
359
 
360
- fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title='Params(B)')
361
- fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title='Compression Rate (%)',
362
- autorange='reversed')
363
 
364
- fig.update_layout(
365
- xaxis=dict(showgrid=True, zeroline=False),
366
- yaxis=dict(showgrid=True, zeroline=False)
367
- )
368
 
369
  fig.update_traces(marker=dict(size=12))
370
 
@@ -384,8 +396,7 @@ def read_all_data(folder_name):
384
  all_data[folder_name][file_name] = {}
385
  for sheet_name in sheet_name_list:
386
  final_file_name = os.path.join(folder, file_name)
387
- all_data[folder_name][file_name][sheet_name] = rename_columns(
388
- pd.read_excel(final_file_name + '.xlsx', sheet_name=sheet_name))
389
 
390
  return all_data, time_list
391
 
@@ -404,7 +415,7 @@ def read_all_data(folder_name):
404
  # return mutilange_data, time_list
405
 
406
 
407
- all_data, time_list = read_all_data('data')
408
  # muti_lang_data, muti_lang_time_list = read_mutilange_data()
409
 
410
  time_list.sort()
@@ -415,11 +426,13 @@ initial_period = last_period
415
  initial_models = model_size_list
416
  initial_metric = metric_list[0]
417
  initial_columns = get_unique_column_names(all_data)
418
- initial_colors = ['Average']
419
  initial_size_range = [0, 15]
420
  initial_data = update_table(initial_period, initial_models, initial_metric, initial_columns, initial_colors, initial_size_range)
421
 
422
- css = '''
423
  .gradio-container {
424
  max-width: 95% !important;
425
  }
@@ -431,7 +444,7 @@ css = '''
431
  word-break: break-word;
432
  }
433
 
434
- '''
435
 
436
  TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
437
  SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won’t work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>"
@@ -448,37 +461,36 @@ with gr.Blocks(css=css) as demo:
448
  size_range_slider = RangeSlider(minimum=0, maximum=15, value=[0, 15], step=0.1, label="Model Size Range")
449
  metric_selector = gr.Dropdown(label="Metric", choices=metric_list, value=metric_list[0])
450
  with gr.Column():
451
- color_selector = gr.CheckboxGroup(label="Colored Columns",
452
- choices=['Average', 'Individual Tests'],
453
- value=['Average'])
454
- colfilter = gr.CheckboxGroup(label="Data Source",
455
- choices=get_unique_column_names(all_data),
456
- value=get_unique_column_names(all_data))
457
-
458
- table = gr.Dataframe(initial_data,
459
- column_widths=[130, 50, 50, 35, 35, 35, 35, 35, 35, 35, 35],
460
- wrap=True,
461
- height=800,
462
- )
463
-
464
- period_selector.change(update_table,
465
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider],
466
- outputs=table)
467
- model_selector.change(update_table,
468
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider],
469
- outputs=table)
470
- metric_selector.change(update_table,
471
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider],
472
- outputs=table)
473
- colfilter.change(update_table,
474
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider],
475
- outputs=table)
476
- color_selector.change(update_table,
477
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider],
478
- outputs=table)
479
- size_range_slider.change(update_table,
480
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider],
481
- outputs=table)
482
 
483
  with gr.Tab("🌍 MultiLang"):
484
  gr.Markdown("## Coming soon...")
@@ -499,13 +511,9 @@ with gr.Blocks(css=css) as demo:
499
  with gr.Tab("🚀 Submit"):
500
  with gr.Group():
501
  with gr.Row():
502
- model_name = gr.Textbox(max_lines=1,
503
- placeholder="Enter model name...",
504
- show_label=False,
505
- scale=4)
506
  submit = gr.Button("Submit", variant="primary", scale=0)
507
- output = gr.Markdown(
508
- "# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
509
 
510
  submit.click(fn=submit_model, inputs=model_name, outputs=output)
511
 
 
19
  webhook_url = os.environ.get("WEBHOOK_URL")
20
 
21
  file_name_list = [
22
+ "14b",
23
+ "9b",
24
+ "7b",
25
+ "3b",
26
+ "1b5",
27
  ]
28
 
29
  sheet_name_list = [
30
+ "cr",
31
+ "bpc",
32
+ "bpb",
33
  ]
34
 
35
  metric_list = [
36
+ "Compression Rate (%)",
37
+ "Bits Per Character (BPC)",
38
+ "Bits Per Byte (BPB)",
39
  ]
40
 
41
  model_size_list = [
42
+ "~14B",
43
+ "~9B",
44
+ "~7B",
45
+ "~3B",
46
+ "~1.5B",
47
  ]
48
 
49
  metric_to_sheet = {
50
+ "Compression Rate (%)": "cr",
51
+ "Bits Per Character (BPC)": "bpc",
52
+ "Bits Per Byte (BPB)": "bpb",
53
  }
54
 
55
  model_size_to_file_name = {
56
+ "~14B": "14b",
57
+ "~9B": "9b",
58
+ "~7B": "7b",
59
+ "~3B": "3b",
60
+ "~1.5B": "1b5",
61
  }
62
 
63
  about_md = """
 
100
 
101
 
102
  def rename_columns(df):
103
+ df.columns = [col.rsplit("_", maxsplit=1)[0] for col in df.columns]
104
  return df
105
 
106
 
107
  def get_folders_matching_format(directory):
108
+ pattern = re.compile(r"^\d{4}-\d{2}$")
109
  folders = []
110
 
111
  if not os.path.exists(directory):
 
131
  #
132
  # return list(column_names.keys())
133
 
134
+ return [
135
+ "ao3_\u200benglish",
136
+ "bbc_\u200bnews",
137
+ "wikipedia_\u200benglish",
138
+ "arxiv_\u200bcomputer_\u200bscience",
139
+ "arxiv_\u200bphysics",
140
+ "github_\u200bcpp",
141
+ "github_\u200bpython",
142
+ "ao3_\u200bchinese",
143
+ ]
144
 
145
 
146
  def color_cell(value):
147
+ return "background-color: #fffdd0" if pd.notna(value) else "default"
148
+
149
+
150
+ def update_table(
151
+ period: str,
152
+ models: list,
153
+ metric: str,
154
+ visible_columns: list,
155
+ color_columns: list,
156
+ size_range: list,
157
+ sort_by: str = "Average (The lower the better)",
158
+ ascending: bool = True,
159
+ ):
160
  target_data = all_data[period]
161
  target_metric = metric_to_sheet[metric]
162
 
163
  if models:
164
  target_model_size = [model_size_to_file_name[model] for model in models]
165
  combined_data = pd.concat([target_data[model][target_metric] for model in target_model_size], axis=0)
166
+ combined_data["Name"] = combined_data["Name"].apply(lambda x: x.replace(".pth", ""))
167
 
168
  # Filter models based on the size range
169
+ combined_data = combined_data[combined_data["Parameters Count (B)"].between(size_range[0], size_range[1])]
170
 
171
  combined_data.reset_index(drop=True, inplace=True)
172
 
173
+ if "Average (The lower the better)" in combined_data.columns:
174
+ relevant_columns = [col for col in visible_columns if col not in ["Name", "Parameters Count (B)", "Average (The lower the better)"]]
 
175
  if len(combined_data) > 0:
176
+ combined_data["Average (The lower the better)"] = round(combined_data[relevant_columns].mean(axis=1), 3)
177
 
178
  if len(combined_data) > 0:
179
  sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
180
+ sorted_data = sorted_data.rename(columns={"Average (The lower the better)": "Average (lower=better)"})
181
+ sorted_data = sorted_data.rename(columns={"Parameters Count (B)": "Params (B)"})
182
+ visible_columns = ["Name", "Params (B)", "Average (lower=better)"] + visible_columns
183
  filtered_data = sorted_data[visible_columns]
184
 
185
+ filtered_data.columns = [col.replace("_", " ") for col in filtered_data.columns]
186
 
187
+ formatter = {col: "{:.3f}" for col in filtered_data.columns if filtered_data[col].dtype in ["float64", "float32"]}
 
188
 
189
  # color gradient
190
  colors = ["#63be7b", "#ffffff", "#f8696b"]
 
192
  vmin = {}
193
  vmax = {}
194
  for column in filtered_data.columns:
195
+ if column in ["Name", "Params (B)"]:
196
  continue
197
  col_values = filtered_data[column]
198
  if len(col_values) > 1:
 
201
  vmax[column] = second_largest
202
 
203
  target_color_columns = []
204
+ if "Average" in color_columns:
205
+ target_color_columns.append("Average (lower=better)")
206
+ if "Individual Tests" in color_columns:
207
+ target_color_columns.extend([col for col in filtered_data.columns if col not in ["Name", "Params (B)", "Average (lower=better)"]])
 
 
208
 
209
+ styler = filtered_data.style.format(formatter).applymap(color_cell, subset=["Params (B)"])
210
 
211
  for column in target_color_columns:
212
  if column in vmin and column in vmax: # Ensure that the vmin and vmax dicts contain the column
 
218
  else:
219
  return pd.DataFrame()
220
 
221
+
222
  def create_world_languages_gdp_chart():
223
+ languages = ["English", "Chinese", "Spanish", "Japanese", "German", "French", "Arabic", "Italian", "Portuguese", "Korean", "Other"]
224
  shares = [27, 18, 8, 6, 5, 4, 3, 2, 2, 2, 23]
225
+ colors = ["#FF7F7F", "#FFA07A", "#FFDB58", "#90EE90", "#98FB98", "#87CEFA", "#B0C4DE", "#DDA0DD", "#D8BFD8", "#F0E68C", "#E0FFFF"]
226
+
227
+ fig = go.Figure(
228
+ data=[
229
+ go.Pie(
230
+ labels=languages,
231
+ values=shares,
232
+ hole=0.3,
233
+ marker=dict(colors=colors, line=dict(color="#FFFFFF", width=2)),
234
+ textinfo="label+percent",
235
+ textposition="outside",
236
+ insidetextorientation="radial",
237
+ textfont=dict(size=12),
238
+ )
239
+ ]
240
+ )
241
 
242
  fig.update_layout(
243
  title={
244
+ "text": "World Languages by Share of Global GDP",
245
+ "y": 0.95,
246
+ "x": 0.5,
247
+ "xanchor": "center",
248
+ "yanchor": "top",
249
+ "font": dict(size=20, color="black"),
250
  },
251
  showlegend=False,
252
  width=700,
 
256
 
257
  return fig
258
 
259
+
260
  def check_model_exists(model_id):
261
  api = HfApi()
262
  try:
 
272
 
273
 
274
  def submit_model(name):
275
+ if "Exists" not in check_model_exists(name):
276
  return f"# ERROR: Model {name} does not exist on Hugging Face!"
277
 
278
  try:
279
  response = requests.post(webhook_url, json={"content": name})
280
  if response.status_code == 200:
281
  response_data = response.json()
282
+ if response_data.get("status") == "success":
283
  return "# SUCCESS: We will check the model as soon as possible. Thank you for your submission!"
284
  else:
285
  return f"# ERROR: {response_data.get('message', 'Unknown error')}"
 
293
 
294
 
295
  def create_scaling_plot(all_data, period):
296
+ selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
297
  target_data = all_data[period]
298
  new_df = pd.DataFrame()
299
 
300
  for size in target_data.keys():
301
+ new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns]], axis=0)
302
 
303
+ new_df.rename(columns={"Parameters Count (B)": "Params(B)", "Average (The lower the better)": "Compression Rate (%)"}, inplace=True)
304
 
305
+ new_df["Log Params(B)"] = np.log(new_df["Params(B)"])
306
+ new_df["Log Compression Rate (%)"] = np.log(new_df["Compression Rate (%)"])
307
 
308
+ fig = px.scatter(
309
+ new_df,
310
+ x="Log Params(B)",
311
+ y="Log Compression Rate (%)",
312
+ title="Compression Rate Scaling Law",
313
+ hover_name="Name",
314
+ custom_data=["Params(B)", "Compression Rate (%)"],
315
+ )
316
 
317
  fig.update_traces(
318
  hovertemplate="<b>%{hovertext}</b><br>Params(B): %{customdata[0]:.2f} B<br>Compression Rate (%): %{customdata[1]:.2f}<extra></extra>"
319
  )
320
+
321
  names_to_connect_dict = {
322
+ "2024-05": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
323
+ "2024-06": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
324
+ "2024-07": ["Meta-Llama-3.1-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
325
+ "2024-08": [
326
+ "Meta-Llama-3.1-8B",
327
+ "Rene-v0.1-1.3b-pytorch",
328
+ "stablelm-3b-4e1t",
329
+ "Qwen2-1.5B",
330
+ "TinyLlama-1.1B-intermediate-step-1431k-3T",
331
+ "Mistral-Nemo-Base-2407",
332
+ ],
333
  }
334
 
335
+ names_to_connect = names_to_connect_dict.get(period, names_to_connect_dict["2024-08"])
336
+
337
+ connection_points = new_df[new_df["Name"].isin(names_to_connect)]
338
 
339
+ new_df["Color"] = new_df["Name"].apply(lambda name: "#39C5BB" if name in names_to_connect else "#636efa")
340
 
341
+ fig.update_traces(marker=dict(color=new_df["Color"]))
342
 
343
+ X = connection_points["Log Params(B)"].values.reshape(-1, 1)
344
+ y = connection_points["Log Compression Rate (%)"].values
345
  model = LinearRegression().fit(X, y)
346
 
347
+ x_min = connection_points["Log Params(B)"].min()
348
+ x_max = connection_points["Log Params(B)"].max()
349
  extended_x = np.linspace(x_min, x_max * 1.5, 100)
350
  extended_x_original = np.exp(extended_x)
351
  trend_line_y = model.predict(extended_x.reshape(-1, 1))
 
354
  trend_line = go.Scatter(
355
  x=extended_x,
356
  y=trend_line_y,
357
+ mode="lines",
358
+ line=dict(color="skyblue", dash="dash"),
359
+ name="Trend Line",
360
+ hovertemplate="<b>Params(B):</b> %{customdata[0]:.2f}<br>" + "<b>Compression Rate (%):</b> %{customdata[1]:.2f}<extra></extra>",
361
+ customdata=np.stack((extended_x_original, trend_line_y_original), axis=-1),
 
362
  )
363
 
364
  fig.add_trace(trend_line)
365
 
366
+ x_min = new_df["Params(B)"].min()
367
+ x_max = new_df["Params(B)"].max()
368
  x_tick_vals = np.geomspace(x_min, x_max, num=5)
369
  x_tick_text = [f"{val:.1f}" for val in x_tick_vals]
370
 
371
+ y_min = new_df["Compression Rate (%)"].min()
372
+ y_max = new_df["Compression Rate (%)"].max()
373
  y_tick_vals = np.geomspace(y_min, y_max, num=5)
374
  y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
375
 
376
+ fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title="Params(B)")
377
+ fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title="Compression Rate (%)", autorange="reversed")
 
378
 
379
+ fig.update_layout(xaxis=dict(showgrid=True, zeroline=False), yaxis=dict(showgrid=True, zeroline=False))
380
 
381
  fig.update_traces(marker=dict(size=12))
382
 
 
396
  all_data[folder_name][file_name] = {}
397
  for sheet_name in sheet_name_list:
398
  final_file_name = os.path.join(folder, file_name)
399
+ all_data[folder_name][file_name][sheet_name] = rename_columns(pd.read_excel(final_file_name + ".xlsx", sheet_name=sheet_name))
 
400
 
401
  return all_data, time_list
402
 
 
415
  # return mutilange_data, time_list
416
 
417
 
418
+ all_data, time_list = read_all_data("data")
419
  # muti_lang_data, muti_lang_time_list = read_mutilange_data()
420
 
421
  time_list.sort()
 
426
  initial_models = model_size_list
427
  initial_metric = metric_list[0]
428
  initial_columns = get_unique_column_names(all_data)
429
+ initial_columns = initial_columns[:-1]
430
+ # initial_colors = ["Average"]
431
+ initial_colors = ["Average", "Individual Tests"]
432
  initial_size_range = [0, 15]
433
  initial_data = update_table(initial_period, initial_models, initial_metric, initial_columns, initial_colors, initial_size_range)
434
 
435
+ css = """
436
  .gradio-container {
437
  max-width: 95% !important;
438
  }
 
444
  word-break: break-word;
445
  }
446
 
447
+ """
448
 
449
  TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
450
  SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won’t work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>"
 
461
  size_range_slider = RangeSlider(minimum=0, maximum=15, value=[0, 15], step=0.1, label="Model Size Range")
462
  metric_selector = gr.Dropdown(label="Metric", choices=metric_list, value=metric_list[0])
463
  with gr.Column():
464
+ color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=["Average"])
465
+ colfilter = gr.CheckboxGroup(
466
+ label="Data Source", choices=get_unique_column_names(all_data), value=get_unique_column_names(all_data)
467
+ )
468
+
469
+ table = gr.Dataframe(
470
+ initial_data,
471
+ column_widths=[130, 50, 50, 35, 35, 35, 35, 35, 35, 35, 35],
472
+ wrap=True,
473
+ height=800,
474
+ )
475
+
476
+ period_selector.change(
477
+ update_table, inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider], outputs=table
478
+ )
479
+ model_selector.change(
480
+ update_table, inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider], outputs=table
481
+ )
482
+ metric_selector.change(
483
+ update_table, inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider], outputs=table
484
+ )
485
+ colfilter.change(
486
+ update_table, inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider], outputs=table
487
+ )
488
+ color_selector.change(
489
+ update_table, inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider], outputs=table
490
+ )
491
+ size_range_slider.change(
492
+ update_table, inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider], outputs=table
493
+ )
 
494
 
495
  with gr.Tab("🌍 MultiLang"):
496
  gr.Markdown("## Coming soon...")
 
511
  with gr.Tab("🚀 Submit"):
512
  with gr.Group():
513
  with gr.Row():
514
+ model_name = gr.Textbox(max_lines=1, placeholder="Enter model name...", show_label=False, scale=4)
515
  submit = gr.Button("Submit", variant="primary", scale=0)
516
+ output = gr.Markdown("# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
 
517
 
518
  submit.click(fn=submit_model, inputs=model_name, outputs=output)
519