Jellyfish042 committed on
Commit 9702a67 · 1 Parent(s): 616bf01

support dark mode

Files changed (1)
  1. app.py +95 -322
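The commit makes the leaderboard table theme-aware: update_table now accepts an injected gr.Request, reads a dark-mode flag from it, and switches the Styler palettes accordingly. Below is a minimal sketch of that handler pattern, assuming (as the committed code does) that the request object exposes an is_dark attribute; the component names are illustrative, not taken from app.py.

import gradio as gr

def render_table(period: str, request: gr.Request = None):
    # Gradio injects the current request into any handler parameter annotated with gr.Request.
    # The commit reads request.is_dark; getattr keeps this sketch safe if that flag is missing,
    # and the None default covers direct calls made before the UI exists (the initial render).
    is_dark = bool(getattr(request, "is_dark", False)) if request else False
    palette = ["#2ca02c", "#2b2b2b", "#d62728"] if is_dark else ["#63be7b", "#ffffff", "#f8696b"]
    return f"<p>period={period}, dark={is_dark}, palette={palette}</p>"

with gr.Blocks() as demo:
    period = gr.Dropdown(choices=["2024-08", "2025-01"], value="2025-01", label="Period")
    table = gr.HTML(render_table("2025-01"))
    period.change(render_table, inputs=period, outputs=table)

The same wiring is repeated in the diff for every control that should refresh the table.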
app.py CHANGED
@@ -7,18 +7,17 @@ from dotenv import load_dotenv
7
  from matplotlib.colors import LinearSegmentedColormap
8
  import plotly.express as px
9
  import plotly.graph_objects as go
10
- from sklearn.linear_model import LinearRegression
11
  import numpy as np
12
  from huggingface_hub import HfApi
13
  from huggingface_hub.hf_api import HTTPError
14
  from huggingface_hub.utils import GatedRepoError
15
  from gradio_rangeslider import RangeSlider
16
  import datetime
17
-
18
 
19
  load_dotenv()
20
  webhook_url = os.environ.get("WEBHOOK_URL")
21
-
22
  file_name_list = [
23
  "14b",
24
  "9b",
@@ -27,19 +26,16 @@ file_name_list = [
27
  "1b5",
28
  "other",
29
  ]
30
-
31
  sheet_name_list = [
32
  "cr",
33
  "bpc",
34
  "bpb",
35
  ]
36
-
37
  metric_list = [
38
  "Compression Rate (%)",
39
  "Bits Per Character (BPC)",
40
  "Bits Per Byte (BPB)",
41
  ]
42
-
43
  model_size_list = [
44
  "~14B",
45
  "~9B",
@@ -48,13 +44,11 @@ model_size_list = [
48
  "~1.5B",
49
  "Other",
50
  ]
51
-
52
  metric_to_sheet = {
53
  "Compression Rate (%)": "cr",
54
  "Bits Per Character (BPC)": "bpc",
55
  "Bits Per Byte (BPB)": "bpb",
56
  }
57
-
58
  model_size_to_file_name = {
59
  "~14B": "14b",
60
  "~9B": "9b",
@@ -68,27 +62,21 @@ def read_about_md():
68
  with open('about.md', 'r', encoding='utf-8') as f:
69
  return f.read()
70
 
71
-
72
  def rename_columns(df):
73
  df.columns = [col.rsplit("_", maxsplit=1)[0] for col in df.columns]
74
  return df
75
 
76
-
77
  def get_folders_matching_format(directory):
78
  pattern = re.compile(r"^\d{4}-\d{2}$")
79
  folders = []
80
-
81
  if not os.path.exists(directory):
82
  return folders
83
-
84
  for item in os.listdir(directory):
85
  full_path = os.path.join(directory, item)
86
  if os.path.isdir(full_path) and pattern.match(item):
87
  folders.append(full_path)
88
-
89
  return folders
90
 
91
-
92
  def get_unique_column_names(data=None):
93
  return [
94
  "ao3_\u200benglish",
@@ -100,74 +88,63 @@ def get_unique_column_names(data=None):
100
  "github_\u200bpython",
101
  ]
102
 
103
-
104
  def color_cell(value):
105
  return "background-color: #fffdd0" if pd.notna(value) else "default"
106
 
 
 
107
 
108
- def update_table(
109
- period: str,
110
- models_size: list,
111
- metric: str,
112
- visible_columns: list,
113
- color_columns: list,
114
- size_range: list,
115
- midpoint: float = 0.5,
116
- sort_by: str = "Average (lower=better)",
117
- ascending: bool = True,
118
- ):
119
- print(
120
- f"Updating - time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}, period: {period}, models: {models_size}, metric: {metric}, visible_columns: {visible_columns}, color_columns: {color_columns}, size_range: {size_range}, sort_by: {sort_by}, ascending: {ascending}\n"
121
- )
122
 
123
  if not models_size:
124
  return "No data available for the selected models and period."
125
- # return pd.DataFrame()
126
-
127
  target_period_data = all_data[period]
128
  target_file_name = [model_size_to_file_name[model] for model in models_size]
129
  sheet_name = metric_to_sheet[metric]
130
-
131
- # combined_data = pd.concat([target_period_data[file_name][sheet_name] for file_name in target_file_name], axis=0)
132
- combined_data = pd.concat(
133
- [df.dropna(axis=1, how="all") for df in [target_period_data[file_name][sheet_name] for file_name in target_file_name]], axis=0
134
- )
135
  if len(combined_data) == 0:
136
  return "No data available for the selected models and period."
137
- # return pd.DataFrame()
138
-
139
- # Filter models based on the size range
140
  combined_data = combined_data[combined_data["Parameters Count (B)"].between(size_range[0], size_range[1])]
141
  combined_data.reset_index(drop=True, inplace=True)
 
142
  if len(combined_data) == 0:
143
  return "No data available for the selected models and period."
144
- # return pd.DataFrame()
145
-
146
  combined_data["Name"] = combined_data["Name"].apply(lambda x: x.replace(".pth", ""))
147
-
148
  ordered_columns = get_unique_column_names()
149
  relevant_columns = [col for col in ordered_columns if col in visible_columns and col not in ["Name", "Parameters Count (B)", "Average (The lower the better)"]]
150
 
151
- if len(combined_data) > 0:
152
  combined_data["Average (The lower the better)"] = round(combined_data[relevant_columns].mean(axis=1), 3)
153
- combined_data = combined_data.rename(columns={"Parameters Count (B)": "Params (B)"})
154
- combined_data = combined_data.rename(columns={"Average (The lower the better)": "Average (lower=better)"})
155
  sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
156
- visible_columns = ["Name", "Params (B)", "Average (lower=better)"] + relevant_columns
157
- filtered_data = sorted_data[visible_columns]
158
  filtered_data.columns = [col.replace("_", " ") for col in filtered_data.columns]
159
-
160
  formatter = {col: "{:.3f}" for col in filtered_data.columns if filtered_data[col].dtype in ["float64", "float32"]}
161
-
162
- # color gradient
163
- colors = ["#63be7b", "#ffffff", "#f8696b"]
164
- vmin = {}
165
- vmax = {}
166
- vmid = {}
 
 
 
 
167
  for column in filtered_data.columns:
168
- if column in ["Name", "Params (B)"]:
169
- continue
170
- col_values = filtered_data[column]
171
  if len(col_values) > 1:
172
  sorted_values = np.sort(col_values)
173
  vmin[column] = sorted_values.min()
@@ -175,99 +152,64 @@ def update_table(
175
  idx = int(len(sorted_values) * midpoint)
176
  vmid[column] = sorted_values[idx]
177
 
178
- def custom_background_gradient(series, cmap, vmin, vmax, vmid):
179
- if len(series) == 0:
180
- return series
181
-
182
  def normalize(x):
183
- if x <= vmid:
184
- return 0.5 * (x - vmin) / (vmid - vmin)
 
 
 
 
185
  else:
186
- return 0.5 + 0.5 * (x - vmid) / (vmax - vmid)
187
-
188
  normed = series.apply(normalize)
189
- colors = [cmap(x) for x in normed]
190
- return ["background-color: rgba({}, {}, {}, {})".format(*[int(255 * x) for x in c[:3]], c[3]) for c in colors]
 
 
 
 
191
 
192
  target_color_columns = []
193
- if "Average" in color_columns:
194
- target_color_columns.append("Average (lower=better)")
195
- if "Individual Tests" in color_columns:
196
- target_color_columns.extend([col for col in filtered_data.columns if col not in ["Name", "Params (B)", "Average (lower=better)"]])
197
-
198
- styler = filtered_data.style.format(formatter).map(color_cell, subset=["Params (B)"])
199
-
 
 
 
 
 
 
 
 
 
 
200
  for column in target_color_columns:
201
- styler = styler.apply(
202
- custom_background_gradient,
203
- cmap=LinearSegmentedColormap.from_list("custom_cmap", colors),
204
- vmin=vmin[column],
205
- vmax=vmax[column],
206
- vmid=vmid[column],
207
- subset=[column],
208
- )
209
-
210
- # return styler
211
  styler = styler.hide(axis="index")
212
-
213
  widths = [300, 150, 150, 100, 100, 100, 100, 100, 100, 100, 100]
 
214
  table_styles = []
215
-
 
216
  for i, w in enumerate(widths):
217
- table_styles.append(
218
- {
219
- "selector": "th",
220
- "props": [
221
- ("background-color", "#f5f5f5"),
222
- ("padding", "8px"),
223
- ("font-weight", "bold"),
224
- ],
225
- }
226
- )
227
- table_styles.append(
228
- {
229
- "selector": f"th.col{i}",
230
- "props": [
231
- ("min-width", f"{w}px"),
232
- ("max-width", f"{w}px"),
233
- ("text-align", "center"),
234
- ("border", "1px solid #dddddd"),
235
- ],
236
- }
237
- )
238
- table_styles.append(
239
- {
240
- "selector": f"td.col{i}",
241
- "props": [
242
- ("min-width", f"{w}px"),
243
- ("max-width", f"{w}px"),
244
- ("text-align", "center"),
245
- ("border", "1px solid #dddddd"),
246
- ],
247
- }
248
- )
249
-
250
- table_styles.append(
251
- {
252
- "selector": "table",
253
- "props": [
254
- ("border-collapse", "collapse"),
255
- ("border", "1px solid #dddddd"),
256
- ],
257
- }
258
- )
259
-
260
  styler = styler.set_table_styles(table_styles)
261
-
262
- html_output = styler.to_html()
263
- return html_output
264
-
265
 
266
  def create_world_languages_gdp_chart():
267
  languages = ["English", "Chinese", "Spanish", "Japanese", "German", "French", "Arabic", "Italian", "Portuguese", "Korean", "Other"]
268
  shares = [27, 18, 8, 6, 5, 4, 3, 2, 2, 2, 23]
269
  colors = ["#FF7F7F", "#FFA07A", "#FFDB58", "#90EE90", "#98FB98", "#87CEFA", "#B0C4DE", "#DDA0DD", "#D8BFD8", "#F0E68C", "#E0FFFF"]
270
-
271
  fig = go.Figure(
272
  data=[
273
  go.Pie(
@@ -282,7 +224,6 @@ def create_world_languages_gdp_chart():
282
  )
283
  ]
284
  )
285
-
286
  fig.update_layout(
287
  title={
288
  "text": "World Languages by Share of Global GDP",
@@ -297,10 +238,8 @@ def create_world_languages_gdp_chart():
297
  height=500,
298
  margin=dict(t=80, b=20, l=20, r=20),
299
  )
300
-
301
  return fig
302
 
303
-
304
  def check_model_exists(model_id):
305
  api = HfApi()
306
  try:
@@ -314,11 +253,9 @@ def check_model_exists(model_id):
314
  else:
315
  return "Error: " + str(e)
316
 
317
-
318
  def submit_model(name):
319
  if "Exists" not in check_model_exists(name):
320
  return f"# ERROR: Model {name} does not exist on Hugging Face!"
321
-
322
  try:
323
  response = requests.post(webhook_url, json={"content": name})
324
  if response.status_code == 200:
@@ -334,131 +271,20 @@ def submit_model(name):
334
  except Exception as e:
335
  print(e)
336
  return "ERROR: Unexpected error. Please try again later."
337
-
338
-
339
- # def create_scaling_plot(all_data, period):
340
- # selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
341
- # target_data = all_data[period]
342
- # new_df = pd.DataFrame()
343
-
344
- # for size in target_data.keys():
345
- # new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all")], axis=0)
346
-
347
- # new_df.rename(columns={"Parameters Count (B)": "Params(B)", "Average (The lower the better)": "Compression Rate (%)"}, inplace=True)
348
-
349
- # new_df["Log Params(B)"] = np.log(new_df["Params(B)"])
350
- # new_df["Log Compression Rate (%)"] = np.log(new_df["Compression Rate (%)"])
351
-
352
- # fig = px.scatter(
353
- # new_df,
354
- # x="Log Params(B)",
355
- # y="Log Compression Rate (%)",
356
- # title="Compression Rate Scaling Law",
357
- # hover_name="Name",
358
- # custom_data=["Params(B)", "Compression Rate (%)"],
359
- # )
360
-
361
- # fig.update_traces(
362
- # hovertemplate="<b>%{hovertext}</b><br>Params(B): %{customdata[0]:.2f} B<br>Compression Rate (%): %{customdata[1]:.2f}<extra></extra>"
363
- # )
364
- # fig.update_layout(
365
- # width=800, # set the figure width
366
- # height=600, # set the figure height
367
- # title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
368
- # showlegend=True,
369
- # xaxis={"showgrid": True, "zeroline": False, "type": "linear", "title": "Params(B)"}, # make sure the axis type is correct
370
- # yaxis={"showgrid": True, "zeroline": False, "type": "linear", "title": "Compression Rate (%)", "autorange": "reversed"},
371
- # )
372
-
373
- # names_to_connect_dict = {
374
- # "2024-05": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
375
- # "2024-06": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
376
- # "2024-07": ["Meta-Llama-3.1-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
377
- # "2024-08": [
378
- # "Meta-Llama-3.1-8B",
379
- # "Rene-v0.1-1.3b-pytorch",
380
- # "stablelm-3b-4e1t",
381
- # "Qwen2-1.5B",
382
- # "TinyLlama-1.1B-intermediate-step-1431k-3T",
383
- # "Mistral-Nemo-Base-2407",
384
- # ],
385
- # "2025-01": ["Qwen2.5-1.5B"],
386
- # }
387
-
388
- # names_to_connect = names_to_connect_dict.get(period, names_to_connect_dict["2024-08"])
389
-
390
- # connection_points = new_df[new_df["Name"].isin(names_to_connect)]
391
- # print(connection_points)
392
-
393
- # new_df["Color"] = new_df["Name"].apply(lambda name: "#39C5BB" if name in names_to_connect else "#636efa")
394
-
395
- # fig.update_traces(marker=dict(color=new_df["Color"]))
396
-
397
- # X = connection_points["Log Params(B)"].values.reshape(-1, 1)
398
- # y = connection_points["Log Compression Rate (%)"].values
399
- # model = LinearRegression().fit(X, y)
400
-
401
- # x_min = connection_points["Log Params(B)"].min()
402
- # x_max = connection_points["Log Params(B)"].max()
403
- # extended_x = np.linspace(x_min, x_max * 1.5, 100)
404
- # extended_x_original = np.exp(extended_x)
405
- # trend_line_y = model.predict(extended_x.reshape(-1, 1))
406
- # trend_line_y_original = np.exp(trend_line_y)
407
-
408
- # trend_line = go.Scatter(
409
- # x=extended_x,
410
- # y=trend_line_y,
411
- # mode="lines",
412
- # line=dict(color="skyblue", dash="dash"),
413
- # name="Trend Line",
414
- # hovertemplate="<b>Params(B):</b> %{customdata[0]:.2f}<br>" + "<b>Compression Rate (%):</b> %{customdata[1]:.2f}<extra></extra>",
415
- # customdata=np.stack((extended_x_original, trend_line_y_original), axis=-1),
416
- # )
417
-
418
- # fig.add_trace(trend_line)
419
-
420
- # x_min = new_df["Params(B)"].min()
421
- # x_max = new_df["Params(B)"].max()
422
- # x_tick_vals = np.geomspace(x_min, x_max, num=5)
423
- # x_tick_text = [f"{val:.1f}" for val in x_tick_vals]
424
-
425
- # y_min = new_df["Compression Rate (%)"].min()
426
- # y_max = new_df["Compression Rate (%)"].max()
427
- # y_tick_vals = np.geomspace(y_min, y_max, num=5)
428
- # y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
429
-
430
- # fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title="Params(B)")
431
- # fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title="Compression Rate (%)", autorange="reversed")
432
-
433
- # fig.update_layout(xaxis=dict(showgrid=True, zeroline=False), yaxis=dict(showgrid=True, zeroline=False))
434
-
435
- # fig.update_traces(marker=dict(size=12))
436
-
437
- # print(fig.layout)
438
-
439
- # return fig
440
-
441
-
442
  def create_scaling_plot(all_data, period):
443
  selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
444
  target_data = all_data[period]
445
  new_df = pd.DataFrame()
446
-
447
  for size in target_data.keys():
448
  new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all")], axis=0)
449
-
450
  x_values = new_df["Parameters Count (B)"].astype(float).tolist()
451
  y_values = new_df["Average (The lower the better)"].astype(float).tolist()
452
  names = new_df["Name"].tolist()
453
-
454
  x_min, x_max = np.log10(min(x_values)), np.log10(max(x_values))
455
  y_min, y_max = np.log10(min(y_values)), np.log10(max(y_values))
456
-
457
  x_dtick = (x_max - x_min) / 4
458
  y_dtick = (y_max - y_min) / 4
459
-
460
  fig = go.Figure()
461
-
462
  fig.add_trace(
463
  go.Scatter(
464
  x=x_values,
@@ -473,7 +299,6 @@ def create_scaling_plot(all_data, period):
473
  ),
474
  )
475
  )
476
-
477
  fig.update_layout(
478
  title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
479
  width=800,
@@ -499,10 +324,8 @@ def create_scaling_plot(all_data, period):
499
  autorange="reversed",
500
  ),
501
  )
502
-
503
  return fig
504
 
505
-
506
  def read_all_data(folder_name):
507
  all_data = {}
508
  time_list = []
@@ -517,37 +340,18 @@ def read_all_data(folder_name):
517
  for sheet_name in sheet_name_list:
518
  final_file_name = os.path.join(folder, file_name)
519
  all_data[folder_name][file_name][sheet_name] = rename_columns(pd.read_excel(final_file_name + ".xlsx", sheet_name=sheet_name))
520
-
521
  return all_data, time_list
522
 
523
-
524
- # def read_mutilange_data(folder_path='mutilang_data'):
525
- # mutilange_data = {}
526
- # excel_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.xlsx')]
527
- # time_list = [file.split('.')[0] for file in excel_files]
528
- # time_list = [x.split('\\')[-1] for x in time_list]
529
- # for file_name in excel_files:
530
- # if mutilange_data.get(file_name) is None:
531
- # mutilange_data[file_name] = {}
532
- # for sheet_name in sheet_name_list:
533
- # mutilange_data[file_name][sheet_name] = rename_columns(
534
- # pd.read_excel(file_name, sheet_name=sheet_name))
535
- # return mutilange_data, time_list
536
-
537
-
538
  all_data, time_list = read_all_data("data")
539
- # muti_lang_data, muti_lang_time_list = read_mutilange_data()
540
-
541
  time_list.sort()
542
  last_period = time_list[-1]
543
-
544
  initial_fig = create_scaling_plot(all_data, last_period)
545
  initial_metric = metric_list[0]
546
  initial_columns = get_unique_column_names(all_data)
547
  initial_colors = ["Average", "Individual Tests"]
548
  initial_size_range = [0, 40]
 
549
  initial_data = update_table(last_period, model_size_list, initial_metric, initial_columns, initial_colors, initial_size_range)
550
-
551
  css = """
552
  .gradio-container {
553
  max-width: 95% !important;
@@ -566,11 +370,11 @@ table {
566
  width: 100% !important;
567
  }
568
  """
569
-
570
  TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
571
  SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won't work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>"
572
-
573
- with gr.Blocks(css=css) as demo:
 
574
  gr.HTML(TITLE_HTML)
575
  gr.HTML(SUBTITLE_HTML)
576
  with gr.Tabs() as tabs:
@@ -585,62 +389,32 @@ with gr.Blocks(css=css) as demo:
585
  midpoint_slider = gr.Slider(minimum=0.1, maximum=0.9, value=0.5, step=0.01, label="Color Gradient Midpoint")
586
  color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
587
  colfilter = gr.CheckboxGroup(label="Data Source", choices=get_unique_column_names(all_data), value=initial_columns)
588
-
589
  table = gr.HTML(initial_data)
590
-
591
- period_selector.change(
592
- update_table,
593
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
594
- outputs=table,
595
- )
596
- model_selector.change(
597
- update_table,
598
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
599
- outputs=table,
600
- )
601
- metric_selector.change(
602
- update_table,
603
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
604
- outputs=table,
605
- )
606
- colfilter.change(
607
- update_table,
608
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
609
- outputs=table,
610
- )
611
- color_selector.change(
612
- update_table,
613
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
614
- outputs=table,
615
- )
616
- size_range_slider.change(
617
- update_table,
618
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
619
- outputs=table,
620
- )
621
- midpoint_slider.change(
622
- update_table,
623
- inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
624
- outputs=table,
625
- )
626
-
627
  with gr.Tab("🌍 MultiLang"):
628
  gr.Markdown("## Coming soon...")
629
- world_languages_plot = gr.Plot(create_world_languages_gdp_chart())
630
-
631
  with gr.Tab("📈 Scaling Law"):
632
  period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=last_period)
633
-
634
  def update_plot(period):
635
  new_fig = create_scaling_plot(all_data, period)
636
  return new_fig
637
-
638
  plot = gr.Plot(initial_fig)
639
  period_selector_2.change(update_plot, inputs=period_selector_2, outputs=plot)
640
-
641
  with gr.Tab("ℹ️ About"):
642
  gr.Markdown(read_about_md())
643
-
644
  with gr.Tab("🚀 Submit"):
645
  with gr.Group():
646
  with gr.Row():
@@ -648,5 +422,4 @@ with gr.Blocks(css=css) as demo:
648
  submit = gr.Button("Submit", variant="primary", scale=0)
649
  output = gr.Markdown("# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
650
  submit.click(fn=submit_model, inputs=model_name, outputs=output)
651
-
652
- demo.launch(share=False)
 
7
  from matplotlib.colors import LinearSegmentedColormap
8
  import plotly.express as px
9
  import plotly.graph_objects as go
10
+ # from sklearn.linear_model import LinearRegression
11
  import numpy as np
12
  from huggingface_hub import HfApi
13
  from huggingface_hub.hf_api import HTTPError
14
  from huggingface_hub.utils import GatedRepoError
15
  from gradio_rangeslider import RangeSlider
16
  import datetime
17
+ from gradio.themes.utils.colors import slate
18
 
19
  load_dotenv()
20
  webhook_url = os.environ.get("WEBHOOK_URL")
 
21
  file_name_list = [
22
  "14b",
23
  "9b",
 
26
  "1b5",
27
  "other",
28
  ]
 
29
  sheet_name_list = [
30
  "cr",
31
  "bpc",
32
  "bpb",
33
  ]
 
34
  metric_list = [
35
  "Compression Rate (%)",
36
  "Bits Per Character (BPC)",
37
  "Bits Per Byte (BPB)",
38
  ]
 
39
  model_size_list = [
40
  "~14B",
41
  "~9B",
 
44
  "~1.5B",
45
  "Other",
46
  ]
 
47
  metric_to_sheet = {
48
  "Compression Rate (%)": "cr",
49
  "Bits Per Character (BPC)": "bpc",
50
  "Bits Per Byte (BPB)": "bpb",
51
  }
 
52
  model_size_to_file_name = {
53
  "~14B": "14b",
54
  "~9B": "9b",
 
62
  with open('about.md', 'r', encoding='utf-8') as f:
63
  return f.read()
64
 
 
65
  def rename_columns(df):
66
  df.columns = [col.rsplit("_", maxsplit=1)[0] for col in df.columns]
67
  return df
68
 
 
69
  def get_folders_matching_format(directory):
70
  pattern = re.compile(r"^\d{4}-\d{2}$")
71
  folders = []
 
72
  if not os.path.exists(directory):
73
  return folders
 
74
  for item in os.listdir(directory):
75
  full_path = os.path.join(directory, item)
76
  if os.path.isdir(full_path) and pattern.match(item):
77
  folders.append(full_path)
 
78
  return folders
79
 
 
80
  def get_unique_column_names(data=None):
81
  return [
82
  "ao3_\u200benglish",
 
88
  "github_\u200bpython",
89
  ]
90
 
 
91
  def color_cell(value):
92
  return "background-color: #fffdd0" if pd.notna(value) else "default"
93
 
94
+ # def color_cell_themed(value):
95
+ # return "background-color: rgba(255, 253, 208, 1.0)" if pd.notna(value) else "default"
96
 
97
+ # --- Key change 1: modify the update_table function ---
98
+ # Add a request: gr.Request = None parameter to receive the theme-mode information
99
+ # The default of None handles the initial load
100
+ def update_table(period: str, models_size: list, metric: str, visible_columns: list, color_columns: list, size_range: list, midpoint: float = 0.5, sort_by: str = "Average (lower=better)", ascending: bool = True, request: gr.Request = None):
101
+ # Log the call and check the current mode
102
+ is_dark_mode = request.is_dark if request else False
103
+ print(f"Updating - time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}, period: {period}, models: {models_size}, metric: {metric}, visible_columns: {visible_columns}, color_columns: {color_columns}, size_range: {size_range}, sort_by: {sort_by}, ascending: {ascending}, is_dark: {is_dark_mode}\n")
 
 
 
 
 
 
 
104
 
105
  if not models_size:
106
  return "No data available for the selected models and period."
107
+
 
108
  target_period_data = all_data[period]
109
  target_file_name = [model_size_to_file_name[model] for model in models_size]
110
  sheet_name = metric_to_sheet[metric]
111
+ combined_data = pd.concat([df.dropna(axis=1, how="all") for df in [target_period_data[file_name][sheet_name] for file_name in target_file_name]], axis=0)
112
+
 
 
 
113
  if len(combined_data) == 0:
114
  return "No data available for the selected models and period."
115
+
 
 
116
  combined_data = combined_data[combined_data["Parameters Count (B)"].between(size_range[0], size_range[1])]
117
  combined_data.reset_index(drop=True, inplace=True)
118
+
119
  if len(combined_data) == 0:
120
  return "No data available for the selected models and period."
121
+
 
122
  combined_data["Name"] = combined_data["Name"].apply(lambda x: x.replace(".pth", ""))
 
123
  ordered_columns = get_unique_column_names()
124
  relevant_columns = [col for col in ordered_columns if col in visible_columns and col not in ["Name", "Parameters Count (B)", "Average (The lower the better)"]]
125
 
126
+ if len(combined_data) > 0 and relevant_columns:
127
  combined_data["Average (The lower the better)"] = round(combined_data[relevant_columns].mean(axis=1), 3)
128
+
129
+ combined_data = combined_data.rename(columns={"Parameters Count (B)": "Params (B)", "Average (The lower the better)": "Average (lower=better)"})
130
  sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
131
+ visible_columns_final = ["Name", "Params (B)", "Average (lower=better)"] + relevant_columns
132
+ filtered_data = sorted_data[visible_columns_final]
133
  filtered_data.columns = [col.replace("_", " ") for col in filtered_data.columns]
 
134
  formatter = {col: "{:.3f}" for col in filtered_data.columns if filtered_data[col].dtype in ["float64", "float32"]}
135
+
136
+ # --- Key change 2: pick a color scheme based on the theme mode ---
137
+ if is_dark_mode:
138
+ # Dark-mode palette (green -> dark gray -> red)
139
+ colors = ["#2ca02c", "#2b2b2b", "#d62728"]
140
+ else:
141
+ # Light-mode palette (green -> white -> red)
142
+ colors = ["#63be7b", "#ffffff", "#f8696b"]
143
+
144
+ vmin, vmax, vmid = {}, {}, {}
145
  for column in filtered_data.columns:
146
+ if column in ["Name", "Params (B)"]: continue
147
+ col_values = filtered_data[column].dropna()
 
148
  if len(col_values) > 1:
149
  sorted_values = np.sort(col_values)
150
  vmin[column] = sorted_values.min()
 
152
  idx = int(len(sorted_values) * midpoint)
153
  vmid[column] = sorted_values[idx]
154
 
155
+ # --- Key change 3: adjust the style function to force a fixed black font color ---
156
+ def custom_background_gradient(series, cmap, vmin_val, vmax_val, vmid_val):
157
+ if len(series) == 0: return series
 
158
  def normalize(x):
159
+ if pd.isna(x): return 0.5 # Neutral for NaN
160
+ if vmid_val == vmin_val and x <= vmid_val: return 0.0
161
+ if vmid_val == vmax_val and x >= vmid_val: return 1.0
162
+ if vmid_val == vmin_val or vmid_val == vmax_val: return 0.5
163
+ if x <= vmid_val:
164
+ return 0.5 * (x - vmin_val) / (vmid_val - vmin_val)
165
  else:
166
+ return 0.5 + 0.5 * (x - vmid_val) / (vmax_val - vmid_val)
 
167
  normed = series.apply(normalize)
168
+ cmap_colors = [cmap(x) for x in normed]
169
+ # Set both background-color and color in the returned CSS
170
+ return [
171
+ "background-color: rgba({}, {}, {}, {}); color: black;".format(*[int(255 * c) for c in color[:3]], color[3])
172
+ for color in cmap_colors
173
+ ]
174
 
175
  target_color_columns = []
176
+ if "Average" in color_columns: target_color_columns.append("Average (lower=better)")
177
+ if "Individual Tests" in color_columns: target_color_columns.extend([col for col in filtered_data.columns if col not in ["Name", "Params (B)", "Average (lower=better)"]])
178
+
179
+ def color_params_column_dynamic(value):
180
+ if not pd.notna(value):
181
+ return "default"
182
+
183
+ # 2. Return a different color depending on is_dark_mode
184
+ if is_dark_mode:
185
+ # Use a soft, non-glaring dark gold for dark mode
186
+ # Set the font color to a light shade as well, to keep the contrast readable
187
+ return "background-color: #4b4936; color: #f0f0f0;"
188
+ else:
189
+ # Use a bright cream background with black text for light mode
190
+ return "background-color: #fffdd0; color: black;"
191
+
192
+ styler = filtered_data.style.format(formatter).map(color_params_column_dynamic, subset=["Params (B)"])
193
  for column in target_color_columns:
194
+ if column in vmin:
195
+ custom_cmap = LinearSegmentedColormap.from_list("custom_cmap", colors)
196
+ styler = styler.apply(custom_background_gradient, cmap=custom_cmap, vmin_val=vmin[column], vmax_val=vmax[column], vmid_val=vmid[column], subset=[column])
197
+
 
 
 
 
 
 
198
  styler = styler.hide(axis="index")
 
199
  widths = [300, 150, 150, 100, 100, 100, 100, 100, 100, 100, 100]
200
+
201
  table_styles = []
202
+ table_styles.append({"selector": "th", "props": [("background-color", "var(--background-fill-secondary)"), ("color", "var(--body-text-color)"), ("padding", "8px"), ("font-weight", "bold")]})
203
+ table_styles.append({"selector": "table", "props": [("border-collapse", "collapse"), ("border", f"1px solid var(--border-color-primary)")]})
204
  for i, w in enumerate(widths):
205
+ table_styles.append({"selector": f"th.col{i}, td.col{i}", "props": [("min-width", f"{w}px"), ("max-width", f"{w}px"), ("text-align", "center"), ("border", f"1px solid var(--border-color-primary)")]})
206
  styler = styler.set_table_styles(table_styles)
207
+ return styler.to_html()
 
 
 
208
 
209
  def create_world_languages_gdp_chart():
210
  languages = ["English", "Chinese", "Spanish", "Japanese", "German", "French", "Arabic", "Italian", "Portuguese", "Korean", "Other"]
211
  shares = [27, 18, 8, 6, 5, 4, 3, 2, 2, 2, 23]
212
  colors = ["#FF7F7F", "#FFA07A", "#FFDB58", "#90EE90", "#98FB98", "#87CEFA", "#B0C4DE", "#DDA0DD", "#D8BFD8", "#F0E68C", "#E0FFFF"]
 
213
  fig = go.Figure(
214
  data=[
215
  go.Pie(
 
224
  )
225
  ]
226
  )
 
227
  fig.update_layout(
228
  title={
229
  "text": "World Languages by Share of Global GDP",
 
238
  height=500,
239
  margin=dict(t=80, b=20, l=20, r=20),
240
  )
 
241
  return fig
242
 
 
243
  def check_model_exists(model_id):
244
  api = HfApi()
245
  try:
 
253
  else:
254
  return "Error: " + str(e)
255
 
 
256
  def submit_model(name):
257
  if "Exists" not in check_model_exists(name):
258
  return f"# ERROR: Model {name} does not exist on Hugging Face!"
 
259
  try:
260
  response = requests.post(webhook_url, json={"content": name})
261
  if response.status_code == 200:
 
271
  except Exception as e:
272
  print(e)
273
  return "ERROR: Unexpected error. Please try again later."
274
  def create_scaling_plot(all_data, period):
275
  selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
276
  target_data = all_data[period]
277
  new_df = pd.DataFrame()
 
278
  for size in target_data.keys():
279
  new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all")], axis=0)
 
280
  x_values = new_df["Parameters Count (B)"].astype(float).tolist()
281
  y_values = new_df["Average (The lower the better)"].astype(float).tolist()
282
  names = new_df["Name"].tolist()
 
283
  x_min, x_max = np.log10(min(x_values)), np.log10(max(x_values))
284
  y_min, y_max = np.log10(min(y_values)), np.log10(max(y_values))
 
285
  x_dtick = (x_max - x_min) / 4
286
  y_dtick = (y_max - y_min) / 4
 
287
  fig = go.Figure()
 
288
  fig.add_trace(
289
  go.Scatter(
290
  x=x_values,
 
299
  ),
300
  )
301
  )
 
302
  fig.update_layout(
303
  title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
304
  width=800,
 
324
  autorange="reversed",
325
  ),
326
  )
 
327
  return fig
328
 
 
329
  def read_all_data(folder_name):
330
  all_data = {}
331
  time_list = []
 
340
  for sheet_name in sheet_name_list:
341
  final_file_name = os.path.join(folder, file_name)
342
  all_data[folder_name][file_name][sheet_name] = rename_columns(pd.read_excel(final_file_name + ".xlsx", sheet_name=sheet_name))
 
343
  return all_data, time_list
344
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  all_data, time_list = read_all_data("data")
 
 
346
  time_list.sort()
347
  last_period = time_list[-1]
 
348
  initial_fig = create_scaling_plot(all_data, last_period)
349
  initial_metric = metric_list[0]
350
  initial_columns = get_unique_column_names(all_data)
351
  initial_colors = ["Average", "Individual Tests"]
352
  initial_size_range = [0, 40]
353
+ # On the initial call to update_table, the request parameter is left at its default of None
354
  initial_data = update_table(last_period, model_size_list, initial_metric, initial_columns, initial_colors, initial_size_range)
 
355
  css = """
356
  .gradio-container {
357
  max-width: 95% !important;
 
370
  width: 100% !important;
371
  }
372
  """
 
373
  TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
374
  SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won't work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>"
375
+ # theme = gr.themes.Default(primary_hue=slate, secondary_hue=slate)
376
+ theme = gr.themes.Default()
377
+ with gr.Blocks(theme=theme, css=css) as demo:
378
  gr.HTML(TITLE_HTML)
379
  gr.HTML(SUBTITLE_HTML)
380
  with gr.Tabs() as tabs:
 
389
  midpoint_slider = gr.Slider(minimum=0.1, maximum=0.9, value=0.5, step=0.01, label="Color Gradient Midpoint")
390
  color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
391
  colfilter = gr.CheckboxGroup(label="Data Source", choices=get_unique_column_names(all_data), value=initial_columns)
 
392
  table = gr.HTML(initial_data)
393
+
394
+ # --- Key change 4: update all the .change() events, adding gr.Request() ---
395
+ # Define a shared input list to avoid repetition
396
+ shared_inputs = [period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider]
397
+
398
+ period_selector.change(update_table, inputs=shared_inputs, outputs=table)
399
+ model_selector.change(update_table, inputs=shared_inputs, outputs=table)
400
+ metric_selector.change(update_table, inputs=shared_inputs, outputs=table)
401
+ colfilter.change(update_table, inputs=shared_inputs, outputs=table)
402
+ color_selector.change(update_table, inputs=shared_inputs, outputs=table)
403
+ size_range_slider.change(update_table, inputs=shared_inputs, outputs=table)
404
+ midpoint_slider.change(update_table, inputs=shared_inputs, outputs=table)
405
+
406
  with gr.Tab("🌍 MultiLang"):
407
  gr.Markdown("## Coming soon...")
408
+ # world_languages_plot = gr.Plot(create_world_languages_gdp_chart())
 
409
  with gr.Tab("📈 Scaling Law"):
410
  period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=last_period)
 
411
  def update_plot(period):
412
  new_fig = create_scaling_plot(all_data, period)
413
  return new_fig
 
414
  plot = gr.Plot(initial_fig)
415
  period_selector_2.change(update_plot, inputs=period_selector_2, outputs=plot)
 
416
  with gr.Tab("ℹ️ About"):
417
  gr.Markdown(read_about_md())
 
418
  with gr.Tab("🚀 Submit"):
419
  with gr.Group():
420
  with gr.Row():
 
422
  submit = gr.Button("Submit", variant="primary", scale=0)
423
  output = gr.Markdown("# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
424
  submit.click(fn=submit_model, inputs=model_name, outputs=output)
425
+ demo.launch(share=False)
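For reference, the cell coloring that this commit re-themes is a pandas Styler gradient with a configurable midpoint: each value is normalized piecewise around vmid into [0, 1], looked up in a three-stop LinearSegmentedColormap, and emitted as inline CSS with the text forced to black so it stays readable on either palette. A self-contained sketch of that idea, using made-up sample scores rather than the leaderboard spreadsheets:

import pandas as pd
from matplotlib.colors import LinearSegmentedColormap

def midpoint_gradient(series, cmap, vmin, vmid, vmax):
    # Map values below vmid onto [0, 0.5] and values above it onto [0.5, 1],
    # then turn each colormap lookup into an inline CSS rule for that cell.
    def norm(x):
        if x <= vmid:
            return 0.5 * (x - vmin) / (vmid - vmin) if vmid > vmin else 0.0
        return 0.5 + 0.5 * (x - vmid) / (vmax - vmid) if vmax > vmid else 1.0
    css = []
    for x in series:
        r, g, b, a = cmap(norm(float(x)))
        css.append(f"background-color: rgba({int(255 * r)}, {int(255 * g)}, {int(255 * b)}, {a}); color: black;")
    return css

# Hypothetical scores; lower is better, as on the leaderboard.
df = pd.DataFrame({"Name": ["model-a", "model-b", "model-c"], "bpc": [0.81, 0.93, 1.10]})
cmap = LinearSegmentedColormap.from_list("custom_cmap", ["#63be7b", "#ffffff", "#f8696b"])
html = (
    df.style
    .apply(midpoint_gradient, cmap=cmap, vmin=0.81, vmid=0.93, vmax=1.10, subset=["bpc"])
    .hide(axis="index")
    .to_html()
)

Swapping the three color stops, which is what update_table does when the dark flag is set, re-themes the whole table without touching the normalization.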