Spaces:
Running
Running
Commit
·
9702a67
1
Parent(s):
616bf01
support dark mode
Browse files
app.py
CHANGED
@@ -7,18 +7,17 @@ from dotenv import load_dotenv
|
|
7 |
from matplotlib.colors import LinearSegmentedColormap
|
8 |
import plotly.express as px
|
9 |
import plotly.graph_objects as go
|
10 |
-
from sklearn.linear_model import LinearRegression
|
11 |
import numpy as np
|
12 |
from huggingface_hub import HfApi
|
13 |
from huggingface_hub.hf_api import HTTPError
|
14 |
from huggingface_hub.utils import GatedRepoError
|
15 |
from gradio_rangeslider import RangeSlider
|
16 |
import datetime
|
17 |
-
|
18 |
|
19 |
load_dotenv()
|
20 |
webhook_url = os.environ.get("WEBHOOK_URL")
|
21 |
-
|
22 |
file_name_list = [
|
23 |
"14b",
|
24 |
"9b",
|
@@ -27,19 +26,16 @@ file_name_list = [
|
|
27 |
"1b5",
|
28 |
"other",
|
29 |
]
|
30 |
-
|
31 |
sheet_name_list = [
|
32 |
"cr",
|
33 |
"bpc",
|
34 |
"bpb",
|
35 |
]
|
36 |
-
|
37 |
metric_list = [
|
38 |
"Compression Rate (%)",
|
39 |
"Bits Per Character (BPC)",
|
40 |
"Bits Per Byte (BPB)",
|
41 |
]
|
42 |
-
|
43 |
model_size_list = [
|
44 |
"~14B",
|
45 |
"~9B",
|
@@ -48,13 +44,11 @@ model_size_list = [
|
|
48 |
"~1.5B",
|
49 |
"Other",
|
50 |
]
|
51 |
-
|
52 |
metric_to_sheet = {
|
53 |
"Compression Rate (%)": "cr",
|
54 |
"Bits Per Character (BPC)": "bpc",
|
55 |
"Bits Per Byte (BPB)": "bpb",
|
56 |
}
|
57 |
-
|
58 |
model_size_to_file_name = {
|
59 |
"~14B": "14b",
|
60 |
"~9B": "9b",
|
@@ -68,27 +62,21 @@ def read_about_md():
|
|
68 |
with open('about.md', 'r', encoding='utf-8') as f:
|
69 |
return f.read()
|
70 |
|
71 |
-
|
72 |
def rename_columns(df):
    """Strip the trailing ``_<suffix>`` from every column label of *df*.

    Each string label is cut at its last underscore and only the part before
    it is kept (``"a_b_c"`` becomes ``"a_b"``); labels without an underscore
    are left as they are.  The frame is modified in place and also returned.

    Args:
        df: DataFrame whose ``columns`` are rewritten.

    Returns:
        The same DataFrame object, with renamed columns.
    """
    # Non-string labels (e.g. an integer header) have no rsplit();
    # pass them through untouched instead of raising AttributeError.
    df.columns = [
        col.rsplit("_", maxsplit=1)[0] if isinstance(col, str) else col
        for col in df.columns
    ]
    return df
|
75 |
|
76 |
-
|
77 |
def get_folders_matching_format(directory):
    """Return the sub-folders of *directory* whose name looks like ``YYYY-MM``.

    Args:
        directory: Path of the folder to scan.

    Returns:
        A sorted list of paths (``directory`` joined with the entry name) for
        every entry that is itself a directory and whose name is four digits,
        a hyphen, and two digits.  An empty list is returned when *directory*
        does not exist or is not a directory.
    """
    # isdir() rather than exists(): a path that points at a regular file
    # would otherwise reach os.listdir() and raise NotADirectoryError.
    if not os.path.isdir(directory):
        return []

    pattern = re.compile(r"^\d{4}-\d{2}$")
    folders = []
    for item in os.listdir(directory):
        full_path = os.path.join(directory, item)
        if pattern.match(item) and os.path.isdir(full_path):
            folders.append(full_path)

    # os.listdir() order is arbitrary; sort so the result is deterministic.
    folders.sort()
    return folders
|
90 |
|
91 |
-
|
92 |
def get_unique_column_names(data=None):
|
93 |
return [
|
94 |
"ao3_\u200benglish",
|
@@ -100,74 +88,63 @@ def get_unique_column_names(data=None):
|
|
100 |
"github_\u200bpython",
|
101 |
]
|
102 |
|
103 |
-
|
104 |
def color_cell(value):
    """Return the CSS used to highlight a table cell that holds a value.

    Args:
        value: The cell value handed over by the pandas Styler.

    Returns:
        ``"background-color: #fffdd0"`` when *value* is not missing, otherwise
        an empty string (meaning "no extra style").
    """
    # A bare "default" is not an "attr: value" declaration, and the pandas
    # Styler rejects such strings when rendering; "" is the no-style value.
    return "background-color: #fffdd0" if pd.notna(value) else ""
|
106 |
|
|
|
|
|
107 |
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
size_range:
|
115 |
-
midpoint: float = 0.5,
|
116 |
-
sort_by: str = "Average (lower=better)",
|
117 |
-
ascending: bool = True,
|
118 |
-
):
|
119 |
-
print(
|
120 |
-
f"Updating - time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}, period: {period}, models: {models_size}, metric: {metric}, visible_columns: {visible_columns}, color_columns: {color_columns}, size_range: {size_range}, sort_by: {sort_by}, ascending: {ascending}\n"
|
121 |
-
)
|
122 |
|
123 |
if not models_size:
|
124 |
return "No data available for the selected models and period."
|
125 |
-
|
126 |
-
|
127 |
target_period_data = all_data[period]
|
128 |
target_file_name = [model_size_to_file_name[model] for model in models_size]
|
129 |
sheet_name = metric_to_sheet[metric]
|
130 |
-
|
131 |
-
|
132 |
-
combined_data = pd.concat(
|
133 |
-
[df.dropna(axis=1, how="all") for df in [target_period_data[file_name][sheet_name] for file_name in target_file_name]], axis=0
|
134 |
-
)
|
135 |
if len(combined_data) == 0:
|
136 |
return "No data available for the selected models and period."
|
137 |
-
|
138 |
-
|
139 |
-
# Filter models based on the size range
|
140 |
combined_data = combined_data[combined_data["Parameters Count (B)"].between(size_range[0], size_range[1])]
|
141 |
combined_data.reset_index(drop=True, inplace=True)
|
|
|
142 |
if len(combined_data) == 0:
|
143 |
return "No data available for the selected models and period."
|
144 |
-
|
145 |
-
|
146 |
combined_data["Name"] = combined_data["Name"].apply(lambda x: x.replace(".pth", ""))
|
147 |
-
|
148 |
ordered_columns = get_unique_column_names()
|
149 |
relevant_columns = [col for col in ordered_columns if col in visible_columns and col not in ["Name", "Parameters Count (B)", "Average (The lower the better)"]]
|
150 |
|
151 |
-
if len(combined_data) > 0:
|
152 |
combined_data["Average (The lower the better)"] = round(combined_data[relevant_columns].mean(axis=1), 3)
|
153 |
-
|
154 |
-
combined_data = combined_data.rename(columns={"Average (The lower the better)": "Average (lower=better)"})
|
155 |
sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
|
156 |
-
|
157 |
-
filtered_data = sorted_data[
|
158 |
filtered_data.columns = [col.replace("_", " ") for col in filtered_data.columns]
|
159 |
-
|
160 |
formatter = {col: "{:.3f}" for col in filtered_data.columns if filtered_data[col].dtype in ["float64", "float32"]}
|
161 |
-
|
162 |
-
#
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
|
|
|
|
|
|
|
|
167 |
for column in filtered_data.columns:
|
168 |
-
if column in ["Name", "Params (B)"]:
|
169 |
-
|
170 |
-
col_values = filtered_data[column]
|
171 |
if len(col_values) > 1:
|
172 |
sorted_values = np.sort(col_values)
|
173 |
vmin[column] = sorted_values.min()
|
@@ -175,99 +152,64 @@ def update_table(
|
|
175 |
idx = int(len(sorted_values) * midpoint)
|
176 |
vmid[column] = sorted_values[idx]
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
def normalize(x):
|
183 |
-
if x
|
184 |
-
|
|
|
|
|
|
|
|
|
185 |
else:
|
186 |
-
return 0.5 + 0.5 * (x -
|
187 |
-
|
188 |
normed = series.apply(normalize)
|
189 |
-
|
190 |
-
|
|
|
|
|
|
|
|
|
191 |
|
192 |
target_color_columns = []
|
193 |
-
if "Average" in color_columns:
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
for column in target_color_columns:
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
vmax=vmax[column],
|
206 |
-
vmid=vmid[column],
|
207 |
-
subset=[column],
|
208 |
-
)
|
209 |
-
|
210 |
-
# return styler
|
211 |
styler = styler.hide(axis="index")
|
212 |
-
|
213 |
widths = [300, 150, 150, 100, 100, 100, 100, 100, 100, 100, 100]
|
|
|
214 |
table_styles = []
|
215 |
-
|
|
|
216 |
for i, w in enumerate(widths):
|
217 |
-
table_styles.append(
|
218 |
-
{
|
219 |
-
"selector": "th",
|
220 |
-
"props": [
|
221 |
-
("background-color", "#f5f5f5"),
|
222 |
-
("padding", "8px"),
|
223 |
-
("font-weight", "bold"),
|
224 |
-
],
|
225 |
-
}
|
226 |
-
)
|
227 |
-
table_styles.append(
|
228 |
-
{
|
229 |
-
"selector": f"th.col{i}",
|
230 |
-
"props": [
|
231 |
-
("min-width", f"{w}px"),
|
232 |
-
("max-width", f"{w}px"),
|
233 |
-
("text-align", "center"),
|
234 |
-
("border", "1px solid #dddddd"),
|
235 |
-
],
|
236 |
-
}
|
237 |
-
)
|
238 |
-
table_styles.append(
|
239 |
-
{
|
240 |
-
"selector": f"td.col{i}",
|
241 |
-
"props": [
|
242 |
-
("min-width", f"{w}px"),
|
243 |
-
("max-width", f"{w}px"),
|
244 |
-
("text-align", "center"),
|
245 |
-
("border", "1px solid #dddddd"),
|
246 |
-
],
|
247 |
-
}
|
248 |
-
)
|
249 |
-
|
250 |
-
table_styles.append(
|
251 |
-
{
|
252 |
-
"selector": "table",
|
253 |
-
"props": [
|
254 |
-
("border-collapse", "collapse"),
|
255 |
-
("border", "1px solid #dddddd"),
|
256 |
-
],
|
257 |
-
}
|
258 |
-
)
|
259 |
-
|
260 |
styler = styler.set_table_styles(table_styles)
|
261 |
-
|
262 |
-
html_output = styler.to_html()
|
263 |
-
return html_output
|
264 |
-
|
265 |
|
266 |
def create_world_languages_gdp_chart():
|
267 |
languages = ["English", "Chinese", "Spanish", "Japanese", "German", "French", "Arabic", "Italian", "Portuguese", "Korean", "Other"]
|
268 |
shares = [27, 18, 8, 6, 5, 4, 3, 2, 2, 2, 23]
|
269 |
colors = ["#FF7F7F", "#FFA07A", "#FFDB58", "#90EE90", "#98FB98", "#87CEFA", "#B0C4DE", "#DDA0DD", "#D8BFD8", "#F0E68C", "#E0FFFF"]
|
270 |
-
|
271 |
fig = go.Figure(
|
272 |
data=[
|
273 |
go.Pie(
|
@@ -282,7 +224,6 @@ def create_world_languages_gdp_chart():
|
|
282 |
)
|
283 |
]
|
284 |
)
|
285 |
-
|
286 |
fig.update_layout(
|
287 |
title={
|
288 |
"text": "World Languages by Share of Global GDP",
|
@@ -297,10 +238,8 @@ def create_world_languages_gdp_chart():
|
|
297 |
height=500,
|
298 |
margin=dict(t=80, b=20, l=20, r=20),
|
299 |
)
|
300 |
-
|
301 |
return fig
|
302 |
|
303 |
-
|
304 |
def check_model_exists(model_id):
|
305 |
api = HfApi()
|
306 |
try:
|
@@ -314,11 +253,9 @@ def check_model_exists(model_id):
|
|
314 |
else:
|
315 |
return "Error: " + str(e)
|
316 |
|
317 |
-
|
318 |
def submit_model(name):
|
319 |
if "Exists" not in check_model_exists(name):
|
320 |
return f"# ERROR: Model {name} does not exist on Hugging Face!"
|
321 |
-
|
322 |
try:
|
323 |
response = requests.post(webhook_url, json={"content": name})
|
324 |
if response.status_code == 200:
|
@@ -334,131 +271,20 @@ def submit_model(name):
|
|
334 |
except Exception as e:
|
335 |
print(e)
|
336 |
return "ERROR: Unexpected error. Please try again later."
|
337 |
-
|
338 |
-
|
339 |
-
# def create_scaling_plot(all_data, period):
|
340 |
-
# selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
|
341 |
-
# target_data = all_data[period]
|
342 |
-
# new_df = pd.DataFrame()
|
343 |
-
|
344 |
-
# for size in target_data.keys():
|
345 |
-
# new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all")], axis=0)
|
346 |
-
|
347 |
-
# new_df.rename(columns={"Parameters Count (B)": "Params(B)", "Average (The lower the better)": "Compression Rate (%)"}, inplace=True)
|
348 |
-
|
349 |
-
# new_df["Log Params(B)"] = np.log(new_df["Params(B)"])
|
350 |
-
# new_df["Log Compression Rate (%)"] = np.log(new_df["Compression Rate (%)"])
|
351 |
-
|
352 |
-
# fig = px.scatter(
|
353 |
-
# new_df,
|
354 |
-
# x="Log Params(B)",
|
355 |
-
# y="Log Compression Rate (%)",
|
356 |
-
# title="Compression Rate Scaling Law",
|
357 |
-
# hover_name="Name",
|
358 |
-
# custom_data=["Params(B)", "Compression Rate (%)"],
|
359 |
-
# )
|
360 |
-
|
361 |
-
# fig.update_traces(
|
362 |
-
# hovertemplate="<b>%{hovertext}</b><br>Params(B): %{customdata[0]:.2f} B<br>Compression Rate (%): %{customdata[1]:.2f}<extra></extra>"
|
363 |
-
# )
|
364 |
-
# fig.update_layout(
|
365 |
-
# width=800, # 设置图像宽度
|
366 |
-
# height=600, # 设置图像高度
|
367 |
-
# title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
|
368 |
-
# showlegend=True,
|
369 |
-
# xaxis={"showgrid": True, "zeroline": False, "type": "linear", "title": "Params(B)"}, # 确保坐标轴类型正确
|
370 |
-
# yaxis={"showgrid": True, "zeroline": False, "type": "linear", "title": "Compression Rate (%)", "autorange": "reversed"},
|
371 |
-
# )
|
372 |
-
|
373 |
-
# names_to_connect_dict = {
|
374 |
-
# "2024-05": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
375 |
-
# "2024-06": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
376 |
-
# "2024-07": ["Meta-Llama-3.1-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
377 |
-
# "2024-08": [
|
378 |
-
# "Meta-Llama-3.1-8B",
|
379 |
-
# "Rene-v0.1-1.3b-pytorch",
|
380 |
-
# "stablelm-3b-4e1t",
|
381 |
-
# "Qwen2-1.5B",
|
382 |
-
# "TinyLlama-1.1B-intermediate-step-1431k-3T",
|
383 |
-
# "Mistral-Nemo-Base-2407",
|
384 |
-
# ],
|
385 |
-
# "2025-01": ["Qwen2.5-1.5B"],
|
386 |
-
# }
|
387 |
-
|
388 |
-
# names_to_connect = names_to_connect_dict.get(period, names_to_connect_dict["2024-08"])
|
389 |
-
|
390 |
-
# connection_points = new_df[new_df["Name"].isin(names_to_connect)]
|
391 |
-
# print(connection_points)
|
392 |
-
|
393 |
-
# new_df["Color"] = new_df["Name"].apply(lambda name: "#39C5BB" if name in names_to_connect else "#636efa")
|
394 |
-
|
395 |
-
# fig.update_traces(marker=dict(color=new_df["Color"]))
|
396 |
-
|
397 |
-
# X = connection_points["Log Params(B)"].values.reshape(-1, 1)
|
398 |
-
# y = connection_points["Log Compression Rate (%)"].values
|
399 |
-
# model = LinearRegression().fit(X, y)
|
400 |
-
|
401 |
-
# x_min = connection_points["Log Params(B)"].min()
|
402 |
-
# x_max = connection_points["Log Params(B)"].max()
|
403 |
-
# extended_x = np.linspace(x_min, x_max * 1.5, 100)
|
404 |
-
# extended_x_original = np.exp(extended_x)
|
405 |
-
# trend_line_y = model.predict(extended_x.reshape(-1, 1))
|
406 |
-
# trend_line_y_original = np.exp(trend_line_y)
|
407 |
-
|
408 |
-
# trend_line = go.Scatter(
|
409 |
-
# x=extended_x,
|
410 |
-
# y=trend_line_y,
|
411 |
-
# mode="lines",
|
412 |
-
# line=dict(color="skyblue", dash="dash"),
|
413 |
-
# name="Trend Line",
|
414 |
-
# hovertemplate="<b>Params(B):</b> %{customdata[0]:.2f}<br>" + "<b>Compression Rate (%):</b> %{customdata[1]:.2f}<extra></extra>",
|
415 |
-
# customdata=np.stack((extended_x_original, trend_line_y_original), axis=-1),
|
416 |
-
# )
|
417 |
-
|
418 |
-
# fig.add_trace(trend_line)
|
419 |
-
|
420 |
-
# x_min = new_df["Params(B)"].min()
|
421 |
-
# x_max = new_df["Params(B)"].max()
|
422 |
-
# x_tick_vals = np.geomspace(x_min, x_max, num=5)
|
423 |
-
# x_tick_text = [f"{val:.1f}" for val in x_tick_vals]
|
424 |
-
|
425 |
-
# y_min = new_df["Compression Rate (%)"].min()
|
426 |
-
# y_max = new_df["Compression Rate (%)"].max()
|
427 |
-
# y_tick_vals = np.geomspace(y_min, y_max, num=5)
|
428 |
-
# y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
|
429 |
-
|
430 |
-
# fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title="Params(B)")
|
431 |
-
# fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title="Compression Rate (%)", autorange="reversed")
|
432 |
-
|
433 |
-
# fig.update_layout(xaxis=dict(showgrid=True, zeroline=False), yaxis=dict(showgrid=True, zeroline=False))
|
434 |
-
|
435 |
-
# fig.update_traces(marker=dict(size=12))
|
436 |
-
|
437 |
-
# print(fig.layout)
|
438 |
-
|
439 |
-
# return fig
|
440 |
-
|
441 |
-
|
442 |
def create_scaling_plot(all_data, period):
|
443 |
selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
|
444 |
target_data = all_data[period]
|
445 |
new_df = pd.DataFrame()
|
446 |
-
|
447 |
for size in target_data.keys():
|
448 |
new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all")], axis=0)
|
449 |
-
|
450 |
x_values = new_df["Parameters Count (B)"].astype(float).tolist()
|
451 |
y_values = new_df["Average (The lower the better)"].astype(float).tolist()
|
452 |
names = new_df["Name"].tolist()
|
453 |
-
|
454 |
x_min, x_max = np.log10(min(x_values)), np.log10(max(x_values))
|
455 |
y_min, y_max = np.log10(min(y_values)), np.log10(max(y_values))
|
456 |
-
|
457 |
x_dtick = (x_max - x_min) / 4
|
458 |
y_dtick = (y_max - y_min) / 4
|
459 |
-
|
460 |
fig = go.Figure()
|
461 |
-
|
462 |
fig.add_trace(
|
463 |
go.Scatter(
|
464 |
x=x_values,
|
@@ -473,7 +299,6 @@ def create_scaling_plot(all_data, period):
|
|
473 |
),
|
474 |
)
|
475 |
)
|
476 |
-
|
477 |
fig.update_layout(
|
478 |
title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
|
479 |
width=800,
|
@@ -499,10 +324,8 @@ def create_scaling_plot(all_data, period):
|
|
499 |
autorange="reversed",
|
500 |
),
|
501 |
)
|
502 |
-
|
503 |
return fig
|
504 |
|
505 |
-
|
506 |
def read_all_data(folder_name):
|
507 |
all_data = {}
|
508 |
time_list = []
|
@@ -517,37 +340,18 @@ def read_all_data(folder_name):
|
|
517 |
for sheet_name in sheet_name_list:
|
518 |
final_file_name = os.path.join(folder, file_name)
|
519 |
all_data[folder_name][file_name][sheet_name] = rename_columns(pd.read_excel(final_file_name + ".xlsx", sheet_name=sheet_name))
|
520 |
-
|
521 |
return all_data, time_list
|
522 |
|
523 |
-
|
524 |
-
# def read_mutilange_data(folder_path='mutilang_data'):
|
525 |
-
# mutilange_data = {}
|
526 |
-
# excel_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.xlsx')]
|
527 |
-
# time_list = [file.split('.')[0] for file in excel_files]
|
528 |
-
# time_list = [x.split('\\')[-1] for x in time_list]
|
529 |
-
# for file_name in excel_files:
|
530 |
-
# if mutilange_data.get(file_name) is None:
|
531 |
-
# mutilange_data[file_name] = {}
|
532 |
-
# for sheet_name in sheet_name_list:
|
533 |
-
# mutilange_data[file_name][sheet_name] = rename_columns(
|
534 |
-
# pd.read_excel(file_name, sheet_name=sheet_name))
|
535 |
-
# return mutilange_data, time_list
|
536 |
-
|
537 |
-
|
538 |
all_data, time_list = read_all_data("data")
|
539 |
-
# muti_lang_data, muti_lang_time_list = read_mutilange_data()
|
540 |
-
|
541 |
time_list.sort()
|
542 |
last_period = time_list[-1]
|
543 |
-
|
544 |
initial_fig = create_scaling_plot(all_data, last_period)
|
545 |
initial_metric = metric_list[0]
|
546 |
initial_columns = get_unique_column_names(all_data)
|
547 |
initial_colors = ["Average", "Individual Tests"]
|
548 |
initial_size_range = [0, 40]
|
|
|
549 |
initial_data = update_table(last_period, model_size_list, initial_metric, initial_columns, initial_colors, initial_size_range)
|
550 |
-
|
551 |
css = """
|
552 |
.gradio-container {
|
553 |
max-width: 95% !important;
|
@@ -566,11 +370,11 @@ table {
|
|
566 |
width: 100% !important;
|
567 |
}
|
568 |
"""
|
569 |
-
|
570 |
TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
|
571 |
SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won't work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>"
|
572 |
-
|
573 |
-
|
|
|
574 |
gr.HTML(TITLE_HTML)
|
575 |
gr.HTML(SUBTITLE_HTML)
|
576 |
with gr.Tabs() as tabs:
|
@@ -585,62 +389,32 @@ with gr.Blocks(css=css) as demo:
|
|
585 |
midpoint_slider = gr.Slider(minimum=0.1, maximum=0.9, value=0.5, step=0.01, label="Color Gradient Midpoint")
|
586 |
color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
|
587 |
colfilter = gr.CheckboxGroup(label="Data Source", choices=get_unique_column_names(all_data), value=initial_columns)
|
588 |
-
|
589 |
table = gr.HTML(initial_data)
|
590 |
-
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
)
|
596 |
-
model_selector.change(
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
)
|
601 |
-
|
602 |
-
|
603 |
-
inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
|
604 |
-
outputs=table,
|
605 |
-
)
|
606 |
-
colfilter.change(
|
607 |
-
update_table,
|
608 |
-
inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
|
609 |
-
outputs=table,
|
610 |
-
)
|
611 |
-
color_selector.change(
|
612 |
-
update_table,
|
613 |
-
inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
|
614 |
-
outputs=table,
|
615 |
-
)
|
616 |
-
size_range_slider.change(
|
617 |
-
update_table,
|
618 |
-
inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
|
619 |
-
outputs=table,
|
620 |
-
)
|
621 |
-
midpoint_slider.change(
|
622 |
-
update_table,
|
623 |
-
inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
|
624 |
-
outputs=table,
|
625 |
-
)
|
626 |
-
|
627 |
with gr.Tab("🌍 MultiLang"):
|
628 |
gr.Markdown("## Coming soon...")
|
629 |
-
world_languages_plot = gr.Plot(create_world_languages_gdp_chart())
|
630 |
-
|
631 |
with gr.Tab("📈 Scaling Law"):
|
632 |
period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
633 |
-
|
634 |
def update_plot(period):
|
635 |
new_fig = create_scaling_plot(all_data, period)
|
636 |
return new_fig
|
637 |
-
|
638 |
plot = gr.Plot(initial_fig)
|
639 |
period_selector_2.change(update_plot, inputs=period_selector_2, outputs=plot)
|
640 |
-
|
641 |
with gr.Tab("ℹ️ About"):
|
642 |
gr.Markdown(read_about_md())
|
643 |
-
|
644 |
with gr.Tab("🚀 Submit"):
|
645 |
with gr.Group():
|
646 |
with gr.Row():
|
@@ -648,5 +422,4 @@ with gr.Blocks(css=css) as demo:
|
|
648 |
submit = gr.Button("Submit", variant="primary", scale=0)
|
649 |
output = gr.Markdown("# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
|
650 |
submit.click(fn=submit_model, inputs=model_name, outputs=output)
|
651 |
-
|
652 |
-
demo.launch(share=False)
|
|
|
7 |
from matplotlib.colors import LinearSegmentedColormap
|
8 |
import plotly.express as px
|
9 |
import plotly.graph_objects as go
|
10 |
+
# from sklearn.linear_model import LinearRegression
|
11 |
import numpy as np
|
12 |
from huggingface_hub import HfApi
|
13 |
from huggingface_hub.hf_api import HTTPError
|
14 |
from huggingface_hub.utils import GatedRepoError
|
15 |
from gradio_rangeslider import RangeSlider
|
16 |
import datetime
|
17 |
+
from gradio.themes.utils.colors import slate
|
18 |
|
19 |
load_dotenv()
|
20 |
webhook_url = os.environ.get("WEBHOOK_URL")
|
|
|
21 |
file_name_list = [
|
22 |
"14b",
|
23 |
"9b",
|
|
|
26 |
"1b5",
|
27 |
"other",
|
28 |
]
|
|
|
29 |
sheet_name_list = [
|
30 |
"cr",
|
31 |
"bpc",
|
32 |
"bpb",
|
33 |
]
|
|
|
34 |
metric_list = [
|
35 |
"Compression Rate (%)",
|
36 |
"Bits Per Character (BPC)",
|
37 |
"Bits Per Byte (BPB)",
|
38 |
]
|
|
|
39 |
model_size_list = [
|
40 |
"~14B",
|
41 |
"~9B",
|
|
|
44 |
"~1.5B",
|
45 |
"Other",
|
46 |
]
|
|
|
47 |
metric_to_sheet = {
|
48 |
"Compression Rate (%)": "cr",
|
49 |
"Bits Per Character (BPC)": "bpc",
|
50 |
"Bits Per Byte (BPB)": "bpb",
|
51 |
}
|
|
|
52 |
model_size_to_file_name = {
|
53 |
"~14B": "14b",
|
54 |
"~9B": "9b",
|
|
|
62 |
with open('about.md', 'r', encoding='utf-8') as f:
|
63 |
return f.read()
|
64 |
|
|
|
65 |
def rename_columns(df):
    """Strip the trailing ``_<suffix>`` from every column label of *df*.

    Each string label is cut at its last underscore and only the part before
    it is kept (``"a_b_c"`` becomes ``"a_b"``); labels without an underscore
    are left as they are.  The frame is modified in place and also returned.

    Args:
        df: DataFrame whose ``columns`` are rewritten.

    Returns:
        The same DataFrame object, with renamed columns.
    """
    # Non-string labels (e.g. an integer header) have no rsplit();
    # pass them through untouched instead of raising AttributeError.
    df.columns = [
        col.rsplit("_", maxsplit=1)[0] if isinstance(col, str) else col
        for col in df.columns
    ]
    return df
|
68 |
|
|
|
69 |
def get_folders_matching_format(directory):
    """Return the sub-folders of *directory* whose name looks like ``YYYY-MM``.

    Args:
        directory: Path of the folder to scan.

    Returns:
        A sorted list of paths (``directory`` joined with the entry name) for
        every entry that is itself a directory and whose name is four digits,
        a hyphen, and two digits.  An empty list is returned when *directory*
        does not exist or is not a directory.
    """
    # isdir() rather than exists(): a path that points at a regular file
    # would otherwise reach os.listdir() and raise NotADirectoryError.
    if not os.path.isdir(directory):
        return []

    pattern = re.compile(r"^\d{4}-\d{2}$")
    folders = []
    for item in os.listdir(directory):
        full_path = os.path.join(directory, item)
        if pattern.match(item) and os.path.isdir(full_path):
            folders.append(full_path)

    # os.listdir() order is arbitrary; sort so the result is deterministic.
    folders.sort()
    return folders
|
79 |
|
|
|
80 |
def get_unique_column_names(data=None):
|
81 |
return [
|
82 |
"ao3_\u200benglish",
|
|
|
88 |
"github_\u200bpython",
|
89 |
]
|
90 |
|
|
|
91 |
def color_cell(value):
    """Return the CSS used to highlight a table cell that holds a value.

    Args:
        value: The cell value handed over by the pandas Styler.

    Returns:
        ``"background-color: #fffdd0"`` when *value* is not missing, otherwise
        an empty string (meaning "no extra style").
    """
    # A bare "default" is not an "attr: value" declaration, and the pandas
    # Styler rejects such strings when rendering; "" is the no-style value.
    return "background-color: #fffdd0" if pd.notna(value) else ""
|
93 |
|
94 |
+
# def color_cell_themed(value):
|
95 |
+
# return "background-color: rgba(255, 253, 208, 1.0)" if pd.notna(value) else "default"
|
96 |
|
97 |
+
# --- Core change 1: modify the update_table function ---
|
98 |
+
# Add a `request: gr.Request = None` parameter to receive the theme-mode information
|
99 |
+
# The default value of None is there to handle the initial page load
|
100 |
+
def update_table(period: str, models_size: list, metric: str, visible_columns: list, color_columns: list, size_range: list, midpoint: float = 0.5, sort_by: str = "Average (lower=better)", ascending: bool = True, request: gr.Request = None):
|
101 |
+
# 打印日志并检查当前模式
|
102 |
+
is_dark_mode = request.is_dark if request else False
|
103 |
+
print(f"Updating - time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}, period: {period}, models: {models_size}, metric: {metric}, visible_columns: {visible_columns}, color_columns: {color_columns}, size_range: {size_range}, sort_by: {sort_by}, ascending: {ascending}, is_dark: {is_dark_mode}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
if not models_size:
|
106 |
return "No data available for the selected models and period."
|
107 |
+
|
|
|
108 |
target_period_data = all_data[period]
|
109 |
target_file_name = [model_size_to_file_name[model] for model in models_size]
|
110 |
sheet_name = metric_to_sheet[metric]
|
111 |
+
combined_data = pd.concat([df.dropna(axis=1, how="all") for df in [target_period_data[file_name][sheet_name] for file_name in target_file_name]], axis=0)
|
112 |
+
|
|
|
|
|
|
|
113 |
if len(combined_data) == 0:
|
114 |
return "No data available for the selected models and period."
|
115 |
+
|
|
|
|
|
116 |
combined_data = combined_data[combined_data["Parameters Count (B)"].between(size_range[0], size_range[1])]
|
117 |
combined_data.reset_index(drop=True, inplace=True)
|
118 |
+
|
119 |
if len(combined_data) == 0:
|
120 |
return "No data available for the selected models and period."
|
121 |
+
|
|
|
122 |
combined_data["Name"] = combined_data["Name"].apply(lambda x: x.replace(".pth", ""))
|
|
|
123 |
ordered_columns = get_unique_column_names()
|
124 |
relevant_columns = [col for col in ordered_columns if col in visible_columns and col not in ["Name", "Parameters Count (B)", "Average (The lower the better)"]]
|
125 |
|
126 |
+
if len(combined_data) > 0 and relevant_columns:
|
127 |
combined_data["Average (The lower the better)"] = round(combined_data[relevant_columns].mean(axis=1), 3)
|
128 |
+
|
129 |
+
combined_data = combined_data.rename(columns={"Parameters Count (B)": "Params (B)", "Average (The lower the better)": "Average (lower=better)"})
|
130 |
sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
|
131 |
+
visible_columns_final = ["Name", "Params (B)", "Average (lower=better)"] + relevant_columns
|
132 |
+
filtered_data = sorted_data[visible_columns_final]
|
133 |
filtered_data.columns = [col.replace("_", " ") for col in filtered_data.columns]
|
|
|
134 |
formatter = {col: "{:.3f}" for col in filtered_data.columns if filtered_data[col].dtype in ["float64", "float32"]}
|
135 |
+
|
136 |
+
# --- 核心改动点 2: 根据主题模式选择不同的配色方案 ---
|
137 |
+
if is_dark_mode:
|
138 |
+
# 夜间模式配色 (绿 -> 深灰 -> 红)
|
139 |
+
colors = ["#2ca02c", "#2b2b2b", "#d62728"]
|
140 |
+
else:
|
141 |
+
# 日间模式配色 (绿 -> 白 -> 红)
|
142 |
+
colors = ["#63be7b", "#ffffff", "#f8696b"]
|
143 |
+
|
144 |
+
vmin, vmax, vmid = {}, {}, {}
|
145 |
for column in filtered_data.columns:
|
146 |
+
if column in ["Name", "Params (B)"]: continue
|
147 |
+
col_values = filtered_data[column].dropna()
|
|
|
148 |
if len(col_values) > 1:
|
149 |
sorted_values = np.sort(col_values)
|
150 |
vmin[column] = sorted_values.min()
|
|
|
152 |
idx = int(len(sorted_values) * midpoint)
|
153 |
vmid[column] = sorted_values[idx]
|
154 |
|
155 |
+
# --- 核心改动点 3: 修改样式函数以包含固定的黑色字体 ---
|
156 |
+
def custom_background_gradient(series, cmap, vmin_val, vmax_val, vmid_val):
|
157 |
+
if len(series) == 0: return series
|
|
|
158 |
def normalize(x):
|
159 |
+
if pd.isna(x): return 0.5 # Neutral for NaN
|
160 |
+
if vmid_val == vmin_val and x <= vmid_val: return 0.0
|
161 |
+
if vmid_val == vmax_val and x >= vmid_val: return 1.0
|
162 |
+
if vmid_val == vmin_val or vmid_val == vmax_val: return 0.5
|
163 |
+
if x <= vmid_val:
|
164 |
+
return 0.5 * (x - vmin_val) / (vmid_val - vmin_val)
|
165 |
else:
|
166 |
+
return 0.5 + 0.5 * (x - vmid_val) / (vmax_val - vmid_val)
|
|
|
167 |
normed = series.apply(normalize)
|
168 |
+
cmap_colors = [cmap(x) for x in normed]
|
169 |
+
# 在返回的CSS中同时设置 background-color 和 color
|
170 |
+
return [
|
171 |
+
"background-color: rgba({}, {}, {}, {}); color: black;".format(*[int(255 * c) for c in color[:3]], color[3])
|
172 |
+
for color in cmap_colors
|
173 |
+
]
|
174 |
|
175 |
target_color_columns = []
|
176 |
+
if "Average" in color_columns: target_color_columns.append("Average (lower=better)")
|
177 |
+
if "Individual Tests" in color_columns: target_color_columns.extend([col for col in filtered_data.columns if col not in ["Name", "Params (B)", "Average (lower=better)"]])
|
178 |
+
|
179 |
+
def color_params_column_dynamic(value):
|
180 |
+
if not pd.notna(value):
|
181 |
+
return "default"
|
182 |
+
|
183 |
+
# 2. 根据 is_dark_mode 返回不同的颜色
|
184 |
+
if is_dark_mode:
|
185 |
+
# 为夜间模式选择一个柔和、不刺眼的暗金色
|
186 |
+
# 字体颜色也设置为浅色以保证对比度
|
187 |
+
return "background-color: #4b4936; color: #f0f0f0;"
|
188 |
+
else:
|
189 |
+
# 为日间模式使用明亮的奶油色,字体为黑色
|
190 |
+
return "background-color: #fffdd0; color: black;"
|
191 |
+
|
192 |
+
styler = filtered_data.style.format(formatter).map(color_params_column_dynamic, subset=["Params (B)"])
|
193 |
for column in target_color_columns:
|
194 |
+
if column in vmin:
|
195 |
+
custom_cmap = LinearSegmentedColormap.from_list("custom_cmap", colors)
|
196 |
+
styler = styler.apply(custom_background_gradient, cmap=custom_cmap, vmin_val=vmin[column], vmax_val=vmax[column], vmid_val=vmid[column], subset=[column])
|
197 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
198 |
styler = styler.hide(axis="index")
|
|
|
199 |
widths = [300, 150, 150, 100, 100, 100, 100, 100, 100, 100, 100]
|
200 |
+
|
201 |
table_styles = []
|
202 |
+
table_styles.append({"selector": "th", "props": [("background-color", "var(--background-fill-secondary)"), ("color", "var(--body-text-color)"), ("padding", "8px"), ("font-weight", "bold")]})
|
203 |
+
table_styles.append({"selector": "table", "props": [("border-collapse", "collapse"), ("border", f"1px solid var(--border-color-primary)")]})
|
204 |
for i, w in enumerate(widths):
|
205 |
+
table_styles.append({"selector": f"th.col{i}, td.col{i}", "props": [("min-width", f"{w}px"), ("max-width", f"{w}px"), ("text-align", "center"), ("border", f"1px solid var(--border-color-primary)")]})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
styler = styler.set_table_styles(table_styles)
|
207 |
+
return styler.to_html()
|
|
|
|
|
|
|
208 |
|
209 |
def create_world_languages_gdp_chart():
|
210 |
languages = ["English", "Chinese", "Spanish", "Japanese", "German", "French", "Arabic", "Italian", "Portuguese", "Korean", "Other"]
|
211 |
shares = [27, 18, 8, 6, 5, 4, 3, 2, 2, 2, 23]
|
212 |
colors = ["#FF7F7F", "#FFA07A", "#FFDB58", "#90EE90", "#98FB98", "#87CEFA", "#B0C4DE", "#DDA0DD", "#D8BFD8", "#F0E68C", "#E0FFFF"]
|
|
|
213 |
fig = go.Figure(
|
214 |
data=[
|
215 |
go.Pie(
|
|
|
224 |
)
|
225 |
]
|
226 |
)
|
|
|
227 |
fig.update_layout(
|
228 |
title={
|
229 |
"text": "World Languages by Share of Global GDP",
|
|
|
238 |
height=500,
|
239 |
margin=dict(t=80, b=20, l=20, r=20),
|
240 |
)
|
|
|
241 |
return fig
|
242 |
|
|
|
243 |
def check_model_exists(model_id):
|
244 |
api = HfApi()
|
245 |
try:
|
|
|
253 |
else:
|
254 |
return "Error: " + str(e)
|
255 |
|
|
|
256 |
def submit_model(name):
|
257 |
if "Exists" not in check_model_exists(name):
|
258 |
return f"# ERROR: Model {name} does not exist on Hugging Face!"
|
|
|
259 |
try:
|
260 |
response = requests.post(webhook_url, json={"content": name})
|
261 |
if response.status_code == 200:
|
|
|
271 |
except Exception as e:
|
272 |
print(e)
|
273 |
return "ERROR: Unexpected error. Please try again later."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
def create_scaling_plot(all_data, period):
|
275 |
selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
|
276 |
target_data = all_data[period]
|
277 |
new_df = pd.DataFrame()
|
|
|
278 |
for size in target_data.keys():
|
279 |
new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all")], axis=0)
|
|
|
280 |
x_values = new_df["Parameters Count (B)"].astype(float).tolist()
|
281 |
y_values = new_df["Average (The lower the better)"].astype(float).tolist()
|
282 |
names = new_df["Name"].tolist()
|
|
|
283 |
x_min, x_max = np.log10(min(x_values)), np.log10(max(x_values))
|
284 |
y_min, y_max = np.log10(min(y_values)), np.log10(max(y_values))
|
|
|
285 |
x_dtick = (x_max - x_min) / 4
|
286 |
y_dtick = (y_max - y_min) / 4
|
|
|
287 |
fig = go.Figure()
|
|
|
288 |
fig.add_trace(
|
289 |
go.Scatter(
|
290 |
x=x_values,
|
|
|
299 |
),
|
300 |
)
|
301 |
)
|
|
|
302 |
fig.update_layout(
|
303 |
title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
|
304 |
width=800,
|
|
|
324 |
autorange="reversed",
|
325 |
),
|
326 |
)
|
|
|
327 |
return fig
|
328 |
|
|
|
329 |
def read_all_data(folder_name):
|
330 |
all_data = {}
|
331 |
time_list = []
|
|
|
340 |
for sheet_name in sheet_name_list:
|
341 |
final_file_name = os.path.join(folder, file_name)
|
342 |
all_data[folder_name][file_name][sheet_name] = rename_columns(pd.read_excel(final_file_name + ".xlsx", sheet_name=sheet_name))
|
|
|
343 |
return all_data, time_list
|
344 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
all_data, time_list = read_all_data("data")
|
|
|
|
|
346 |
time_list.sort()
|
347 |
last_period = time_list[-1]
|
|
|
348 |
initial_fig = create_scaling_plot(all_data, last_period)
|
349 |
initial_metric = metric_list[0]
|
350 |
initial_columns = get_unique_column_names(all_data)
|
351 |
initial_colors = ["Average", "Individual Tests"]
|
352 |
initial_size_range = [0, 40]
|
353 |
+
# On the initial call to update_table, the request parameter keeps its default value of None.
|
354 |
initial_data = update_table(last_period, model_size_list, initial_metric, initial_columns, initial_colors, initial_size_range)
|
|
|
355 |
css = """
|
356 |
.gradio-container {
|
357 |
max-width: 95% !important;
|
|
|
370 |
width: 100% !important;
|
371 |
}
|
372 |
"""
|
|
|
373 |
TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
|
374 |
SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won't work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>"
|
375 |
+
# theme = gr.themes.Default(primary_hue=slate, secondary_hue=slate)
|
376 |
+
theme = gr.themes.Default()
|
377 |
+
with gr.Blocks(theme=theme, css=css) as demo:
|
378 |
gr.HTML(TITLE_HTML)
|
379 |
gr.HTML(SUBTITLE_HTML)
|
380 |
with gr.Tabs() as tabs:
|
|
|
389 |
midpoint_slider = gr.Slider(minimum=0.1, maximum=0.9, value=0.5, step=0.01, label="Color Gradient Midpoint")
|
390 |
color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
|
391 |
colfilter = gr.CheckboxGroup(label="Data Source", choices=get_unique_column_names(all_data), value=initial_columns)
|
|
|
392 |
table = gr.HTML(initial_data)
|
393 |
+
|
394 |
+
# --- Core change 4: update all .change() events, adding gr.Request() ---
|
395 |
+
# Define the shared input list once to avoid repetition.
|
396 |
+
shared_inputs = [period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider]
|
397 |
+
|
398 |
+
period_selector.change(update_table, inputs=shared_inputs, outputs=table)
|
399 |
+
model_selector.change(update_table, inputs=shared_inputs, outputs=table)
|
400 |
+
metric_selector.change(update_table, inputs=shared_inputs, outputs=table)
|
401 |
+
colfilter.change(update_table, inputs=shared_inputs, outputs=table)
|
402 |
+
color_selector.change(update_table, inputs=shared_inputs, outputs=table)
|
403 |
+
size_range_slider.change(update_table, inputs=shared_inputs, outputs=table)
|
404 |
+
midpoint_slider.change(update_table, inputs=shared_inputs, outputs=table)
|
405 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
406 |
with gr.Tab("🌍 MultiLang"):
|
407 |
gr.Markdown("## Coming soon...")
|
408 |
+
# world_languages_plot = gr.Plot(create_world_languages_gdp_chart())
|
|
|
409 |
with gr.Tab("📈 Scaling Law"):
|
410 |
period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
|
|
411 |
# Callback for period_selector_2.change: rebuilds the scaling plot from all_data for the given period and returns the new figure.
def update_plot(period):
|
412 |
new_fig = create_scaling_plot(all_data, period)
|
413 |
return new_fig
|
|
|
414 |
plot = gr.Plot(initial_fig)
|
415 |
period_selector_2.change(update_plot, inputs=period_selector_2, outputs=plot)
|
|
|
416 |
with gr.Tab("ℹ️ About"):
|
417 |
gr.Markdown(read_about_md())
|
|
|
418 |
with gr.Tab("🚀 Submit"):
|
419 |
with gr.Group():
|
420 |
with gr.Row():
|
|
|
422 |
submit = gr.Button("Submit", variant="primary", scale=0)
|
423 |
output = gr.Markdown("# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
|
424 |
submit.click(fn=submit_model, inputs=model_name, outputs=output)
|
425 |
+
demo.launch(share=False)
|
|