BenchmarkBot committed · Commit ec9f1c7 · 1 Parent(s): 271d809

new runs
app.py CHANGED
@@ -28,7 +28,6 @@ OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
 
 
 ALL_COLUMNS_MAPPING = {
-    "best_scored_model": "Best Scored Model 🏆",
     "model_type": "Type 🤗",
     "weight_class": "Class 🏋️",
     #
@@ -38,11 +37,11 @@ ALL_COLUMNS_MAPPING = {
     #
     "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
     "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
-    "best_score": "Score (%) ⬆️",
     #
+    "best_scored_model": "Best Scored Model 🏆",
+    "best_score": "Best Score (%) ⬆️",
 }
 ALL_COLUMNS_DATATYPES = [
-    "markdown",
     "str",
     "str",
     #
@@ -52,6 +51,8 @@ ALL_COLUMNS_DATATYPES = [
     #
     "number",
     "number",
+    #
+    "markdown",
     "number",
 ]
 SORTING_COLUMN = ["tradeoff"]
@@ -66,9 +67,12 @@ def get_benchmark_df(benchmark="1xA100-80GB"):
     # load and merge
     bench_df = pd.read_csv(f"./llm-perf-dataset/reports/{benchmark}.csv")
     scores_df = pd.read_csv(
-
+        "./llm-perf-dataset/reports/Weighted+Classed-Open-LLM-Leaderboard.csv"
     )
-
+
+    bench_df["merge_id"] = bench_df.experiment_name.str.split("_1_1000_").str[-1]
+    scores_df["merge_id"] = scores_df.weight_class + "_" + scores_df.model_type
+    merged_df = bench_df.merge(scores_df, on="merge_id")
 
     # add optimizations
     merged_df["optimizations"] = merged_df[
@@ -89,6 +93,11 @@ def get_benchmark_df(benchmark="1xA100-80GB"):
         axis=1,
     )
 
+    # remove score for quantized models
+    merged_df.loc[
+        merged_df["optimizations"].str.contains("LLM.int8|LLM.fp4"), "best_score"
+    ] = "Not Evaluated"
+
     # create composite score
     score_distance = 100 - merged_df["best_score"]
     # normalize latency between 0 and 100
@@ -247,7 +256,7 @@ with demo:
 
         with gr.TabItem("Control Panel 🎛️", id=2):
            gr.HTML(
-                "Use this control panel to filter the leaderboard
+                "Use this control panel to filter the leaderboard's table and plot.",
                elem_id="descriptive-text",
            )
            # control panel interface
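The first hunks move the "Best Scored Model" (markdown) and "Best Score" (number) columns to the end of ALL_COLUMNS_MAPPING and shift the matching "markdown" entry in ALL_COLUMNS_DATATYPES so the two lists stay positionally aligned. The diff does not show how these constants are consumed; the sketch below assumes they feed a gr.Dataframe the usual way, and make_leaderboard_table is a hypothetical helper, not code from the app.

    import gradio as gr
    import pandas as pd

    # reordered constants as they appear after this commit
    ALL_COLUMNS_MAPPING = {
        "model_type": "Type 🤗",
        "weight_class": "Class 🏋️",
        "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
        "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
        "best_scored_model": "Best Scored Model 🏆",
        "best_score": "Best Score (%) ⬆️",
    }
    # must line up index-for-index with the mapping above
    ALL_COLUMNS_DATATYPES = ["str", "str", "number", "number", "markdown", "number"]

    def make_leaderboard_table(df: pd.DataFrame) -> gr.Dataframe:
        # keep only the mapped columns, in mapping order, renamed for display
        display_df = df[list(ALL_COLUMNS_MAPPING)].rename(columns=ALL_COLUMNS_MAPPING)
        return gr.Dataframe(
            value=display_df,
            headers=list(ALL_COLUMNS_MAPPING.values()),
            datatype=ALL_COLUMNS_DATATYPES,  # "markdown" renders the model cell as a link
            elem_id="leaderboard-table",
        )

The reordering only changes where the model-link and score columns appear in the rendered table; the datatype list has to be shuffled in lockstep because Gradio matches datatypes to headers by position.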
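The core of the commit is the new join in get_benchmark_df between the per-hardware benchmark CSV and the Open LLM Leaderboard scores, plus the masking of accuracy scores for quantized runs. Below is a minimal, self-contained sketch of that logic using invented toy rows; the column values and the exact experiment_name format are assumptions made only to show how merge_id lines the two frames up.

    import pandas as pd

    # toy stand-ins for the two CSVs read in get_benchmark_df (values are illustrative)
    bench_df = pd.DataFrame({
        "experiment_name": [
            "pytorch+cuda+fp16_1_1000_7B_llama",
            "pytorch+cuda+int8_1_1000_7B_llama",
        ],
        "generate.throughput(tokens/s)": [42.0, 30.5],
        "optimizations": ["None", "LLM.int8"],
    })
    scores_df = pd.DataFrame({
        "weight_class": ["7B"],
        "model_type": ["llama"],
        "best_scored_model": ["example/llama-7b"],
        "best_score": [55.0],
    })

    # same key construction as the diff: the tail of experiment_name after "_1_1000_"
    # is "<weight_class>_<model_type>", which is exactly how scores_df builds its key
    bench_df["merge_id"] = bench_df.experiment_name.str.split("_1_1000_").str[-1]
    scores_df["merge_id"] = scores_df.weight_class + "_" + scores_df.model_type
    merged_df = bench_df.merge(scores_df, on="merge_id")

    # quantized runs keep their performance numbers, but their accuracy score is
    # replaced, since the leaderboard score was measured on the unquantized model
    merged_df.loc[
        merged_df["optimizations"].str.contains("LLM.int8|LLM.fp4"), "best_score"
    ] = "Not Evaluated"

    print(merged_df[["experiment_name", "optimizations", "best_score"]])

In this sketch both toy runs share the merge_id "7B_llama", so each picks up the same best_scored_model and best_score from scores_df, and the LLM.int8 run then has its score overwritten with "Not Evaluated".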