BenchmarkBot committed on
Commit
ec9f1c7
·
1 Parent(s): 271d809
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -28,7 +28,6 @@ OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
28
 
29
 
30
  ALL_COLUMNS_MAPPING = {
31
- "best_scored_model": "Best Scored Model 🏆",
32
  "model_type": "Type 🤗",
33
  "weight_class": "Class 🏋️",
34
  #
@@ -38,11 +37,11 @@ ALL_COLUMNS_MAPPING = {
38
  #
39
  "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
40
  "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
41
- "best_score": "Score (%) ⬆️",
42
  #
 
 
43
  }
44
  ALL_COLUMNS_DATATYPES = [
45
- "markdown",
46
  "str",
47
  "str",
48
  #
@@ -52,6 +51,8 @@ ALL_COLUMNS_DATATYPES = [
52
  #
53
  "number",
54
  "number",
 
 
55
  "number",
56
  ]
57
  SORTING_COLUMN = ["tradeoff"]
@@ -66,9 +67,12 @@ def get_benchmark_df(benchmark="1xA100-80GB"):
66
  # load and merge
67
  bench_df = pd.read_csv(f"./llm-perf-dataset/reports/{benchmark}.csv")
68
  scores_df = pd.read_csv(
69
- f"./llm-perf-dataset/reports/Grouped-Open-LLM-Leaderboard.csv"
70
  )
71
- merged_df = bench_df.merge(scores_df, left_on="model", right_on="best_scored_model")
 
 
 
72
 
73
  # add optimizations
74
  merged_df["optimizations"] = merged_df[
@@ -89,6 +93,11 @@ def get_benchmark_df(benchmark="1xA100-80GB"):
89
  axis=1,
90
  )
91
 
 
 
 
 
 
92
  # create composite score
93
  score_distance = 100 - merged_df["best_score"]
94
  # normalize latency between 0 and 100
@@ -247,7 +256,7 @@ with demo:
247
 
248
  with gr.TabItem("Control Panel 🎛️", id=2):
249
  gr.HTML(
250
- "Use this control panel to filter the leaderboard (table and plot).",
251
  elem_id="descriptive-text",
252
  )
253
  # control panel interface
 
28
 
29
 
30
  ALL_COLUMNS_MAPPING = {
 
31
  "model_type": "Type 🤗",
32
  "weight_class": "Class 🏋️",
33
  #
 
37
  #
38
  "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
39
  "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
 
40
  #
41
+ "best_scored_model": "Best Scored Model 🏆",
42
+ "best_score": "Best Score (%) ⬆️",
43
  }
44
  ALL_COLUMNS_DATATYPES = [
 
45
  "str",
46
  "str",
47
  #
 
51
  #
52
  "number",
53
  "number",
54
+ #
55
+ "markdown",
56
  "number",
57
  ]
58
  SORTING_COLUMN = ["tradeoff"]
 
67
  # load and merge
68
  bench_df = pd.read_csv(f"./llm-perf-dataset/reports/{benchmark}.csv")
69
  scores_df = pd.read_csv(
70
+ "./llm-perf-dataset/reports/Weighted+Classed-Open-LLM-Leaderboard.csv"
71
  )
72
+
73
+ bench_df["merge_id"] = bench_df.experiment_name.str.split("_1_1000_").str[-1]
74
+ scores_df["merge_id"] = scores_df.weight_class + "_" + scores_df.model_type
75
+ merged_df = bench_df.merge(scores_df, on="merge_id")
76
 
77
  # add optimizations
78
  merged_df["optimizations"] = merged_df[
 
93
  axis=1,
94
  )
95
 
96
+ # remove score for quantized models
97
+ merged_df.loc[
98
+ merged_df["optimizations"].str.contains("LLM.int8|LLM.fp4"), "best_score"
99
+ ] = "Not Evaluated"
100
+
101
  # create composite score
102
  score_distance = 100 - merged_df["best_score"]
103
  # normalize latency between 0 and 100
 
256
 
257
  with gr.TabItem("Control Panel 🎛️", id=2):
258
  gr.HTML(
259
+ "Use this control panel to filter the leaderboard's table and plot.",
260
  elem_id="descriptive-text",
261
  )
262
  # control panel interface