BenchmarkBot committed on
Commit
2773294
·
1 Parent(s): 534ff40

remove bnb quantization

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -12,13 +12,13 @@ LLM_PERF_LEADERBOARD_REPO = "optimum/llm-perf-leaderboard"
12
  LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
13
  OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN")
14
 
15
- OLD_COLUMNS = ["model", "backend.name", "backend.torch_dtype", "backend.quantization",
16
  "generate.latency(s)", "generate.throughput(tokens/s)"]
17
 
18
- NEW_COLUMNS = ["Model", "Backend 🏭", "Load Datatype", "Quantization 🗜️",
19
  "Latency (s) ⬇️", "Throughput (tokens/s) ⬆️"]
20
 
21
- COLUMNS_DATATYPES = ["markdown", "str", "str", "str", "number", "number"]
22
 
23
  SORTING_COLUMN = ["Throughput (tokens/s) ⬆️"]
24
 
@@ -34,6 +34,9 @@ def get_benchmark_df():
34
  df = pd.read_csv(
35
  "./llm-perf-dataset/reports/cuda_1_100/inference_report.csv")
36
 
 
 
 
37
  # preprocess
38
  df["model"] = df["model"].apply(make_clickable_model)
39
 
 
12
  LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
13
  OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN")
14
 
15
+ OLD_COLUMNS = ["model", "backend.name", "backend.torch_dtype",
16
  "generate.latency(s)", "generate.throughput(tokens/s)"]
17
 
18
+ NEW_COLUMNS = ["Model", "Backend 🏭", "Load Datatype",
19
  "Latency (s) ⬇️", "Throughput (tokens/s) ⬆️"]
20
 
21
+ COLUMNS_DATATYPES = ["markdown", "str", "str", "number", "number"]
22
 
23
  SORTING_COLUMN = ["Throughput (tokens/s) ⬆️"]
24
 
 
34
  df = pd.read_csv(
35
  "./llm-perf-dataset/reports/cuda_1_100/inference_report.csv")
36
 
37
+ # remove quantized models
38
+ df = df[df["backend.quantization"].notna()]
39
+
40
  # preprocess
41
  df["model"] = df["model"].apply(make_clickable_model)
42