Spaces:

optimum
/

llm-perf-leaderboard

Running

BenchmarkBot committed on Jul 28, 2023

Commit

e0ef314

1 Parent(s): 9e3eaf4

fix naming

Files changed (2) hide show

app.py CHANGED Viewed

@@ -28,8 +28,8 @@ OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
 ALL_COLUMNS_MAPPING = {
-    "best_scored_model": "Best Scored LLM 🏆",
-    "model_type": "LLM Type 🤗",
     "weight_class": "Weight Class 🏋️",
     #
     "backend.name": "Backend 🏭",
@@ -107,7 +107,7 @@ def get_benchmark_table(bench_df):
     # rename
     bench_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
     # transform
-    bench_df["LLM Type 🤗"] = bench_df["LLM Type 🤗"].apply(process_model_type)
     bench_df["Weight Class 🏋️"] = bench_df["Weight Class 🏋️"].apply(
         process_weight_class
     )

 ALL_COLUMNS_MAPPING = {
+    "best_scored_model": "Best Scored Model 🏆",
+    "model_type": "Model Type 🤗",
     "weight_class": "Weight Class 🏋️",
     #
     "backend.name": "Backend 🏭",
     # rename
     bench_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
     # transform
+    bench_df["Model Type 🤗"] = bench_df["Model Type 🤗"].apply(process_model_type)
     bench_df["Weight Class 🏋️"] = bench_df["Weight Class 🏋️"].apply(
         process_weight_class
     )

src/assets/text_content.py CHANGED Viewed

@@ -12,9 +12,9 @@ A100_TEXT = """<h3>Single-GPU Benchmark (1xA100):</h3>
 <ul>
     <li>LLMs are evaluated on a singleton batch and generating a thousand tokens.</li>
     <li>Peak memory is measured in MB during the first forward pass of the LLM (no warmup).</li>
-    <li>Each pair of (LLM Type, Weight Class) is represented by the best scored LLM. This LLM is the one used for all the hardware/backend/optimization experiments.</li>
     <li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
-    <li>Ranking is based on the euclidean distance from "perfect LLM" (i.e. 0 latency and 100% accuracy).</li>
 </ul>
 """

 <ul>
     <li>LLMs are evaluated on a singleton batch and generating a thousand tokens.</li>
     <li>Peak memory is measured in MB during the first forward pass of the LLM (no warmup).</li>
+    <li>Each pair of (Model Type, Weight Class) is represented by the best scored model. This model is the one used for all the hardware/backend/optimization experiments.</li>
     <li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
+    <li>Ranking is based on the Euclidean distance from the "Perfect LLM" (i.e. 0 latency and 100% accuracy).</li>
 </ul>
 """