Spaces:
Running
Running
BenchmarkBot
committed on
Commit
•
e0ef314
1
Parent(s):
9e3eaf4
fix naming
Browse files- app.py +3 -3
- src/assets/text_content.py +2 -2
app.py
CHANGED
@@ -28,8 +28,8 @@ OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
|
|
28 |
|
29 |
|
30 |
ALL_COLUMNS_MAPPING = {
|
31 |
-
"best_scored_model": "Best Scored
|
32 |
-
"model_type": "
|
33 |
"weight_class": "Weight Class 🏋️",
|
34 |
#
|
35 |
"backend.name": "Backend 🏭",
|
@@ -107,7 +107,7 @@ def get_benchmark_table(bench_df):
|
|
107 |
# rename
|
108 |
bench_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
|
109 |
# transform
|
110 |
-
bench_df["
|
111 |
bench_df["Weight Class 🏋️"] = bench_df["Weight Class 🏋️"].apply(
|
112 |
process_weight_class
|
113 |
)
|
|
|
28 |
|
29 |
|
30 |
ALL_COLUMNS_MAPPING = {
|
31 |
+
"best_scored_model": "Best Scored Model 🏆",
|
32 |
+
"model_type": "Model Type 🤗",
|
33 |
"weight_class": "Weight Class 🏋️",
|
34 |
#
|
35 |
"backend.name": "Backend 🏭",
|
|
|
107 |
# rename
|
108 |
bench_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
|
109 |
# transform
|
110 |
+
bench_df["Model Type 🤗"] = bench_df["Model Type 🤗"].apply(process_model_type)
|
111 |
bench_df["Weight Class 🏋️"] = bench_df["Weight Class 🏋️"].apply(
|
112 |
process_weight_class
|
113 |
)
|
src/assets/text_content.py
CHANGED
@@ -12,9 +12,9 @@ A100_TEXT = """<h3>Single-GPU Benchmark (1xA100):</h3>
|
|
12 |
<ul>
|
13 |
<li>LLMs are evaluated on a singleton batch and generating a thousand tokens.</li>
|
14 |
<li>Peak memory is measured in MB during the first forward pass of the LLM (no warmup).</li>
|
15 |
-
<li>Each pair of (
|
16 |
<li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
|
17 |
-
<li>Ranking is based on the euclidean distance from "
|
18 |
</ul>
|
19 |
"""
|
20 |
|
|
|
12 |
<ul>
|
13 |
<li>LLMs are evaluated on a singleton batch and generating a thousand tokens.</li>
|
14 |
<li>Peak memory is measured in MB during the first forward pass of the LLM (no warmup).</li>
|
15 |
+
<li>Each pair of (Model Type, Weight Class) is represented by the best scored model. This LLM is the one used for all the hardware/backend/optimization experiments.</li>
|
16 |
<li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
|
17 |
+
<li>Ranking is based on the euclidean distance from the "Perfect LLM" (i.e. 0 latency and 100% accuracy).</li>
|
18 |
</ul>
|
19 |
"""
|
20 |
|