Spaces:
Running
Running
[email protected]
committed on
Commit
·
2d95777
1
Parent(s):
c4a1d6e
update
Browse files- app.py +1 -1
- src/populate.py +2 -2
app.py
CHANGED
@@ -138,7 +138,7 @@ with demo:
|
|
138 |
with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
139 |
leaderboard = init_leaderboard(LEADERBOARD_DF, AutoEvalColumn)
|
140 |
|
141 |
-
with gr.TabItem("
|
142 |
leaderboard = init_leaderboard(ASSET_LEADERBOARD_DF, AutoEvalColumnAsset)
|
143 |
|
144 |
with gr.TabItem("π Performance Plot", elem_id="llm-benchmark-tab-table", id=2):
|
|
|
138 |
with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
139 |
leaderboard = init_leaderboard(LEADERBOARD_DF, AutoEvalColumn)
|
140 |
|
141 |
+
with gr.TabItem("🛠️ Asset Benchmark", elem_id="llm-benchmark-asset-tab-table", id=1):
|
142 |
leaderboard = init_leaderboard(ASSET_LEADERBOARD_DF, AutoEvalColumnAsset)
|
143 |
|
144 |
with gr.TabItem("π Performance Plot", elem_id="llm-benchmark-tab-table", id=2):
|
src/populate.py
CHANGED
@@ -4,7 +4,7 @@ import os
|
|
4 |
import pandas as pd
|
5 |
|
6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
7 |
-
from src.display.utils import
|
8 |
from src.leaderboard.read_evals import get_raw_eval_results
|
9 |
|
10 |
|
@@ -17,7 +17,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
17 |
|
18 |
df = pd.DataFrame.from_records(all_data_json)
|
19 |
print(df)
|
20 |
-
df = df.sort_values(by=[
|
21 |
df = df[cols].round(decimals=2)
|
22 |
|
23 |
# filter out if any of the benchmarks have not been produced
|
|
|
4 |
import pandas as pd
|
5 |
|
6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
7 |
+
from src.display.utils import AutoEvalColumnAsset, EvalQueueColumn
|
8 |
from src.leaderboard.read_evals import get_raw_eval_results
|
9 |
|
10 |
|
|
|
17 |
|
18 |
df = pd.DataFrame.from_records(all_data_json)
|
19 |
print(df)
|
20 |
+
df = df.sort_values(by=[AutoEvalColumnAsset.average.name], ascending=False)
|
21 |
df = df[cols].round(decimals=2)
|
22 |
|
23 |
# filter out if any of the benchmarks have not been produced
|