[email protected] committed on
Commit
2d95777
·
1 Parent(s): c4a1d6e
Files changed (2) hide show
  1. app.py +1 -1
  2. src/populate.py +2 -2
app.py CHANGED
@@ -138,7 +138,7 @@ with demo:
138
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
139
  leaderboard = init_leaderboard(LEADERBOARD_DF, AutoEvalColumn)
140
 
141
- with gr.TabItem("🏅 Asset Benchmark", elem_id="llm-benchmark-asset-tab-table", id=1):
142
  leaderboard = init_leaderboard(ASSET_LEADERBOARD_DF, AutoEvalColumnAsset)
143
 
144
  with gr.TabItem("📊 Performance Plot", elem_id="llm-benchmark-tab-table", id=2):
 
138
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
139
  leaderboard = init_leaderboard(LEADERBOARD_DF, AutoEvalColumn)
140
 
141
+ with gr.TabItem("🛠️ Asset Benchmark", elem_id="llm-benchmark-asset-tab-table", id=1):
142
  leaderboard = init_leaderboard(ASSET_LEADERBOARD_DF, AutoEvalColumnAsset)
143
 
144
  with gr.TabItem("📊 Performance Plot", elem_id="llm-benchmark-tab-table", id=2):
src/populate.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  import pandas as pd
5
 
6
  from src.display.formatting import has_no_nan_values, make_clickable_model
7
- from src.display.utils import AutoEvalColumn, EvalQueueColumn
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
@@ -17,7 +17,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
17
 
18
  df = pd.DataFrame.from_records(all_data_json)
19
  print(df)
20
- df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
21
  df = df[cols].round(decimals=2)
22
 
23
  # filter out if any of the benchmarks have not been produced
 
4
  import pandas as pd
5
 
6
  from src.display.formatting import has_no_nan_values, make_clickable_model
7
+ from src.display.utils import AutoEvalColumnAsset, EvalQueueColumn
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
 
17
 
18
  df = pd.DataFrame.from_records(all_data_json)
19
  print(df)
20
+ df = df.sort_values(by=[AutoEvalColumnAsset.average.name], ascending=False)
21
  df = df[cols].round(decimals=2)
22
 
23
  # filter out if any of the benchmarks have not been produced