hSterz committed on
Commit
a0b0f73
·
1 Parent(s): 8fdc857
Files changed (2) hide show
  1. app.py +2 -1
  2. src/populate.py +2 -2
app.py CHANGED
@@ -50,6 +50,7 @@ except Exception:
50
 
51
 
52
  LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 
53
 
54
  (
55
  finished_eval_queue_df,
@@ -86,7 +87,7 @@ with demo:
86
  leaderboard = init_leaderboard(LEADERBOARD_DF)
87
 
88
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=1):
89
- leaderboard = init_leaderboard(LEADERBOARD_DF)
90
 
91
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table-n-correct", id=2):
92
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
50
 
51
 
52
  LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
+ LEADERBOARD_DF_N_CORRECT = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS, version="n_correct")
54
 
55
  (
56
  finished_eval_queue_df,
 
87
  leaderboard = init_leaderboard(LEADERBOARD_DF)
88
 
89
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=1):
90
+ leaderboard = init_leaderboard(LEADERBOARD_DF_N_CORRECT)
91
 
92
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table-n-correct", id=2):
93
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
src/populate.py CHANGED
@@ -8,9 +8,9 @@ from src.display.utils import AutoEvalColumn, EvalQueueColumn
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
11
- def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
12
  """Creates a dataframe from all the individual experiment results"""
13
- raw_data = get_raw_eval_results(results_path, requests_path)
14
  all_data_json = [v.to_dict() for v in raw_data]
15
 
16
  print(all_data_json)
 
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
11
+ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list, version="1_correct") -> pd.DataFrame:
12
  """Creates a dataframe from all the individual experiment results"""
13
+ raw_data = get_raw_eval_results(results_path+"/"+version, requests_path)
14
  all_data_json = [v.to_dict() for v in raw_data]
15
 
16
  print(all_data_json)