hSterz committed on
Commit
31b5122
·
1 Parent(s): f785497

Add 1_correct_var

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -55,7 +55,7 @@ except Exception:
55
 
56
  LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
57
  LEADERBOARD_DF_N_CORRECT = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS, version="n_correct")
58
-
59
  (
60
  finished_eval_queue_df,
61
  running_eval_queue_df,
@@ -98,6 +98,9 @@ with demo:
98
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
99
  with gr.TabItem("🏅 1 Correct", elem_id="llm-benchmark-tab-table", id=0):
100
  leaderboard = init_leaderboard(LEADERBOARD_DF)
 
 
 
101
 
102
  with gr.TabItem("🏅 N Correct", elem_id="llm-benchmark-tab-table", id=1):
103
  leaderboard = init_leaderboard(LEADERBOARD_DF_N_CORRECT)
 
55
 
56
  LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
57
  LEADERBOARD_DF_N_CORRECT = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS, version="n_correct")
58
+ LEADERBOARD_DF_1_CORRECT_VAR = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS, version="1_correct_var")
59
  (
60
  finished_eval_queue_df,
61
  running_eval_queue_df,
 
98
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
99
  with gr.TabItem("🏅 1 Correct", elem_id="llm-benchmark-tab-table", id=0):
100
  leaderboard = init_leaderboard(LEADERBOARD_DF)
101
+
102
+ with gr.TabItem("🏅 1 Correct with Option Variations", elem_id="llm-benchmark-tab-table", id=4):
103
+ leaderboard = init_leaderboard(LEADERBOARD_DF_1_CORRECT_VAR)
104
 
105
  with gr.TabItem("🏅 N Correct", elem_id="llm-benchmark-tab-table", id=1):
106
  leaderboard = init_leaderboard(LEADERBOARD_DF_N_CORRECT)