Spaces:
Sleeping
Sleeping
Add 1_correct_var
Browse files
app.py
CHANGED
@@ -55,7 +55,7 @@ except Exception:
|
|
55 |
|
56 |
LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
57 |
LEADERBOARD_DF_N_CORRECT = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS, version="n_correct")
|
58 |
-
|
59 |
(
|
60 |
finished_eval_queue_df,
|
61 |
running_eval_queue_df,
|
@@ -98,6 +98,9 @@ with demo:
|
|
98 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
99 |
with gr.TabItem("🏅 1 Correct", elem_id="llm-benchmark-tab-table", id=0):
|
100 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
|
|
|
|
|
|
101 |
|
102 |
with gr.TabItem("🏅 N Correct", elem_id="llm-benchmark-tab-table", id=1):
|
103 |
leaderboard = init_leaderboard(LEADERBOARD_DF_N_CORRECT)
|
|
|
55 |
|
56 |
LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
57 |
LEADERBOARD_DF_N_CORRECT = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS, version="n_correct")
|
58 |
+
LEADERBOARD_DF_1_CORRECT_VAR = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS, version="1_correct_var")
|
59 |
(
|
60 |
finished_eval_queue_df,
|
61 |
running_eval_queue_df,
|
|
|
98 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
99 |
with gr.TabItem("🏅 1 Correct", elem_id="llm-benchmark-tab-table", id=0):
|
100 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
101 |
+
|
102 |
+
with gr.TabItem("🏅 1 Correct with Option Variations", elem_id="llm-benchmark-tab-table", id=4):
|
103 |
+
leaderboard = init_leaderboard(LEADERBOARD_DF_1_CORRECT_VAR)
|
104 |
|
105 |
with gr.TabItem("🏅 N Correct", elem_id="llm-benchmark-tab-table", id=1):
|
106 |
leaderboard = init_leaderboard(LEADERBOARD_DF_N_CORRECT)
|