xeon27 committed
Commit ed6229f · 1 Parent(s): 2b8ba97

Fix bug
app.py CHANGED

@@ -26,7 +26,7 @@ from src.display.utils import (
     Precision
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.populate import get_evaluation_queue_df, get_leaderboard_df, TASK_NAME_INVERSE_MAP
 from src.submission.submit import add_new_eval
 
 
@@ -59,20 +59,21 @@ AGENTIC_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PAT
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
-def init_leaderboard(dataframe):
+def init_leaderboard(dataframe, benchmark_type):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
+    AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name=="Model") or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
     return Leaderboard(
         value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
+        datatype=[c.type for c in AutoEvalColumnSubset],
         select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+            default_selection=[c.name for c in AutoEvalColumnSubset if c.displayed_by_default],
+            cant_deselect=[c.name for c in AutoEvalColumnSubset if c.never_hidden],
             label="Select Columns to Display:",
         ),
         # search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
         search_columns=[AutoEvalColumn.model.name,],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        hide_columns=[c.name for c in AutoEvalColumnSubset if c.hidden],
         # filter_columns=[
         #     ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
         #     ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
@@ -100,10 +101,10 @@ with demo:
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("Single-turn Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(ST_LEADERBOARD_DF)
+            leaderboard = init_leaderboard(ST_LEADERBOARD_DF, "single-turn")
 
         with gr.TabItem("Agentic Benchmark", elem_id="llm-benchmark-tab-table", id=1):
-            leaderboard = init_leaderboard(AGENTIC_LEADERBOARD_DF)
+            leaderboard = init_leaderboard(AGENTIC_LEADERBOARD_DF, "agentic")
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
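
The fix scopes each tab's Leaderboard to the columns that belong to its benchmark: init_leaderboard now takes a benchmark_type and keeps only the "Model" column plus columns whose task type matches. Below is a minimal, self-contained sketch of that filter; the task names and map entries are invented for illustration (the real TASK_NAME_INVERSE_MAP is imported from src.populate and its contents are not shown in this diff).

# Hypothetical sketch -- task names and map contents are assumptions;
# the real TASK_NAME_INVERSE_MAP comes from src.populate.
from dataclasses import dataclass

TASK_NAME_INVERSE_MAP = {
    "GSM8K": {"type": "single-turn"},   # assumed entry
    "SWE-bench": {"type": "agentic"},   # assumed entry
}

@dataclass
class Column:
    # Stand-in for one entry of fields(AutoEvalColumn).
    name: str

def column_subset(columns, benchmark_type):
    # Same predicate as the diff: always keep "Model", plus any column whose
    # task type matches the tab's benchmark_type; unmapped columns drop out.
    return [
        c for c in columns
        if c.name == "Model"
        or TASK_NAME_INVERSE_MAP.get(c.name, {}).get("type", "") == benchmark_type
    ]

cols = [Column("Model"), Column("GSM8K"), Column("SWE-bench")]
print([c.name for c in column_subset(cols, "single-turn")])  # ['Model', 'GSM8K']
print([c.name for c in column_subset(cols, "agentic")])      # ['Model', 'SWE-bench']

Because the same subset also drives datatype, default_selection, cant_deselect, and hide_columns, the two tabs stay consistent without maintaining separate hand-written column lists.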