xeon27 committed on
Commit
ed6229f
·
1 Parent(s): 2b8ba97
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -26,7 +26,7 @@ from src.display.utils import (
26
  Precision
27
  )
28
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
29
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
30
  from src.submission.submit import add_new_eval
31
 
32
 
@@ -59,20 +59,21 @@ AGENTIC_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PAT
59
  pending_eval_queue_df,
60
  ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
61
 
62
- def init_leaderboard(dataframe):
63
  if dataframe is None or dataframe.empty:
64
  raise ValueError("Leaderboard DataFrame is empty or None.")
 
65
  return Leaderboard(
66
  value=dataframe,
67
- datatype=[c.type for c in fields(AutoEvalColumn)],
68
  select_columns=SelectColumns(
69
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
70
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
71
  label="Select Columns to Display:",
72
  ),
73
  # search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
74
  search_columns=[AutoEvalColumn.model.name,],
75
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
76
  # filter_columns=[
77
  # ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
78
  # ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
@@ -100,10 +101,10 @@ with demo:
100
 
101
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
102
  with gr.TabItem("Single-turn Benchmark", elem_id="llm-benchmark-tab-table", id=0):
103
- leaderboard = init_leaderboard(ST_LEADERBOARD_DF)
104
 
105
  with gr.TabItem("Agentic Benchmark", elem_id="llm-benchmark-tab-table", id=1):
106
- leaderboard = init_leaderboard(AGENTIC_LEADERBOARD_DF)
107
 
108
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
109
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
26
  Precision
27
  )
28
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
29
+ from src.populate import get_evaluation_queue_df, get_leaderboard_df, TASK_NAME_INVERSE_MAP
30
  from src.submission.submit import add_new_eval
31
 
32
 
 
59
  pending_eval_queue_df,
60
  ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
61
 
62
+ def init_leaderboard(dataframe, benchmark_type):
63
  if dataframe is None or dataframe.empty:
64
  raise ValueError("Leaderboard DataFrame is empty or None.")
65
+ AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name=="Model") or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))]
66
  return Leaderboard(
67
  value=dataframe,
68
+ datatype=[c.type for c in AutoEvalColumnSubset],
69
  select_columns=SelectColumns(
70
+ default_selection=[c.name for c in AutoEvalColumnSubset if c.displayed_by_default],
71
+ cant_deselect=[c.name for c in AutoEvalColumnSubset if c.never_hidden],
72
  label="Select Columns to Display:",
73
  ),
74
  # search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
75
  search_columns=[AutoEvalColumn.model.name,],
76
+ hide_columns=[c.name for c in AutoEvalColumnSubset if c.hidden],
77
  # filter_columns=[
78
  # ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
79
  # ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
 
101
 
102
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
103
  with gr.TabItem("Single-turn Benchmark", elem_id="llm-benchmark-tab-table", id=0):
104
+ leaderboard = init_leaderboard(ST_LEADERBOARD_DF, "single-turn")
105
 
106
  with gr.TabItem("Agentic Benchmark", elem_id="llm-benchmark-tab-table", id=1):
107
+ leaderboard = init_leaderboard(AGENTIC_LEADERBOARD_DF, "agentic")
108
 
109
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
110
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")