pufanyi committed on
Commit
35850bf
·
1 Parent(s): e491c93

Refactor init_leaderboard function to handle multiple subsets, improve column selection and hiding, and include Dataset Version in filter_columns

Browse files
Files changed (2) hide show
  1. app.py +68 -28
  2. src/populate.py +4 -3
app.py CHANGED
@@ -65,40 +65,80 @@ except Exception:
65
  restart_space()
66
 
67
 
68
- LEADERBOARD_DF, SUBSETS = get_leaderboard_df(RESULTS_REPO)
69
 
70
 
71
- def init_leaderboard(dataframes, subsets):
72
- subsets = list(subsets)
73
 
74
  with gr.Row():
75
  selected_subset = gr.Dropdown(choices=subsets, label="Select Dataset Subset", value=subsets[-1])
 
 
 
76
 
77
- # with gr.Row():
78
- # datatype = [c.type for c in fields(AutoEvalColumn)]
79
- # dataframe = gr.Dataframe(dataframes, datatype=datatype, type="pandas")
80
-
81
-
82
- return Leaderboard(
83
- value=dataframes,
84
- datatype=[c.type for c in fields(AutoEvalColumn)],
85
- select_columns=SelectColumns(
86
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
87
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
88
- label="Select Columns to Display:",
89
- ),
90
- search_columns=[AutoEvalColumn.model.name],
91
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
92
- filter_columns=[
93
- ColumnFilter(
94
- column=AutoEvalColumn.dataset_version.name,
95
- choices=subsets,
96
- default=subsets[-1],
97
- )
98
- # gr.Dropdown(choices=subsets, label="Select Dataset Subset", value=subsets[-1])
99
- ],
100
- interactive=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  demo = gr.Blocks(css=custom_css)
104
  with demo:
@@ -107,7 +147,7 @@ with demo:
107
 
108
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
109
  with gr.TabItem("🏅 LiveBench Results", elem_id="llm-benchmark-tab-table", id=0):
110
- init_leaderboard(LEADERBOARD_DF, SUBSETS)
111
 
112
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
113
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
65
  restart_space()
66
 
67
 
68
+ LEADERBOARD_DF = get_leaderboard_df(RESULTS_REPO)
69
 
70
 
71
+ def init_leaderboard(dataframes):
72
+ subsets = list(dataframes.keys())
73
 
74
  with gr.Row():
75
  selected_subset = gr.Dropdown(choices=subsets, label="Select Dataset Subset", value=subsets[-1])
76
+ research_textbox = gr.Textbox(placeholder="🔍 Search Models... [press enter]", label="Filter Models by Name")
77
+ selected_columns = gr.CheckboxGroup(choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden], label="Select Columns to Display", value=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default])
78
+
79
 
80
+ data = dataframes[subsets[-1]]
81
+
82
+ with gr.Row():
83
+ datatype = [c.type for c in fields(AutoEvalColumn)]
84
+ df = gr.Dataframe(data, datatype=datatype, type="pandas")
85
+
86
+ def refresh(subset):
87
+ global LEADERBOARD_DF
88
+ LEADERBOARD_DF = get_leaderboard_df(RESULTS_REPO)
89
+ research_textbox.value = ""
90
+ selected_subset.choices = subsets
91
+ update_data(subset, research_textbox, selected_columns)
92
+
93
+
94
+
95
+ def update_data(subset, search_term, selected_columns):
96
+ return dataframes[subset][dataframes[subset].model.str.contains(search_term, case=False)][selected_columns]
97
+
98
+ with gr.Row():
99
+ refresh_button = gr.Button("Refresh")
100
+ refresh_button.click(refresh, inputs=[
101
+ selected_subset,
102
+ ], outputs=data, concurrency_limit=20)
103
+
104
+
105
+
106
+ selected_subset.change(update_data, inputs=[
107
+ selected_subset, research_textbox, selected_columns
108
+ ], outputs=data)
109
+ research_textbox.submit(
110
+ update_data,
111
+ inputs=[selected_subset, research_textbox, selected_columns],
112
+ outputs=data
113
+ )
114
+ selected_columns.change(
115
+ update_data,
116
+ inputs=[selected_subset, research_textbox, selected_columns],
117
+ outputs=data
118
  )
119
+
120
+
121
+
122
+ # return Leaderboard(
123
+ # value=dataframes,
124
+ # datatype=[c.type for c in fields(AutoEvalColumn)],
125
+ # select_columns=SelectColumns(
126
+ # default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
127
+ # cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
128
+ # label="Select Columns to Display:",
129
+ # ),
130
+ # search_columns=[AutoEvalColumn.model.name],
131
+ # hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
132
+ # filter_columns=[
133
+ # ColumnFilter(
134
+ # column=AutoEvalColumn.dataset_version.name,
135
+ # choices=subsets,
136
+ # default=subsets[-1],
137
+ # )
138
+ # # gr.Dropdown(choices=subsets, label="Select Dataset Subset", value=subsets[-1])
139
+ # ],
140
+ # interactive=False,
141
+ # )
142
 
143
  demo = gr.Blocks(css=custom_css)
144
  with demo:
 
147
 
148
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
149
  with gr.TabItem("🏅 LiveBench Results", elem_id="llm-benchmark-tab-table", id=0):
150
+ init_leaderboard(LEADERBOARD_DF)
151
 
152
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
153
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
src/populate.py CHANGED
@@ -17,8 +17,9 @@ def get_leaderboard_df(results_repo):
17
  subset_df = subset_data.to_pandas()
18
  subset_df = subset_df.sort_values(by="Total", ascending=False)
19
  subset_df = subset_df.round(2) # Round all numeric columns to two decimal places
20
- subset_df["Dataset Version"] = [subset] * len(subset_df)
21
  print(subset_df)
22
  subset_dfs[subset] = subset_df
23
- df = pd.concat(subset_dfs.values())
24
- return df, subset_dfs.keys()
 
 
17
  subset_df = subset_data.to_pandas()
18
  subset_df = subset_df.sort_values(by="Total", ascending=False)
19
  subset_df = subset_df.round(2) # Round all numeric columns to two decimal places
20
+ # subset_df["Dataset Version"] = [subset] * len(subset_df)
21
  print(subset_df)
22
  subset_dfs[subset] = subset_df
23
+ # df = pd.concat(subset_dfs.values())
24
+ # return df, subset_dfs.keys()
25
+ return subset_dfs