lukecq committed on
Commit 21f1468 · 1 Parent(s): b732491

update the UI

Files changed (5)
  1. .DS_Store +0 -0
  2. .gitignore +3 -0
  3. app.py +54 -260
  4. requirements.txt +1 -0
  5. src/leaderboard/load_results.py +2 -1
.DS_Store DELETED
Binary file (6.15 kB)
 
.gitignore ADDED
@@ -0,0 +1,3 @@
+ *__pycache__/
+ eval-results/
+ .DS_Store
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
  import os
  from huggingface_hub import snapshot_download, login
  from apscheduler.schedulers.background import BackgroundScheduler
+ from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
 
  from src.display.about import (
      CITATION_BUTTON_LABEL,
@@ -39,59 +40,6 @@ TYPES = ['number', 'markdown', 'str', 'str', 'number', 'number', 'number', 'numb
  # Load the data from the csv file
  csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results_20240808.csv'
  df_m3exam, df_mmlu, df_avg = load_data(csv_path)
- # df_m3exam = df_m3exam.copy()[show_columns]
- # df_mmlu = df_mmlu.copy()[show_columns]
- df_avg_init = df_avg.copy()[df_avg['type'] == '🔶 chat'][show_columns]
- df_m3exam_init = df_m3exam.copy()[df_m3exam['type'] == '🔶 chat'][show_columns]
- df_mmlu_init = df_mmlu.copy()[df_mmlu['type'] == '🔶 chat'][show_columns]
-
- # data_types = ['number', 'str', 'markdown','str', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
- # map_columns = {'rank':'R','type':'type', 'Model':'Model','open?':'open?', 'avg_sea':'avg_sea ⬇️', 'en':'en', 'zh':'zh', 'id':'id', 'th':'th', 'vi':'vi', 'avg':'avg', 'params':'params(B)'}
- # map_types = {'rank': 'number', 'type': 'str', 'Model': 'markdown', 'open?': 'str', 'avg_sea': 'number', 'en': 'number', 'zh': 'number', 'id': 'number', 'th': 'number', 'vi': 'number', 'avg': 'number', 'params': 'number'}
- # Searching and filtering
- def update_table(
-     hidden_df: pd.DataFrame,
-     # columns: list,
-     type_query: list,
-     open_query: list,
-     # precision_query: str,
-     # size_query: list,
-     # show_deleted: bool,
-     query: str,
- ):
-     # filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
-     # filtered_df = filter_queries(query, filtered_df)
-     # df = select_columns(filtered_df, columns)
-     filtered_df = hidden_df.copy()
-
-     filtered_df = filtered_df[filtered_df['type'].isin(type_query)]
-     map_open = {'open': 'Y', 'closed': 'N'}
-     filtered_df = filtered_df[filtered_df['open?'].isin([map_open[o] for o in open_query])]
-     filtered_df = filter_queries(query, filtered_df)
-     # filtered_df = filtered_df[[map_columns[k] for k in columns]]
-     # deduplication
-     # df = df.drop_duplicates(subset=["Model"])
-     df = filtered_df.drop_duplicates()
-     df = df[show_columns]
-     return df
-
- def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
-     return df[(df['Model'].str.contains(query, case=False))]
-
- def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
-     final_df = []
-     if query != "":
-         queries = [q.strip() for q in query.split(";")]
-         for _q in queries:
-             _q = _q.strip()
-             if _q != "":
-                 temp_filtered_df = search_table(filtered_df, _q)
-                 if len(temp_filtered_df) > 0:
-                     final_df.append(temp_filtered_df)
-         if len(final_df) > 0:
-             filtered_df = pd.concat(final_df)
-
-     return filtered_df
 
  demo = gr.Blocks(css=custom_css)
  with demo:
@@ -100,222 +48,68 @@ with demo:
      gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
      with gr.Tabs(elem_classes="tab-buttons") as tabs:
-         with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-Sum", id=0):
-             with gr.Row():
-                 with gr.Column():
-                     with gr.Row():
-                         search_bar = gr.Textbox(
-                             placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
-                             show_label=False,
-                             elem_id="search-bar",
-                         )
-                 # with gr.Row():
-                 #     with gr.Column():
-                 #         shown_columns = gr.CheckboxGroup(
-                 #             choices=["rank","type", "Model","open?", "avg_sea", "en", "zh", "id", "th", "vi", "avg", "params"],
-                 #             value=["rank", "type", "Model", "avg_sea", "en", "zh", "id", "th", "vi", "avg", "params"],
-                 #             label="Select model types to show",
-                 #             elem_id="column-select",
-                 #             interactive=True,
-                 #         )
-
-                 # with gr.Row():
-                 with gr.Column():
-                     type_query = gr.CheckboxGroup(
-                         choices=["🟢 base", "🔶 chat"],
-                         value=["🔶 chat" ],
-                         label="model types to show",
-                         elem_id="type-select",
-                         interactive=True,
-                     )
-                 with gr.Column():
-                     open_query = gr.CheckboxGroup(
-                         choices=["open", "closed"],
-                         value=["open", "closed"],
-                         label="open-source or closed-source models?",
-                         elem_id="open-select",
-                         interactive=True,
-                     )
-
-             leaderboard_table = gr.components.Dataframe(
-                 value=df_avg_init,
-                 # [[map_columns[k] for k in shown_columns.value]],
-                 # value=leaderboard_df[
-                 #     [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
-                 #     + shown_columns.value
-                 #     + [AutoEvalColumn.dummy.name]
-                 # ],
-                 # headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
-                 datatype=TYPES,
-                 elem_id="leaderboard-table",
-                 interactive=False,
-                 # datatype=['number', 'str', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number'],
-                 # datatype=[map_types[k] for k in shown_columns.value],
-                 visible=True,
-                 # column_widths=["20%", "6%", "8%", "6%", "8%", "8%", "6%", "6%", "6%", "6%", "6%"],
-             )
-
-             hidden_leaderboard_table_for_search = gr.components.Dataframe(
-                 value=df_avg,
-                 # elem_id="leaderboard-table",
-                 interactive=False,
-                 visible=False,
-             )
-
-             search_bar.submit(
-                 update_table,
-                 [
-                     # df_avg,
-                     hidden_leaderboard_table_for_search,
-                     # shown_columns,
-                     type_query,
-                     open_query,
-                     # filter_columns_type,
-                     # filter_columns_precision,
-                     # filter_columns_size,
-                     # deleted_models_visibility,
-                     search_bar,
+         with gr.Tab("🏅 Overall"):
+             Leaderboard(
+                 value=df_avg[show_columns],
+                 select_columns=SelectColumns(
+                     default_selection=show_columns,
+                     cant_deselect=["R", "Model"],
+                     label="Select Columns to Display:",
+                 ),
+                 search_columns=["Model"],
+                 # hide_columns=["model_name_for_query", "Model Size"],
+                 filter_columns=[
+                     "type",
+                     "open?",
+                     # ColumnFilter("MOE", type="boolean", default=False, label="MoE"),
+                     # ColumnFilter("Flagged", type="boolean", default=False),
+                     ColumnFilter("params(B)", default=[7, 10]),
                  ],
-                 leaderboard_table,
-             )
-             for selector in [type_query, open_query]:
-                 selector.change(
-                     update_table,
-                     [
-                         # df_avg,
-                         hidden_leaderboard_table_for_search,
-                         # shown_columns,
-                         type_query,
-                         open_query,
-                         # filter_columns_type,
-                         # filter_columns_precision,
-                         # filter_columns_size,
-                         # deleted_models_visibility,
-                         search_bar,
-                     ],
-                     leaderboard_table,
-                 )
-         with gr.TabItem("M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
-             with gr.Row():
-                 with gr.Column():
-                     search_bar = gr.Textbox(
-                         placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
-                         show_label=False,
-                         elem_id="search-bar",
-                     )
-                 with gr.Column():
-                     type_query = gr.CheckboxGroup(
-                         choices=["🟢 base", "🔶 chat"],
-                         value=["🔶 chat" ],
-                         label="model types to show",
-                         elem_id="type-select",
-                         interactive=True,
-                     )
-                 with gr.Column():
-                     open_query = gr.CheckboxGroup(
-                         choices=["open", "closed"],
-                         value=["open", "closed"],
-                         label="open-source or closed-source models?",
-                         elem_id="open-select",
-                         interactive=True,
-                     )
-
-             leaderboard_table = gr.components.Dataframe(
-                 value=df_m3exam_init,
-                 interactive=False,
-                 visible=True,
-                 # datatype=['number', 'str', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number'],
                  datatype=TYPES,
+                 # column_widths=["2%", "33%"],
              )
-
-             hidden_leaderboard_table_for_search = gr.components.Dataframe(
-                 value=df_m3exam,
-                 interactive=False,
-                 visible=False,
-             )
-
-             search_bar.submit(
-                 update_table,
-                 [
-                     hidden_leaderboard_table_for_search,
-                     type_query,
-                     open_query,
-                     search_bar,
+
+         with gr.Tab("M3Exam"):
+             Leaderboard(
+                 value=df_m3exam[show_columns],
+                 select_columns=SelectColumns(
+                     default_selection=show_columns,
+                     cant_deselect=["R", "Model"],
+                     label="Select Columns to Display:",
+                 ),
+                 search_columns=["Model"],
+                 # hide_columns=["model_name_for_query", "Model Size"],
+                 filter_columns=[
+                     "type",
+                     "open?",
+                     # ColumnFilter("MOE", type="boolean", default=False, label="MoE"),
+                     # ColumnFilter("Flagged", type="boolean", default=False),
+                     ColumnFilter("params(B)", default=[7, 10]),
                  ],
-                 leaderboard_table,
-             )
-             for selector in [type_query, open_query]:
-                 selector.change(
-                     update_table,
-                     [
-                         hidden_leaderboard_table_for_search,
-                         type_query,
-                         open_query,
-                         search_bar,
-                     ],
-                     leaderboard_table,
-                 )
-
-         with gr.TabItem("MMLU", elem_id="llm-benchmark-MMLU", id=2):
-             with gr.Row():
-                 with gr.Column():
-                     search_bar = gr.Textbox(
-                         placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
-                         show_label=False,
-                         elem_id="search-bar",
-                     )
-                 with gr.Column():
-                     type_query = gr.CheckboxGroup(
-                         choices=["🟢 base", "🔶 chat"],
-                         value=["🔶 chat" ],
-                         label="model types to show",
-                         elem_id="type-select",
-                         interactive=True,
-                     )
-                 with gr.Column():
-                     open_query = gr.CheckboxGroup(
-                         choices=["open", "closed"],
-                         value=["open", "closed"],
-                         label="open-source or closed-source models?",
-                         elem_id="open-select",
-                         interactive=True,
-                     )
-
-             leaderboard_table = gr.components.Dataframe(
-                 value=df_mmlu_init,
-                 interactive=False,
-                 visible=True,
-                 # datatype=['number', 'str', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number'],
                  datatype=TYPES,
+                 # column_widths=["2%", "33%"],
              )
 
-             hidden_leaderboard_table_for_search = gr.components.Dataframe(
-                 value=df_mmlu,
-                 interactive=False,
-                 visible=False,
-             )
-
-             search_bar.submit(
-                 update_table,
-                 [
-                     hidden_leaderboard_table_for_search,
-                     type_query,
-                     open_query,
-                     search_bar,
+         with gr.Tab("MMLU"):
+             Leaderboard(
+                 value=df_mmlu[show_columns],
+                 select_columns=SelectColumns(
+                     default_selection=show_columns,
+                     cant_deselect=["R", "Model"],
+                     label="Select Columns to Display:",
+                 ),
+                 search_columns=["Model"],
+                 # hide_columns=["model_name_for_query", "Model Size"],
+                 filter_columns=[
+                     "type",
+                     "open?",
+                     # ColumnFilter("MOE", type="boolean", default=False, label="MoE"),
+                     # ColumnFilter("Flagged", type="boolean", default=False),
+                     ColumnFilter("params(B)", default=[7, 10]),
                  ],
-                 leaderboard_table,
+                 datatype=TYPES,
+                 # column_widths=["2%", "33%"],
              )
-             for selector in [type_query, open_query]:
-                 selector.change(
-                     update_table,
-                     [
-                         hidden_leaderboard_table_for_search,
-                         type_query,
-                         open_query,
-                         search_bar,
-                     ],
-                     leaderboard_table,
-                 )
 
      with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
          gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
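Note: after this commit each tab is a thin wrapper around gradio_leaderboard instead of hand-rolled search/filter callbacks. A minimal, self-contained sketch of the same pattern, runnable on its own (the toy DataFrame, its values, and the datatype list are illustrative; the Leaderboard, SelectColumns, and ColumnFilter arguments mirror the calls in this diff):

import gradio as gr
import pandas as pd
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter

# Toy stand-in for df_avg[show_columns]; the real app builds this via load_data(csv_path).
df = pd.DataFrame({
    "R": [1, 2],
    "type": ["🔶 chat", "🟢 base"],
    "Model": ["demo-model-a", "demo-model-b"],  # hypothetical model names
    "open?": ["Y", "N"],
    "avg_sea": [61.2, 55.4],
    "params(B)": [7.0, 13.0],
})

with gr.Blocks() as demo:
    Leaderboard(
        value=df,
        select_columns=SelectColumns(
            default_selection=list(df.columns),
            cant_deselect=["R", "Model"],
            label="Select Columns to Display:",
        ),
        search_columns=["Model"],  # free-text search over the Model column
        filter_columns=[
            "type",                # bare column names get default filters inferred from dtype
            "open?",
            ColumnFilter("params(B)", default=[7, 10]),  # numeric range filter, preset to 7-10B
        ],
        datatype=["number", "str", "markdown", "str", "number", "number"],
    )

demo.launch()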
requirements.txt CHANGED
@@ -3,6 +3,7 @@ black==23.11.0
  click==8.1.3
  datasets==2.14.5
  gradio==4.4.0
+ gradio-leaderboard==0.0.11
  gradio_client==0.7.0
  huggingface-hub>=0.18.0
  matplotlib==3.7.1
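The Space installs its dependencies from this file; for a local run the new pin can also be installed directly with standard pip usage:

pip install gradio-leaderboard==0.0.11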
src/leaderboard/load_results.py CHANGED
@@ -28,7 +28,8 @@ def make_clickable_model(model_name, link=None):
      if len(model_name.split("/")) == 2:
          link = "https://huggingface.co/" + model_name
          return (
-             f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name.split("/")[-1]}</a>'
+             # f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name.split("/")[-1]}</a>'
+             f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name.split("/")[-1]}</a>'
          )
      return model_name
 
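For reference, a self-contained sketch of the updated helper and the markup it now emits (function body reassembled from the hunk above; the model id in the usage line is hypothetical):

def make_clickable_model(model_name, link=None):
    # "org/name" ids get linked to their Hugging Face page; the new style uses
    # the theme's link color with a dotted underline instead of a solid one.
    if len(model_name.split("/")) == 2:
        link = "https://huggingface.co/" + model_name
        return (
            f'<a target="_blank" href="{link}" style="color: var(--link-text-color); '
            f'text-decoration: underline;text-decoration-style: dotted;">{model_name.split("/")[-1]}</a>'
        )
    return model_name

print(make_clickable_model("org/demo-model"))
# <a target="_blank" href="https://huggingface.co/org/demo-model" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">demo-model</a>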