Spaces:
Runtime error
Runtime error
refactor: refactor the dashboard code
Browse files- app.py +30 -136
- src/display/gradio_formatting.py +92 -0
- src/display/utils.py +1 -1
app.py
CHANGED
@@ -15,13 +15,13 @@ from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME
|
|
15 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
16 |
from src.read_evals import get_raw_eval_results, get_leaderboard_df
|
17 |
from src.utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols, submit_results, clear_reranking_selections
|
|
|
18 |
|
19 |
|
20 |
def restart_space():
|
21 |
API.restart_space(repo_id=REPO_ID)
|
22 |
|
23 |
|
24 |
-
|
25 |
try:
|
26 |
snapshot_download(
|
27 |
repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
|
@@ -54,6 +54,9 @@ shown_columns_long_doc, types_long_doc = get_default_cols(
|
|
54 |
leaderboard_df_long_doc = leaderboard_df_long_doc[~leaderboard_df_long_doc[COL_NAME_IS_ANONYMOUS]][shown_columns_long_doc]
|
55 |
leaderboard_df_long_doc.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
|
56 |
|
|
|
|
|
|
|
57 |
|
58 |
def update_metric_qa(
|
59 |
metric: str,
|
@@ -90,90 +93,33 @@ with demo:
|
|
90 |
with gr.Column():
|
91 |
# search retrieval models
|
92 |
with gr.Row():
|
93 |
-
selected_version =
|
94 |
-
choices=["AIR-Bench_24.04",],
|
95 |
-
value="AIR-Bench_24.04",
|
96 |
-
label="Select the version of AIR-Bench",
|
97 |
-
interactive = True
|
98 |
-
)
|
99 |
with gr.Row():
|
100 |
-
search_bar =
|
101 |
-
placeholder=" 🔍 Search for retrieval methods (separate multiple queries with `;`) and press ENTER...",
|
102 |
-
show_label=False,
|
103 |
-
elem_id="search-bar",
|
104 |
-
info="Search the retrieval methods"
|
105 |
-
)
|
106 |
-
# select reranking model
|
107 |
-
reranking_models = sorted(list(frozenset([eval_result.reranking_model for eval_result in raw_data])))
|
108 |
with gr.Row():
|
109 |
-
selected_rerankings =
|
110 |
-
choices=reranking_models,
|
111 |
-
# value=reranking_models,
|
112 |
-
label="Select the reranking models",
|
113 |
-
elem_id="reranking-select",
|
114 |
-
interactive=True,
|
115 |
-
multiselect=True
|
116 |
-
)
|
117 |
with gr.Row():
|
118 |
-
select_noreranker_only_btn =
|
119 |
-
value="Only show results without ranking models",
|
120 |
-
)
|
121 |
|
122 |
with gr.Column(min_width=320):
|
123 |
# select the metric
|
124 |
-
selected_metric =
|
125 |
-
choices=METRIC_LIST,
|
126 |
-
value=DEFAULT_METRIC,
|
127 |
-
label="Select the metric",
|
128 |
-
interactive=True,
|
129 |
-
elem_id="metric-select",
|
130 |
-
)
|
131 |
# select domain
|
132 |
with gr.Row():
|
133 |
-
selected_domains =
|
134 |
-
choices=DOMAIN_COLS_QA,
|
135 |
-
value=DOMAIN_COLS_QA,
|
136 |
-
label="Select the domains",
|
137 |
-
elem_id="domain-column-select",
|
138 |
-
interactive=True,
|
139 |
-
)
|
140 |
# select language
|
141 |
with gr.Row():
|
142 |
-
selected_langs =
|
143 |
-
choices=LANG_COLS_QA,
|
144 |
-
value=LANG_COLS_QA,
|
145 |
-
label="Select the languages",
|
146 |
-
elem_id="language-column-select",
|
147 |
-
multiselect=True,
|
148 |
-
interactive=True
|
149 |
-
)
|
150 |
with gr.Row():
|
151 |
-
show_anonymous =
|
152 |
-
label="Show anonymous submissions",
|
153 |
-
value=False,
|
154 |
-
info="The anonymous submissions might have invalid model information."
|
155 |
-
)
|
156 |
with gr.Row():
|
157 |
-
show_revision_and_timestamp =
|
158 |
-
label="Show submission details",
|
159 |
-
value=False,
|
160 |
-
info="Show the revision and timestamp information of submissions"
|
161 |
-
)
|
162 |
|
163 |
-
|
164 |
-
|
165 |
-
datatype=types_qa,
|
166 |
-
elem_id="leaderboard-table",
|
167 |
-
interactive=False,
|
168 |
-
visible=True,
|
169 |
-
)
|
170 |
|
171 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
172 |
-
hidden_leaderboard_table_for_search =
|
173 |
-
value=original_df_qa,
|
174 |
-
datatype=types_qa,
|
175 |
-
visible=False,
|
176 |
-
)
|
177 |
|
178 |
# Set search_bar listener
|
179 |
search_bar.submit(
|
@@ -232,90 +178,38 @@ with demo:
|
|
232 |
with gr.Row():
|
233 |
with gr.Column():
|
234 |
with gr.Row():
|
235 |
-
selected_version =
|
236 |
-
choices=["AIR-Bench_24.04",],
|
237 |
-
value="AIR-Bench_24.04",
|
238 |
-
label="Select the version of AIR-Bench",
|
239 |
-
interactive=True
|
240 |
-
)
|
241 |
with gr.Row():
|
242 |
-
search_bar =
|
243 |
-
info="Search the retrieval methods",
|
244 |
-
placeholder=" 🔍 Search for retrieval methods (separate multiple queries with `;`)"
|
245 |
-
" and press ENTER...",
|
246 |
-
show_label=False,
|
247 |
-
elem_id="search-bar-long-doc",
|
248 |
-
)
|
249 |
# select reranking model
|
250 |
-
reranking_models = list(frozenset([eval_result.reranking_model for eval_result in raw_data]))
|
251 |
with gr.Row():
|
252 |
-
selected_rerankings =
|
253 |
-
choices=reranking_models,
|
254 |
-
# value=reranking_models,
|
255 |
-
label="Select the reranking models",
|
256 |
-
elem_id="reranking-select-long-doc",
|
257 |
-
interactive=True,
|
258 |
-
multiselect=True,
|
259 |
-
)
|
260 |
with gr.Row():
|
261 |
-
select_noreranker_only_btn =
|
262 |
-
value="Only show results without ranking models",
|
263 |
-
)
|
264 |
with gr.Column(min_width=320):
|
265 |
# select the metric
|
266 |
with gr.Row():
|
267 |
-
selected_metric =
|
268 |
-
choices=METRIC_LIST,
|
269 |
-
value=DEFAULT_METRIC,
|
270 |
-
label="Select the metric",
|
271 |
-
interactive=True,
|
272 |
-
elem_id="metric-select-long-doc",
|
273 |
-
)
|
274 |
# select domain
|
275 |
with gr.Row():
|
276 |
-
selected_domains =
|
277 |
-
choices=DOMAIN_COLS_LONG_DOC,
|
278 |
-
value=DOMAIN_COLS_LONG_DOC,
|
279 |
-
label="Select the domains",
|
280 |
-
elem_id="domain-column-select-long-doc",
|
281 |
-
interactive=True,
|
282 |
-
)
|
283 |
# select language
|
284 |
with gr.Row():
|
285 |
-
selected_langs =
|
286 |
-
|
287 |
-
value=LANG_COLS_LONG_DOC,
|
288 |
-
label="Select the languages",
|
289 |
-
elem_id="language-column-select-long-doc",
|
290 |
-
multiselect=True,
|
291 |
-
interactive=True
|
292 |
)
|
293 |
with gr.Row():
|
294 |
-
show_anonymous =
|
295 |
-
label="Show anonymous submissions",
|
296 |
-
value=False,
|
297 |
-
info="The anonymous submissions might have invalid model information."
|
298 |
-
)
|
299 |
with gr.Row():
|
300 |
-
show_revision_and_timestamp =
|
301 |
-
label="Show submission details",
|
302 |
-
value=False,
|
303 |
-
info="Show the revision and timestamp information of submissions"
|
304 |
-
)
|
305 |
|
306 |
-
leaderboard_table_long_doc =
|
307 |
-
|
308 |
-
datatype=types_long_doc,
|
309 |
-
elem_id="leaderboard-table-long-doc",
|
310 |
-
interactive=False,
|
311 |
-
visible=True,
|
312 |
)
|
313 |
|
314 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
315 |
-
hidden_leaderboard_table_for_search =
|
316 |
-
|
317 |
-
datatype=types_long_doc,
|
318 |
-
visible=False,
|
319 |
)
|
320 |
|
321 |
# Set search_bar listener
|
|
|
15 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
16 |
from src.read_evals import get_raw_eval_results, get_leaderboard_df
|
17 |
from src.utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols, submit_results, clear_reranking_selections
|
18 |
+
from src.display.gradio_formatting import get_version_dropdown, get_search_bar, get_reranking_dropdown, get_noreranker_button, get_metric_dropdown, get_domain_dropdown, get_language_dropdown, get_anonymous_checkbox, get_revision_and_ts_checkbox, get_leaderboard_table
|
19 |
|
20 |
|
21 |
def restart_space():
|
22 |
API.restart_space(repo_id=REPO_ID)
|
23 |
|
24 |
|
|
|
25 |
try:
|
26 |
snapshot_download(
|
27 |
repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
|
|
|
54 |
leaderboard_df_long_doc = leaderboard_df_long_doc[~leaderboard_df_long_doc[COL_NAME_IS_ANONYMOUS]][shown_columns_long_doc]
|
55 |
leaderboard_df_long_doc.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
|
56 |
|
57 |
+
# Distinct reranking model names found in raw_data, sorted so that the
# choices offered by the reranking dropdowns have a stable order.
reranking_models = sorted({eval_result.reranking_model for eval_result in raw_data})
|
59 |
+
|
60 |
|
61 |
def update_metric_qa(
|
62 |
metric: str,
|
|
|
93 |
with gr.Column():
|
94 |
# search retrieval models
|
95 |
with gr.Row():
|
96 |
+
selected_version = get_version_dropdown()
|
|
|
|
|
|
|
|
|
|
|
97 |
with gr.Row():
|
98 |
+
search_bar = get_search_bar()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
with gr.Row():
|
100 |
+
selected_rerankings = get_reranking_dropdown(reranking_models)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
with gr.Row():
|
102 |
+
select_noreranker_only_btn = get_noreranker_button()
|
|
|
|
|
103 |
|
104 |
with gr.Column(min_width=320):
|
105 |
# select the metric
|
106 |
+
selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC)
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
# select domain
|
108 |
with gr.Row():
|
109 |
+
selected_domains = get_domain_dropdown(DOMAIN_COLS_QA, DOMAIN_COLS_QA)
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
# select language
|
111 |
with gr.Row():
|
112 |
+
selected_langs = get_language_dropdown(LANG_COLS_QA, LANG_COLS_QA)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
with gr.Row():
|
114 |
+
show_anonymous = get_anonymous_checkbox()
|
|
|
|
|
|
|
|
|
115 |
with gr.Row():
|
116 |
+
show_revision_and_timestamp = get_revision_and_ts_checkbox()
|
|
|
|
|
|
|
|
|
117 |
|
118 |
+
|
119 |
+
leaderboard_table = get_leaderboard_table(leaderboard_df_qa, types_qa)
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
122 |
+
hidden_leaderboard_table_for_search = get_leaderboard_table(original_df_qa, types_qa, visible=False)
|
|
|
|
|
|
|
|
|
123 |
|
124 |
# Set search_bar listener
|
125 |
search_bar.submit(
|
|
|
178 |
with gr.Row():
|
179 |
with gr.Column():
|
180 |
with gr.Row():
|
181 |
+
selected_version = get_version_dropdown()
|
|
|
|
|
|
|
|
|
|
|
182 |
with gr.Row():
|
183 |
+
search_bar = get_search_bar()
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
# select reranking model
|
|
|
185 |
with gr.Row():
|
186 |
+
selected_rerankings = get_reranking_dropdown(reranking_models)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
with gr.Row():
|
188 |
+
select_noreranker_only_btn = get_noreranker_button()
|
|
|
|
|
189 |
with gr.Column(min_width=320):
|
190 |
# select the metric
|
191 |
with gr.Row():
|
192 |
+
selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC)
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
# select domain
|
194 |
with gr.Row():
|
195 |
+
selected_domains = get_domain_dropdown(DOMAIN_COLS_LONG_DOC, DOMAIN_COLS_LONG_DOC)
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
# select language
|
197 |
with gr.Row():
|
198 |
+
selected_langs = get_language_dropdown(
|
199 |
+
LANG_COLS_LONG_DOC, LANG_COLS_LONG_DOC
|
|
|
|
|
|
|
|
|
|
|
200 |
)
|
201 |
with gr.Row():
|
202 |
+
show_anonymous = get_anonymous_checkbox()
|
|
|
|
|
|
|
|
|
203 |
with gr.Row():
|
204 |
+
show_revision_and_timestamp = get_revision_and_ts_checkbox()
|
|
|
|
|
|
|
|
|
205 |
|
206 |
+
leaderboard_table_long_doc = get_leaderboard_table(
|
207 |
+
leaderboard_df_long_doc, types_long_doc
|
|
|
|
|
|
|
|
|
208 |
)
|
209 |
|
210 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
211 |
+
hidden_leaderboard_table_for_search =get_leaderboard_table(
|
212 |
+
original_df_long_doc, types_long_doc, visible=False
|
|
|
|
|
213 |
)
|
214 |
|
215 |
# Set search_bar listener
|
src/display/gradio_formatting.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
|
4 |
+
def get_version_dropdown():
    """Build the dropdown used to pick the AIR-Bench version.

    The only choice offered is ``AIR-Bench_24.04``, which is also the
    preselected value.

    Returns:
        An interactive ``gr.Dropdown``.
    """
    default_version = "AIR-Bench_24.04"
    version_dropdown = gr.Dropdown(
        choices=[default_version],
        value=default_version,
        label="Select the version of AIR-Bench",
        interactive=True,
    )
    return version_dropdown
|
11 |
+
|
12 |
+
|
13 |
+
def get_search_bar():
    """Build the free-text search box for retrieval methods.

    Returns:
        A ``gr.Textbox`` with its label hidden; the placeholder text explains
        the ``;``-separated query syntax.
    """
    hint = " 🔍 Search for retrieval methods (separate multiple queries with `;`) and press ENTER..."
    # NOTE(review): no elem_id is assigned to this textbox - confirm that no
    # stylesheet or script relies on a "search-bar" id.
    search_box = gr.Textbox(
        placeholder=hint,
        show_label=False,
        info="Search the retrieval methods",
    )
    return search_box
|
20 |
+
|
21 |
+
|
22 |
+
def get_reranking_dropdown(model_list):
    """Build the multi-select dropdown of reranking models.

    Args:
        model_list: The reranking model names offered as choices.

    Returns:
        An interactive, multi-select ``gr.Dropdown`` with no initial value.
    """
    # NOTE(review): no elem_id is assigned to this dropdown - confirm that
    # nothing relies on a "reranking-select" id.
    dropdown_options = {
        "choices": model_list,
        "label": "Select the reranking models",
        "interactive": True,
        "multiselect": True,
    }
    return gr.Dropdown(**dropdown_options)
|
30 |
+
|
31 |
+
|
32 |
+
def get_noreranker_button():
    """Build the button that restricts the view to results without a reranker.

    Returns:
        A ``gr.Button`` carrying the fixed caption below.
    """
    caption = "Only show results without ranking models"
    return gr.Button(value=caption)
|
36 |
+
|
37 |
+
|
38 |
+
def get_metric_dropdown(metric_list, default_metrics):
    """Build the dropdown used to choose the evaluation metric.

    Args:
        metric_list: The metrics offered as choices.
        default_metrics: The initially selected value.

    Returns:
        An interactive ``gr.Dropdown``.
    """
    # NOTE(review): no elem_id is assigned to this dropdown - confirm that
    # nothing relies on a "metric-select" style id.
    metric_dropdown = gr.Dropdown(
        label="Select the metric",
        choices=metric_list,
        value=default_metrics,
        interactive=True,
    )
    return metric_dropdown
|
46 |
+
|
47 |
+
|
48 |
+
def get_domain_dropdown(domain_list, default_domains):
    """Build the domain selector.

    Despite the function name, the widget is a ``gr.CheckboxGroup`` rather
    than a dropdown.

    Args:
        domain_list: The domains offered as choices.
        default_domains: The domains ticked initially.

    Returns:
        An interactive ``gr.CheckboxGroup``.
    """
    # NOTE(review): no elem_id is assigned to this group - confirm that
    # nothing relies on a "domain-column-select" id.
    selector_options = dict(
        choices=domain_list,
        value=default_domains,
        label="Select the domains",
        interactive=True,
    )
    return gr.CheckboxGroup(**selector_options)
|
56 |
+
|
57 |
+
|
58 |
+
def get_language_dropdown(language_list, default_languages):
    """Build the multi-select dropdown of languages.

    Args:
        language_list: The languages offered as choices.
        default_languages: The languages selected initially.

    Returns:
        An interactive, multi-select ``gr.Dropdown``.
    """
    return gr.Dropdown(
        choices=language_list,
        # The initial selection comes from ``default_languages``, not from
        # ``language_list``, so a caller can preselect a subset of the choices.
        value=default_languages,
        label="Select the languages",
        multiselect=True,
        interactive=True,
    )
|
67 |
+
|
68 |
+
|
69 |
+
def get_anonymous_checkbox():
    """Build the checkbox that toggles anonymous submissions.

    Returns:
        A ``gr.Checkbox`` that starts unchecked.
    """
    checkbox_options = {
        "label": "Show anonymous submissions",
        "value": False,
        "info": "The anonymous submissions might have invalid model information.",
    }
    return gr.Checkbox(**checkbox_options)
|
75 |
+
|
76 |
+
|
77 |
+
def get_revision_and_ts_checkbox():
    """Build the checkbox that toggles the submission-detail columns.

    Returns:
        A ``gr.Checkbox`` that starts unchecked.
    """
    details_checkbox = gr.Checkbox(
        value=False,
        label="Show submission details",
        info="Show the revision and timestamp information of submissions",
    )
    return details_checkbox
|
83 |
+
|
84 |
+
|
85 |
+
def get_leaderboard_table(df, datatype, visible=True, elem_id="leaderboard-table"):
    """Build a read-only dataframe component for leaderboard data.

    Args:
        df: The data shown in the table.
        datatype: Passed through as the ``datatype`` of the component.
        visible: Whether the table is rendered; pass ``False`` for a hidden
            helper table.
        elem_id: The element id given to the component. It defaults to
            ``"leaderboard-table"``; pass a distinct id (or ``None``) when
            several tables are created, otherwise they all share one id.

    Returns:
        A non-interactive ``gr.components.Dataframe``.
    """
    return gr.components.Dataframe(
        value=df,
        datatype=datatype,
        elem_id=elem_id,
        interactive=False,
        visible=visible,
    )
|
src/display/utils.py
CHANGED
@@ -90,4 +90,4 @@ COLS_LITE = [c.name for c in fields(AutoEvalColumnQA) if c.displayed_by_default
|
|
90 |
|
91 |
QA_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksQA]
|
92 |
|
93 |
-
LONG_DOC_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksLongDoc]
|
|
|
90 |
|
91 |
QA_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksQA]
|
92 |
|
93 |
+
LONG_DOC_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksLongDoc]
|