Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
5b11066
·
unverified ·
2 Parent(s): 5401ba4 bb09125

Merge pull request #1 from AIR-Bench/fix-longdoc-noreranking-bug-1212

Browse files
Files changed (1) hide show
  1. app.py +141 -85
app.py CHANGED
@@ -108,28 +108,125 @@ def update_doc_metric(
108
  )
109
 
110
 
111
- def update_qa_version(version):
112
  global datastore
113
  global ds_dict
114
- datastore = ds_dict[version]
 
 
 
 
 
 
 
 
 
115
  domain_elem = get_domain_dropdown(QABenchmarks[datastore.slug])
116
- lang_elem = get_language_dropdown(QABenchmarks[datastore.slug])
117
- model_elem = get_reranking_dropdown(datastore.reranking_models)
118
- df_elem = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
119
- hidden_df_elem = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
120
- return domain_elem, lang_elem, model_elem, df_elem, hidden_df_elem
121
 
122
 
123
- def update_doc_version(version):
124
- global datastore
125
- global ds_dict
126
- datastore = ds_dict[version]
127
  domain_elem = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
 
 
 
 
 
 
 
 
 
 
 
128
  lang_elem = get_language_dropdown(LongDocBenchmarks[datastore.slug])
 
 
 
 
 
129
  model_elem = get_reranking_dropdown(datastore.reranking_models)
130
- df_elem = get_leaderboard_table(datastore.doc_fmt_df, datastore.doc_types)
131
- hidden_df_elem = get_leaderboard_table(datastore.doc_raw_df, datastore.doc_types, visible=False)
132
- return domain_elem, lang_elem, model_elem, df_elem, hidden_df_elem
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
 
135
  demo = gr.Blocks(css=custom_css)
@@ -151,9 +248,11 @@ with demo:
151
  # select domain
152
  with gr.Row():
153
  domains = get_domain_dropdown(QABenchmarks[datastore.slug])
 
154
  # select language
155
  with gr.Row():
156
  langs = get_language_dropdown(QABenchmarks[datastore.slug])
 
157
  with gr.Column():
158
  # select the metric
159
  metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC_QA)
@@ -170,18 +269,15 @@ with demo:
170
  # select reranking models
171
  with gr.Column():
172
  models = get_reranking_dropdown(datastore.reranking_models)
 
173
  # shown_table
174
  qa_df_elem_ret_rerank = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
 
175
  # Dummy leaderboard for handling the case when the user uses backspace key
176
  qa_df_elem_ret_rerank_hidden = get_leaderboard_table(
177
  datastore.qa_raw_df, datastore.qa_types, visible=False
178
  )
179
-
180
- version.change(
181
- update_qa_version,
182
- version,
183
- [domains, langs, models, qa_df_elem_ret_rerank, qa_df_elem_ret_rerank_hidden],
184
- )
185
 
186
  set_listeners(
187
  TaskType.qa,
@@ -210,31 +306,17 @@ with demo:
210
  search_bar_ret = get_search_bar()
211
  with gr.Column(scale=1):
212
  models_ret = get_noreranking_dropdown()
213
-
214
- _qa_df_ret = datastore.qa_fmt_df[datastore.qa_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"]
215
- _qa_df_ret = reset_rank(_qa_df_ret)
216
  qa_df_elem_ret = get_leaderboard_table(_qa_df_ret, datastore.qa_types)
 
217
 
218
  # Dummy leaderboard for handling the case when the user uses backspace key
219
- _qa_df_ret_hidden = datastore.qa_raw_df[
220
- datastore.qa_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
221
- ]
222
- _qa_df_ret_hidden = reset_rank(_qa_df_ret_hidden)
223
  qa_df_elem_ret_hidden = get_leaderboard_table(
224
  _qa_df_ret_hidden, datastore.qa_types, visible=False
225
  )
226
-
227
- version.change(
228
- update_qa_version,
229
- version,
230
- [
231
- domains,
232
- langs,
233
- models_ret,
234
- qa_df_elem_ret,
235
- qa_df_elem_ret_hidden,
236
- ],
237
- )
238
 
239
  set_listeners(
240
  TaskType.qa,
@@ -265,29 +347,22 @@ with demo:
265
  )
266
 
267
  with gr.TabItem("Reranking Only", id=12):
268
- _qa_df_rerank = datastore.qa_fmt_df[datastore.qa_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
269
- _qa_df_rerank = reset_rank(_qa_df_rerank)
270
  qa_rerank_models = _qa_df_rerank[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
271
  with gr.Row():
272
  with gr.Column(scale=1):
273
  qa_models_rerank = get_reranking_dropdown(qa_rerank_models)
 
274
  with gr.Column(scale=1):
275
  qa_search_bar_rerank = gr.Textbox(show_label=False, visible=False)
276
  qa_df_elem_rerank = get_leaderboard_table(_qa_df_rerank, datastore.qa_types)
 
277
 
278
- _qa_df_rerank_hidden = datastore.qa_raw_df[
279
- datastore.qa_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
280
- ]
281
- _qa_df_rerank_hidden = reset_rank(_qa_df_rerank_hidden)
282
  qa_df_elem_rerank_hidden = get_leaderboard_table(
283
  _qa_df_rerank_hidden, datastore.qa_types, visible=False
284
  )
285
-
286
- version.change(
287
- update_qa_version,
288
- version,
289
- [domains, langs, qa_models_rerank, qa_df_elem_rerank, qa_df_elem_rerank_hidden],
290
- )
291
 
292
  set_listeners(
293
  TaskType.qa,
@@ -322,9 +397,11 @@ with demo:
322
  # select domain
323
  with gr.Row():
324
  domains = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
 
325
  # select language
326
  with gr.Row():
327
  langs = get_language_dropdown(LongDocBenchmarks[datastore.slug])
 
328
  with gr.Column():
329
  # select the metric
330
  with gr.Row():
@@ -340,19 +417,17 @@ with demo:
340
  search_bar = get_search_bar()
341
  with gr.Column():
342
  models = get_reranking_dropdown(datastore.reranking_models)
 
343
 
344
  doc_df_elem_ret_rerank = get_leaderboard_table(datastore.doc_fmt_df, datastore.doc_types)
345
 
346
- # Dummy leaderboard for handling the case when the user uses backspace key
 
347
  doc_df_elem_ret_rerank_hidden = get_leaderboard_table(
348
  datastore.doc_raw_df, datastore.doc_types, visible=False
349
  )
350
 
351
- version.change(
352
- update_doc_version,
353
- version,
354
- [domains, langs, models, doc_df_elem_ret_rerank, doc_df_elem_ret_rerank_hidden],
355
- )
356
 
357
  set_listeners(
358
  TaskType.long_doc,
@@ -388,26 +463,15 @@ with demo:
388
  search_bar_ret = get_search_bar()
389
  with gr.Column(scale=1):
390
  models_ret = get_noreranking_dropdown()
391
-
392
- _doc_df_ret = datastore.doc_fmt_df[
393
- datastore.doc_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
394
- ]
395
- _doc_df_ret = reset_rank(_doc_df_ret)
396
  doc_df_elem_ret = get_leaderboard_table(_doc_df_ret, datastore.doc_types)
 
397
 
398
- _doc_df_ret_hidden = datastore.doc_raw_df[
399
- datastore.doc_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
400
- ]
401
- _doc_df_ret_hidden = reset_rank(_doc_df_ret_hidden)
402
  doc_df_elem_ret_hidden = get_leaderboard_table(
403
  _doc_df_ret_hidden, datastore.doc_types, visible=False
404
  )
405
-
406
- version.change(
407
- update_doc_version,
408
- version,
409
- [domains, langs, models_ret, doc_df_elem_ret, doc_df_elem_ret_hidden],
410
- )
411
 
412
  set_listeners(
413
  TaskType.long_doc,
@@ -437,10 +501,7 @@ with demo:
437
  queue=True,
438
  )
439
  with gr.TabItem("Reranking Only", id=22):
440
- _doc_df_rerank = datastore.doc_fmt_df[
441
- datastore.doc_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
442
- ]
443
- _doc_df_rerank = reset_rank(_doc_df_rerank)
444
  doc_rerank_models = (
445
  _doc_df_rerank[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
446
  )
@@ -450,19 +511,14 @@ with demo:
450
  with gr.Column(scale=1):
451
  doc_search_bar_rerank = gr.Textbox(show_label=False, visible=False)
452
  doc_df_elem_rerank = get_leaderboard_table(_doc_df_rerank, datastore.doc_types)
453
- _doc_df_rerank_hidden = datastore.doc_raw_df[
454
- datastore.doc_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
455
- ]
456
- _doc_df_rerank_hidden = reset_rank(_doc_df_rerank_hidden)
457
  doc_df_elem_rerank_hidden = get_leaderboard_table(
458
  _doc_df_rerank_hidden, datastore.doc_types, visible=False
459
  )
460
 
461
- version.change(
462
- update_doc_version,
463
- version,
464
- [domains, langs, doc_models_rerank, doc_df_elem_rerank, doc_df_elem_rerank_hidden],
465
- )
466
 
467
  set_listeners(
468
  TaskType.long_doc,
 
108
  )
109
 
110
 
111
+ def update_datastore(version):
112
  global datastore
113
  global ds_dict
114
+ if datastore.version != version:
115
+ print(f"updated data version: {datastore.version} -> {version}")
116
+ datastore = ds_dict[version]
117
+ else:
118
+ print(f"current data version: {datastore.version}")
119
+ return datastore
120
+
121
+
122
+ def update_qa_domains(version):
123
+ datastore = update_datastore(version)
124
  domain_elem = get_domain_dropdown(QABenchmarks[datastore.slug])
125
+ return domain_elem
 
 
 
 
126
 
127
 
128
+ def update_doc_domains(version):
129
+ datastore = update_datastore(version)
 
 
130
  domain_elem = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
131
+ return domain_elem
132
+
133
+
134
+ def update_qa_langs(version):
135
+ datastore = update_datastore(version)
136
+ lang_elem = get_language_dropdown(QABenchmarks[datastore.slug])
137
+ return lang_elem
138
+
139
+
140
+ def update_doc_langs(version):
141
+ datastore = update_datastore(version)
142
  lang_elem = get_language_dropdown(LongDocBenchmarks[datastore.slug])
143
+ return lang_elem
144
+
145
+
146
+ def update_qa_models(version):
147
+ datastore = update_datastore(version)
148
  model_elem = get_reranking_dropdown(datastore.reranking_models)
149
+ return model_elem
150
+
151
+
152
+ def update_qa_df_ret_rerank(version):
153
+ datastore = update_datastore(version)
154
+ return get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
155
+
156
+
157
+ def update_qa_hidden_df_ret_rerank(version):
158
+ datastore = update_datastore(version)
159
+ return get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
160
+
161
+
162
+ def update_doc_df_ret_rerank(version):
163
+ datastore = update_datastore(version)
164
+ return get_leaderboard_table(datastore.doc_fmt_df, datastore.doc_types)
165
+
166
+
167
+ def update_doc_hidden_df_ret_rerank(version):
168
+ datastore = update_datastore(version)
169
+ return get_leaderboard_table(datastore.doc_raw_df, datastore.doc_types, visible=False)
170
+
171
+
172
+ def filter_df_ret(df):
173
+ df_ret = df[df[COL_NAME_RERANKING_MODEL] == "NoReranker"]
174
+ df_ret = reset_rank(df_ret)
175
+ return df_ret
176
+
177
+
178
+ def update_qa_df_ret(version):
179
+ datastore = update_datastore(version)
180
+ df_ret = filter_df_ret(datastore.qa_fmt_df)
181
+ return get_leaderboard_table(df_ret, datastore.qa_types)
182
+
183
+
184
+ def update_qa_hidden_df_ret(version):
185
+ datastore = update_datastore(version)
186
+ df_ret_hidden = filter_df_ret(datastore.qa_raw_df)
187
+ return get_leaderboard_table(df_ret_hidden, datastore.qa_types, visible=False)
188
+
189
+
190
+ def update_doc_df_ret(version):
191
+ datastore = update_datastore(version)
192
+ df_ret = filter_df_ret(datastore.doc_fmt_df)
193
+ return get_leaderboard_table(df_ret, datastore.doc_types)
194
+
195
+
196
+ def update_doc_hidden_df_ret(version):
197
+ datastore = update_datastore(version)
198
+ df_ret_hidden = filter_df_ret(datastore.doc_raw_df)
199
+ return get_leaderboard_table(df_ret_hidden, datastore.doc_types, visible=False)
200
+
201
+
202
+ def filter_df_rerank(df):
203
+ df_rerank = df[df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
204
+ df_rerank = reset_rank(df_rerank)
205
+ return df_rerank
206
+
207
+
208
+ def update_qa_df_rerank(version):
209
+ datastore = update_datastore(version)
210
+ df_rerank = filter_df_rerank(datastore.qa_fmt_df)
211
+ return get_leaderboard_table(df_rerank, datastore.qa_types)
212
+
213
+
214
+ def update_qa_hidden_df_rerank(version):
215
+ datastore = update_datastore(version)
216
+ df_rerank_hidden = filter_df_rerank(datastore.qa_raw_df)
217
+ return get_leaderboard_table(df_rerank_hidden, datastore.qa_types, visible=False)
218
+
219
+
220
+ def update_doc_df_rerank(version):
221
+ datastore = update_datastore(version)
222
+ df_rerank = filter_df_rerank(datastore.doc_fmt_df)
223
+ return get_leaderboard_table(df_rerank, datastore.doc_types)
224
+
225
+
226
+ def update_doc_hidden_df_rerank(version):
227
+ datastore = update_datastore(version)
228
+ df_rerank_hidden = filter_df_rerank(datastore.doc_raw_df)
229
+ return get_leaderboard_table(df_rerank_hidden, datastore.doc_types, visible=False)
230
 
231
 
232
  demo = gr.Blocks(css=custom_css)
 
248
  # select domain
249
  with gr.Row():
250
  domains = get_domain_dropdown(QABenchmarks[datastore.slug])
251
+ version.change(update_qa_domains, version, domains)
252
  # select language
253
  with gr.Row():
254
  langs = get_language_dropdown(QABenchmarks[datastore.slug])
255
+ version.change(update_qa_langs, version, langs)
256
  with gr.Column():
257
  # select the metric
258
  metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC_QA)
 
269
  # select reranking models
270
  with gr.Column():
271
  models = get_reranking_dropdown(datastore.reranking_models)
272
+ version.change(update_qa_models, version, models)
273
  # shown_table
274
  qa_df_elem_ret_rerank = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
275
+ version.change(update_qa_df_ret_rerank, version, qa_df_elem_ret_rerank)
276
  # Dummy leaderboard for handling the case when the user uses backspace key
277
  qa_df_elem_ret_rerank_hidden = get_leaderboard_table(
278
  datastore.qa_raw_df, datastore.qa_types, visible=False
279
  )
280
+ version.change(update_qa_hidden_df_ret_rerank, version, qa_df_elem_ret_rerank_hidden)
 
 
 
 
 
281
 
282
  set_listeners(
283
  TaskType.qa,
 
306
  search_bar_ret = get_search_bar()
307
  with gr.Column(scale=1):
308
  models_ret = get_noreranking_dropdown()
309
+ version.change(update_qa_models, version, models_ret)
310
+ _qa_df_ret = filter_df_ret(datastore.qa_fmt_df)
 
311
  qa_df_elem_ret = get_leaderboard_table(_qa_df_ret, datastore.qa_types)
312
+ version.change(update_qa_df_ret, version, qa_df_elem_ret)
313
 
314
  # Dummy leaderboard for handling the case when the user uses backspace key
315
+ _qa_df_ret_hidden = filter_df_ret(datastore.qa_raw_df)
 
 
 
316
  qa_df_elem_ret_hidden = get_leaderboard_table(
317
  _qa_df_ret_hidden, datastore.qa_types, visible=False
318
  )
319
+ version.change(update_qa_hidden_df_ret, version, qa_df_elem_ret_hidden)
 
 
 
 
 
 
 
 
 
 
 
320
 
321
  set_listeners(
322
  TaskType.qa,
 
347
  )
348
 
349
  with gr.TabItem("Reranking Only", id=12):
350
+ _qa_df_rerank = filter_df_rerank(datastore.qa_fmt_df)
 
351
  qa_rerank_models = _qa_df_rerank[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
352
  with gr.Row():
353
  with gr.Column(scale=1):
354
  qa_models_rerank = get_reranking_dropdown(qa_rerank_models)
355
+ version.change(update_qa_models, version, qa_models_rerank)
356
  with gr.Column(scale=1):
357
  qa_search_bar_rerank = gr.Textbox(show_label=False, visible=False)
358
  qa_df_elem_rerank = get_leaderboard_table(_qa_df_rerank, datastore.qa_types)
359
+ version.change(update_qa_df_rerank, version, qa_df_elem_rerank)
360
 
361
+ _qa_df_rerank_hidden = filter_df_rerank(datastore.qa_raw_df)
 
 
 
362
  qa_df_elem_rerank_hidden = get_leaderboard_table(
363
  _qa_df_rerank_hidden, datastore.qa_types, visible=False
364
  )
365
+ version.change(update_qa_hidden_df_rerank, version, qa_df_elem_rerank_hidden)
 
 
 
 
 
366
 
367
  set_listeners(
368
  TaskType.qa,
 
397
  # select domain
398
  with gr.Row():
399
  domains = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
400
+ version.change(update_doc_domains, version, domains)
401
  # select language
402
  with gr.Row():
403
  langs = get_language_dropdown(LongDocBenchmarks[datastore.slug])
404
+ version.change(update_doc_langs, version, langs)
405
  with gr.Column():
406
  # select the metric
407
  with gr.Row():
 
417
  search_bar = get_search_bar()
418
  with gr.Column():
419
  models = get_reranking_dropdown(datastore.reranking_models)
420
+ version.change(update_qa_models, version, models)
421
 
422
  doc_df_elem_ret_rerank = get_leaderboard_table(datastore.doc_fmt_df, datastore.doc_types)
423
 
424
+ version.change(update_doc_df_ret_rerank, version, doc_df_elem_ret_rerank)
425
+
426
  doc_df_elem_ret_rerank_hidden = get_leaderboard_table(
427
  datastore.doc_raw_df, datastore.doc_types, visible=False
428
  )
429
 
430
+ version.change(update_doc_hidden_df_ret_rerank, version, doc_df_elem_ret_rerank_hidden)
 
 
 
 
431
 
432
  set_listeners(
433
  TaskType.long_doc,
 
463
  search_bar_ret = get_search_bar()
464
  with gr.Column(scale=1):
465
  models_ret = get_noreranking_dropdown()
466
+ _doc_df_ret = filter_df_ret(datastore.doc_fmt_df)
 
 
 
 
467
  doc_df_elem_ret = get_leaderboard_table(_doc_df_ret, datastore.doc_types)
468
+ version.change(update_doc_df_ret, version, doc_df_elem_ret)
469
 
470
+ _doc_df_ret_hidden = filter_df_ret(datastore.doc_raw_df)
 
 
 
471
  doc_df_elem_ret_hidden = get_leaderboard_table(
472
  _doc_df_ret_hidden, datastore.doc_types, visible=False
473
  )
474
+ version.change(update_doc_hidden_df_ret, version, doc_df_elem_ret_hidden)
 
 
 
 
 
475
 
476
  set_listeners(
477
  TaskType.long_doc,
 
501
  queue=True,
502
  )
503
  with gr.TabItem("Reranking Only", id=22):
504
+ _doc_df_rerank = filter_df_rerank(datastore.doc_fmt_df)
 
 
 
505
  doc_rerank_models = (
506
  _doc_df_rerank[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
507
  )
 
511
  with gr.Column(scale=1):
512
  doc_search_bar_rerank = gr.Textbox(show_label=False, visible=False)
513
  doc_df_elem_rerank = get_leaderboard_table(_doc_df_rerank, datastore.doc_types)
514
+ version.change(update_doc_df_rerank, version, doc_df_elem_rerank)
515
+
516
+ _doc_df_rerank_hidden = filter_df_rerank(datastore.doc_raw_df)
 
517
  doc_df_elem_rerank_hidden = get_leaderboard_table(
518
  _doc_df_rerank_hidden, datastore.doc_types, visible=False
519
  )
520
 
521
+ version.change(update_doc_hidden_df_rerank, version, doc_df_elem_rerank_hidden)
 
 
 
 
522
 
523
  set_listeners(
524
  TaskType.long_doc,