binwang committed on
Commit
2bd4813
·
verified ·
1 Parent(s): 7d5f69e

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app/pages.py +11 -11
app/pages.py CHANGED
@@ -282,7 +282,7 @@ def speech_question_answering_singlish():
282
  with space1:
283
  tab_section = st.selectbox('Dataset', filters_1_list)
284
  with space2:
285
- metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
286
  metric = metric.lower()
287
 
288
  if tab_section:
@@ -310,7 +310,7 @@ def spoken_dialogue_summarization_singlish():
310
  with space1:
311
  tab_section = st.selectbox('Dataset', filters_1_list)
312
  with space2:
313
- metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
314
  metric = metric.lower()
315
 
316
  if tab_section:
@@ -336,7 +336,7 @@ def speech_instruction():
336
  with space1:
337
  tab_section = st.selectbox('Dataset', filters_1_list)
338
  with space2:
339
- metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
340
  metric = metric.lower()
341
 
342
  if tab_section:
@@ -360,7 +360,7 @@ def audio_captioning():
360
  with space1:
361
  tab_section = st.selectbox('Dataset', dataset_list)
362
  with space2:
363
- metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'METEOR'])
364
  metric = metric.lower()
365
 
366
  if tab_section:
@@ -383,7 +383,7 @@ def audio_scene_question_answering():
383
  with space1:
384
  tab_section = st.selectbox('Dataset', filters_1_list)
385
  with space2:
386
- metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
387
  metric = metric.lower()
388
 
389
  if tab_section:
@@ -413,7 +413,7 @@ def accent_recognition():
413
  with space1:
414
  tab_section = st.selectbox('Dataset', filters_1_list)
415
  with space2:
416
- metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
417
  metric = metric.lower()
418
 
419
  if tab_section:
@@ -440,7 +440,7 @@ def gender_recognition():
440
  with space1:
441
  tab_section = st.selectbox('Dataset', filters_1_list)
442
  with space2:
443
- metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
444
  metric = metric.lower()
445
 
446
  if tab_section:
@@ -470,7 +470,7 @@ def emotion_recognition():
470
  with space1:
471
  tab_section = st.selectbox('Dataset', filters_1_list)
472
  with space2:
473
- metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
474
  metric = metric.lower()
475
 
476
  if tab_section:
@@ -498,7 +498,7 @@ def music_understanding():
498
  with space1:
499
  tab_section = st.selectbox('Dataset', filters_1_list)
500
  with space2:
501
- metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
502
  metric = metric.lower()
503
 
504
  if tab_section:
@@ -561,7 +561,7 @@ def under_development():
561
  'YTB-SDS-Batch1',
562
  'YTB-PQA-Batch1',
563
  ]:
564
- metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
565
  metric = metric.lower()
566
  else:
567
  raise ValueError('Invalid dataset')
@@ -588,7 +588,7 @@ def mmau_evaluation():
588
  with space1:
589
  tab_section = st.selectbox('Dataset', filters_1_list)
590
  with space2:
591
- metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'STRING_MATCH'])
592
  metric = metric.lower()
593
 
594
  if tab_section:
 
282
  with space1:
283
  tab_section = st.selectbox('Dataset', filters_1_list)
284
  with space2:
285
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'GPT4O_JUDGE'])
286
  metric = metric.lower()
287
 
288
  if tab_section:
 
310
  with space1:
311
  tab_section = st.selectbox('Dataset', filters_1_list)
312
  with space2:
313
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'GPT4O_JUDGE'])
314
  metric = metric.lower()
315
 
316
  if tab_section:
 
336
  with space1:
337
  tab_section = st.selectbox('Dataset', filters_1_list)
338
  with space2:
339
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'GPT4O_JUDGE'])
340
  metric = metric.lower()
341
 
342
  if tab_section:
 
360
  with space1:
361
  tab_section = st.selectbox('Dataset', dataset_list)
362
  with space2:
363
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'GPT4O_JUDGE', 'METEOR'])
364
  metric = metric.lower()
365
 
366
  if tab_section:
 
383
  with space1:
384
  tab_section = st.selectbox('Dataset', filters_1_list)
385
  with space2:
386
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'GPT4O_JUDGE'])
387
  metric = metric.lower()
388
 
389
  if tab_section:
 
413
  with space1:
414
  tab_section = st.selectbox('Dataset', filters_1_list)
415
  with space2:
416
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'GPT4O_JUDGE'])
417
  metric = metric.lower()
418
 
419
  if tab_section:
 
440
  with space1:
441
  tab_section = st.selectbox('Dataset', filters_1_list)
442
  with space2:
443
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'GPT4O_JUDGE'])
444
  metric = metric.lower()
445
 
446
  if tab_section:
 
470
  with space1:
471
  tab_section = st.selectbox('Dataset', filters_1_list)
472
  with space2:
473
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'GPT4O_JUDGE'])
474
  metric = metric.lower()
475
 
476
  if tab_section:
 
498
  with space1:
499
  tab_section = st.selectbox('Dataset', filters_1_list)
500
  with space2:
501
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'GPT4O_JUDGE'])
502
  metric = metric.lower()
503
 
504
  if tab_section:
 
561
  'YTB-SDS-Batch1',
562
  'YTB-PQA-Batch1',
563
  ]:
564
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'GPT4O_JUDGE'])
565
  metric = metric.lower()
566
  else:
567
  raise ValueError('Invalid dataset')
 
588
  with space1:
589
  tab_section = st.selectbox('Dataset', filters_1_list)
590
  with space2:
591
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE', 'STRING_MATCH', 'GPT4O_JUDGE'])
592
  metric = metric.lower()
593
 
594
  if tab_section: