binwang committed on
Commit
7f996b6
·
verified ·
1 Parent(s): c89a583

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app/content.py +9 -0
  2. app/pages.py +22 -4
app/content.py CHANGED
@@ -53,6 +53,10 @@ displayname2datasetname = {
53
  'MNSC-PART6-SDS' : 'imda_part6_30s_ds_human_test',
54
  'SEAME-Dev-Man' : 'seame_dev_man',
55
  'SEAME-Dev-Sge' : 'seame_dev_sge',
 
 
 
 
56
 
57
  'CNA' : 'cna_test',
58
  'IDPC' : 'idpc_test',
@@ -130,6 +134,11 @@ dataset_diaplay_information = {
130
  'SEAME-Dev-Man' : 'SEAME dataset, English-Mandarin Code-swithcing',
131
  'SEAME-Dev-Sge' : 'SEAME dataset, English-Mandarin Code-swithcing',
132
 
 
 
 
 
 
133
  'CNA' : 'Under Development',
134
  'IDPC' : 'Under Development',
135
  'Parliament' : 'Under Development',
 
53
  'MNSC-PART6-SDS' : 'imda_part6_30s_ds_human_test',
54
  'SEAME-Dev-Man' : 'seame_dev_man',
55
  'SEAME-Dev-Sge' : 'seame_dev_sge',
56
+ 'MMAU-mini' : 'mmau_mini',
57
+ 'MMAU-mini-music' : 'mmau_mini_music',
58
+ 'MMAU-mini-sound' : 'mmau_mini_sound',
59
+ 'MMAU-mini-speech' : 'mmau_mini_speech',
60
 
61
  'CNA' : 'cna_test',
62
  'IDPC' : 'idpc_test',
 
134
  'SEAME-Dev-Man' : 'SEAME dataset, English-Mandarin Code-swithcing',
135
  'SEAME-Dev-Sge' : 'SEAME dataset, English-Mandarin Code-swithcing',
136
 
137
+ 'MMAU-mini' : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
138
+ 'MMAU-mini-music' : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
139
+ 'MMAU-mini-sound' : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
140
+ 'MMAU-mini-speech' : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
141
+
142
  'CNA' : 'Under Development',
143
  'IDPC' : 'Under Development',
144
  'Parliament' : 'Under Development',
app/pages.py CHANGED
@@ -55,7 +55,7 @@ def dashboard():
55
  - AudioBench is a comprehensive evaluation benchmark designed for general instruction-following audio large language models.
56
  - AudioBench is an evaluation benchmark that we continually improve and maintain.
57
 
58
- Below are the initial 26 datasets that are included in AudioBench. We are now exteneded to over 40 datasets and going to extend to more in the future.
59
  """
60
  )
61
 
@@ -65,9 +65,9 @@ def dashboard():
65
 
66
  st.markdown("###### :dart: Our Benchmark includes: ")
67
  cols = st.columns(8)
68
- cols[0].metric(label="Tasks", value=">8")
69
- cols[1].metric(label="Datasets", value=">40")
70
- cols[2].metric(label="Evaluated Models", value=">5")
71
 
72
  st.divider()
73
  with st.container():
@@ -575,4 +575,22 @@ def under_development():
575
  def mmau_evaluation():
576
  st.title("Task: MMAU-Audio Understanding")
577
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
 
 
 
 
 
55
  - AudioBench is a comprehensive evaluation benchmark designed for general instruction-following audio large language models.
56
  - AudioBench is an evaluation benchmark that we continually improve and maintain.
57
 
58
+ Below are the initial 26 datasets that are included in AudioBench. We are now exteneded to over 50 datasets and going to extend to more in the future.
59
  """
60
  )
61
 
 
65
 
66
  st.markdown("###### :dart: Our Benchmark includes: ")
67
  cols = st.columns(8)
68
+ cols[0].metric(label="Tasks", value=">10")
69
+ cols[1].metric(label="Datasets", value=">50")
70
+ cols[2].metric(label="Evaluated Models", value=">10")
71
 
72
  st.divider()
73
  with st.container():
 
575
  def mmau_evaluation():
576
  st.title("Task: MMAU-Audio Understanding")
577
 
578
+ dataset_list = [
579
+ 'MMAU-mini',
580
+ 'MMAU-mini-music',
581
+ 'MMAU-mini-sound',
582
+ 'MMAU-mini-speech',
583
+ ]
584
+ filters_1_list = dataset_list
585
+
586
+ space1, space2, _, _ = st.columns([0.4, 0.4, 0.2 ,0.2])
587
+
588
+ with space1:
589
+ tab_section = st.selectbox('Dataset', filters_1_list)
590
+ with space2:
591
+ metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
592
+ metric = metric.lower()
593
 
594
+ if tab_section:
595
+ dataset_contents(dataset_diaplay_information[tab_section], metrics_info[metric])
596
+ draw_table(tab_section, metric)