AudioBench-Leaderboard-Extend

Running

binwang committed on Mar 14

Commit

7f996b6

verified ·

1 Parent(s): c89a583

Upload folder using huggingface_hub

Files changed (2) hide show

app/content.py CHANGED Viewed

@@ -53,6 +53,10 @@ displayname2datasetname = {
     'MNSC-PART6-SDS'       : 'imda_part6_30s_ds_human_test',
     'SEAME-Dev-Man'        : 'seame_dev_man',
     'SEAME-Dev-Sge'        : 'seame_dev_sge',
     'CNA'             : 'cna_test',
     'IDPC'            : 'idpc_test',
@@ -130,6 +134,11 @@ dataset_diaplay_information = {
     'SEAME-Dev-Man'        : 'SEAME dataset, English-Mandarin Code-swithcing',
     'SEAME-Dev-Sge'        : 'SEAME dataset, English-Mandarin Code-swithcing',
     'CNA'             : 'Under Development',
     'IDPC'            : 'Under Development',
     'Parliament'      : 'Under Development',

     'MNSC-PART6-SDS'       : 'imda_part6_30s_ds_human_test',
     'SEAME-Dev-Man'        : 'seame_dev_man',
     'SEAME-Dev-Sge'        : 'seame_dev_sge',
+    'MMAU-mini'            : 'mmau_mini',
+    'MMAU-mini-music'      : 'mmau_mini_music',
+    'MMAU-mini-sound'      : 'mmau_mini_sound',
+    'MMAU-mini-speech'     : 'mmau_mini_speech',
     'CNA'             : 'cna_test',
     'IDPC'            : 'idpc_test',
     'SEAME-Dev-Man'        : 'SEAME dataset, English-Mandarin Code-swithcing',
     'SEAME-Dev-Sge'        : 'SEAME dataset, English-Mandarin Code-swithcing',
+    'MMAU-mini'            : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
+    'MMAU-mini-music'      : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
+    'MMAU-mini-sound'      : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
+    'MMAU-mini-speech'     : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
     'CNA'             : 'Under Development',
     'IDPC'            : 'Under Development',
     'Parliament'      : 'Under Development',

app/pages.py CHANGED Viewed

@@ -55,7 +55,7 @@ def dashboard():
                 - AudioBench is a comprehensive evaluation benchmark designed for general instruction-following audio large language models.
                 - AudioBench is an evaluation benchmark that we continually improve and maintain.
-                Below are the initial 26 datasets that are included in AudioBench. We are now exteneded to over 40 datasets and going to extend to more in the future.
                 """
                 )
@@ -65,9 +65,9 @@ def dashboard():
         st.markdown("###### :dart: Our Benchmark includes: ")
         cols = st.columns(8)
-        cols[0].metric(label="Tasks", value=">8")
-        cols[1].metric(label="Datasets", value=">40")
-        cols[2].metric(label="Evaluated Models", value=">5")
     st.divider()
     with st.container():
@@ -575,4 +575,22 @@ def under_development():
 def mmau_evaluation():
     st.title("Task: MMAU-Audio Understanding")

                 - AudioBench is a comprehensive evaluation benchmark designed for general instruction-following audio large language models.
                 - AudioBench is an evaluation benchmark that we continually improve and maintain.
+                Below are the initial 26 datasets that are included in AudioBench. We are now exteneded to over 50 datasets and going to extend to more in the future.
                 """
                 )
         st.markdown("###### :dart: Our Benchmark includes: ")
         cols = st.columns(8)
+        cols[0].metric(label="Tasks", value=">10")
+        cols[1].metric(label="Datasets", value=">50")
+        cols[2].metric(label="Evaluated Models", value=">10")
     st.divider()
     with st.container():
 def mmau_evaluation():
     st.title("Task: MMAU-Audio Understanding")
+    dataset_list = [
+        'MMAU-mini',
+        'MMAU-mini-music',
+        'MMAU-mini-sound',
+        'MMAU-mini-speech',
+        ]
+    filters_1_list = dataset_list
+    space1, space2, _, _ = st.columns([0.4, 0.4, 0.2 ,0.2])
+    with space1:
+        tab_section = st.selectbox('Dataset', filters_1_list)
+    with space2:
+        metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
+        metric = metric.lower()
+    if tab_section:
+        dataset_contents(dataset_diaplay_information[tab_section], metrics_info[metric])
+        draw_table(tab_section, metric)