Upload folder using huggingface_hub
- app/content.py: +9 -0
- app/pages.py: +22 -4
app/content.py
CHANGED
@@ -53,6 +53,10 @@ displayname2datasetname = {
     'MNSC-PART6-SDS'   : 'imda_part6_30s_ds_human_test',
     'SEAME-Dev-Man'    : 'seame_dev_man',
     'SEAME-Dev-Sge'    : 'seame_dev_sge',
+    'MMAU-mini'        : 'mmau_mini',
+    'MMAU-mini-music'  : 'mmau_mini_music',
+    'MMAU-mini-sound'  : 'mmau_mini_sound',
+    'MMAU-mini-speech' : 'mmau_mini_speech',
 
     'CNA'  : 'cna_test',
     'IDPC' : 'idpc_test',
@@ -130,6 +134,11 @@ dataset_diaplay_information = {
     'SEAME-Dev-Man' : 'SEAME dataset, English-Mandarin Code-swithcing',
     'SEAME-Dev-Sge' : 'SEAME dataset, English-Mandarin Code-swithcing',
 
+    'MMAU-mini'        : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
+    'MMAU-mini-music'  : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
+    'MMAU-mini-sound'  : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
+    'MMAU-mini-speech' : 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark',
+
     'CNA'        : 'Under Development',
     'IDPC'       : 'Under Development',
     'Parliament' : 'Under Development',
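For reference, the two dictionaries patched above are the lookup tables that map a leaderboard display name to its dataset identifier and to its description string. Below is a minimal sketch of how the new MMAU entries resolve through that pair of mappings; the dictionary names and keys mirror the diff, while the resolve_dataset() helper and the usage line at the bottom are hypothetical and not part of the commit.

# Sketch only: the dict names (including the dataset_diaplay_information
# spelling) and the MMAU keys come from app/content.py; resolve_dataset()
# is an illustrative helper, not code from the repository.
displayname2datasetname = {
    'MMAU-mini'        : 'mmau_mini',
    'MMAU-mini-music'  : 'mmau_mini_music',
    'MMAU-mini-sound'  : 'mmau_mini_sound',
    'MMAU-mini-speech' : 'mmau_mini_speech',
}

MMAU_BLURB = 'MMAU Dataset, Mini version, MMAU: A Massive Multi-Task Audio Understanding and Reasoning Benchmark'
dataset_diaplay_information = {name: MMAU_BLURB for name in displayname2datasetname}

def resolve_dataset(display_name: str) -> tuple[str, str]:
    """Return (dataset_id, description) for a leaderboard display name."""
    dataset_id  = displayname2datasetname[display_name]
    description = dataset_diaplay_information.get(display_name, 'Under Development')
    return dataset_id, description

print(resolve_dataset('MMAU-mini-speech'))
# -> ('mmau_mini_speech', 'MMAU Dataset, Mini version, ...')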
app/pages.py
CHANGED
@@ -55,7 +55,7 @@ def dashboard():
         - AudioBench is a comprehensive evaluation benchmark designed for general instruction-following audio large language models.
         - AudioBench is an evaluation benchmark that we continually improve and maintain.
 
-        Below are the initial 26 datasets that are included in AudioBench. We are now exteneded to over
+        Below are the initial 26 datasets that are included in AudioBench. We are now exteneded to over 50 datasets and going to extend to more in the future.
         """
         )
 
@@ -65,9 +65,9 @@ def dashboard():
 
     st.markdown("###### :dart: Our Benchmark includes: ")
     cols = st.columns(8)
-    cols[0].metric(label="Tasks", value=">
-    cols[1].metric(label="Datasets", value=">
-    cols[2].metric(label="Evaluated Models", value=">
+    cols[0].metric(label="Tasks", value=">10")
+    cols[1].metric(label="Datasets", value=">50")
+    cols[2].metric(label="Evaluated Models", value=">10")
 
     st.divider()
     with st.container():
@@ -575,4 +575,22 @@ def under_development():
 def mmau_evaluation():
     st.title("Task: MMAU-Audio Understanding")
 
+    dataset_list = [
+        'MMAU-mini',
+        'MMAU-mini-music',
+        'MMAU-mini-sound',
+        'MMAU-mini-speech',
+    ]
+    filters_1_list = dataset_list
+
+    space1, space2, _, _ = st.columns([0.4, 0.4, 0.2, 0.2])
+
+    with space1:
+        tab_section = st.selectbox('Dataset', filters_1_list)
+    with space2:
+        metric = st.selectbox('Metric', ['LLAMA3_70B_JUDGE'])
+    metric = metric.lower()
 
+    if tab_section:
+        dataset_contents(dataset_diaplay_information[tab_section], metrics_info[metric])
+        draw_table(tab_section, metric)