Upload organize_model_results.json with huggingface_hub
Browse files- organize_model_results.json +76 -0
organize_model_results.json
CHANGED
@@ -6,11 +6,13 @@
|
|
6 |
"Qwen2-Audio-7B-Instruct": 29.187525646286417,
|
7 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 24.640951990151827,
|
8 |
"phi_4_multimodal_instruct": 26.815757078375054,
|
|
|
9 |
"WavLLM_fairseq": 39.96717275338531,
|
10 |
"SALMONN_7B": 34.222404595814524,
|
11 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 39.32704144439885
|
12 |
},
|
13 |
"gpt4o_judge": {
|
|
|
14 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 39.462453836684446
|
15 |
}
|
16 |
},
|
@@ -51,6 +53,7 @@
|
|
51 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 3.445086705202312
|
52 |
},
|
53 |
"gpt4o_judge": {
|
|
|
54 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.61271676300578
|
55 |
}
|
56 |
},
|
@@ -81,6 +84,7 @@
|
|
81 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.0
|
82 |
},
|
83 |
"gpt4o_judge": {
|
|
|
84 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 67.0
|
85 |
}
|
86 |
},
|
@@ -119,6 +123,9 @@
|
|
119 |
"WavLLM_fairseq": 44.3133951137321,
|
120 |
"SALMONN_7B": 50.88458298230834,
|
121 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.44481887110362
|
|
|
|
|
|
|
122 |
}
|
123 |
},
|
124 |
"imda_30s_sqa_human_test": {
|
@@ -228,6 +235,7 @@
|
|
228 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 63.0
|
229 |
},
|
230 |
"gpt4o_judge": {
|
|
|
231 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 75.0
|
232 |
}
|
233 |
},
|
@@ -241,6 +249,9 @@
|
|
241 |
"WavLLM_fairseq": 59.76095617529881,
|
242 |
"SALMONN_7B": 23.804780876494025,
|
243 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 46.713147410358566
|
|
|
|
|
|
|
244 |
}
|
245 |
},
|
246 |
"public_sg_speech_qa_test": {
|
@@ -256,6 +267,7 @@
|
|
256 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.94186046511628
|
257 |
},
|
258 |
"gpt4o_judge": {
|
|
|
259 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 73.02325581395348
|
260 |
}
|
261 |
},
|
@@ -292,11 +304,13 @@
|
|
292 |
"Qwen2-Audio-7B-Instruct": 64.86264249672958,
|
293 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 88.61894972902262,
|
294 |
"phi_4_multimodal_instruct": 77.58549803774996,
|
|
|
295 |
"WavLLM_fairseq": 77.64903756307233,
|
296 |
"SALMONN_7B": 66.39506634273968,
|
297 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 83.81984675761541
|
298 |
},
|
299 |
"gpt4o_judge": {
|
|
|
300 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 90.12521024107643
|
301 |
}
|
302 |
},
|
@@ -326,6 +340,9 @@
|
|
326 |
"WavLLM_fairseq": 51.072796934865906,
|
327 |
"SALMONN_7B": 41.7624521072797,
|
328 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 45.593869731800766
|
|
|
|
|
|
|
329 |
}
|
330 |
},
|
331 |
"imda_part4_30s_sqa_human_test": {
|
@@ -341,6 +358,7 @@
|
|
341 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 53.8
|
342 |
},
|
343 |
"gpt4o_judge": {
|
|
|
344 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 61.4
|
345 |
}
|
346 |
},
|
@@ -354,6 +372,9 @@
|
|
354 |
"WavLLM_fairseq": 69.61427985227739,
|
355 |
"SALMONN_7B": 88.79770209273697,
|
356 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 42.921624948707425
|
|
|
|
|
|
|
357 |
}
|
358 |
},
|
359 |
"imda_gr_dialogue": {
|
@@ -367,6 +388,9 @@
|
|
367 |
"WavLLM_fairseq": 46.766666666666666,
|
368 |
"SALMONN_7B": 42.733333333333334,
|
369 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 25.433333333333337
|
|
|
|
|
|
|
370 |
}
|
371 |
},
|
372 |
"imda_30s_ds_human_test": {
|
@@ -521,6 +545,7 @@
|
|
521 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 37.400000000000006
|
522 |
},
|
523 |
"gpt4o_judge": {
|
|
|
524 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 47.400000000000006
|
525 |
}
|
526 |
},
|
@@ -535,6 +560,9 @@
|
|
535 |
"WavLLM_fairseq": 0.23333333333333336,
|
536 |
"SALMONN_7B": 0.06666666666666667,
|
537 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 9.666666666666666
|
|
|
|
|
|
|
538 |
}
|
539 |
},
|
540 |
"iemocap_gender_test": {
|
@@ -548,6 +576,9 @@
|
|
548 |
"WavLLM_fairseq": 51.932270916334666,
|
549 |
"SALMONN_7B": 81.31474103585658,
|
550 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 44.22310756972111
|
|
|
|
|
|
|
551 |
}
|
552 |
},
|
553 |
"ytb_asr_batch2": {
|
@@ -586,6 +617,9 @@
|
|
586 |
"WavLLM_fairseq": 66.31439894319684,
|
587 |
"SALMONN_7B": 50.99075297225891,
|
588 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 85.2928225451343
|
|
|
|
|
|
|
589 |
}
|
590 |
},
|
591 |
"dream_tts_mcq_test": {
|
@@ -599,6 +633,9 @@
|
|
599 |
"WavLLM_fairseq": 66.5446941975954,
|
600 |
"SALMONN_7B": 56.455828541557764,
|
601 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 86.4610559330894
|
|
|
|
|
|
|
602 |
}
|
603 |
},
|
604 |
"imda_part5_30s_ds_human_test": {
|
@@ -614,6 +651,7 @@
|
|
614 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 49.0
|
615 |
},
|
616 |
"gpt4o_judge": {
|
|
|
617 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.8
|
618 |
}
|
619 |
},
|
@@ -644,6 +682,7 @@
|
|
644 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 49.0
|
645 |
},
|
646 |
"gpt4o_judge": {
|
|
|
647 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 52.800000000000004
|
648 |
}
|
649 |
},
|
@@ -660,6 +699,7 @@
|
|
660 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.0
|
661 |
},
|
662 |
"gpt4o_judge": {
|
|
|
663 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 48.2
|
664 |
}
|
665 |
},
|
@@ -682,6 +722,7 @@
|
|
682 |
"Qwen2-Audio-7B-Instruct": 40.77727272727273,
|
683 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 3.0954545454545457,
|
684 |
"phi_4_multimodal_instruct": 26.386363636363637,
|
|
|
685 |
"WavLLM_fairseq": 5.5,
|
686 |
"SALMONN_7B": 37.445454545454545,
|
687 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 2.4727272727272727
|
@@ -698,6 +739,7 @@
|
|
698 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.07953048457785493
|
699 |
},
|
700 |
"gpt4o_judge": {
|
|
|
701 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.868181818181818
|
702 |
}
|
703 |
},
|
@@ -712,6 +754,9 @@
|
|
712 |
"WavLLM_fairseq": 2.6833333333333336,
|
713 |
"SALMONN_7B": 2.5166666666666666,
|
714 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 12.416666666666666
|
|
|
|
|
|
|
715 |
}
|
716 |
},
|
717 |
"imda_part6_30s_sqa_test": {
|
@@ -748,6 +793,7 @@
|
|
748 |
"whisper_large_v3": 1.600581653970121,
|
749 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 27.620150160643625,
|
750 |
"phi_4_multimodal_instruct": 15.012558278964478,
|
|
|
751 |
"WavLLM_fairseq": 13.841886973016162,
|
752 |
"SALMONN_7B": 14.102682915273142,
|
753 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 10.930203684508578
|
@@ -766,6 +812,7 @@
|
|
766 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 29.47134606841404
|
767 |
},
|
768 |
"gpt4o_judge": {
|
|
|
769 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 28.076410484229232
|
770 |
}
|
771 |
},
|
@@ -809,6 +856,7 @@
|
|
809 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 70.8
|
810 |
},
|
811 |
"gpt4o_judge": {
|
|
|
812 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 77.8
|
813 |
}
|
814 |
},
|
@@ -848,6 +896,7 @@
|
|
848 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 17.380191693290733
|
849 |
},
|
850 |
"gpt4o_judge": {
|
|
|
851 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.63258785942492
|
852 |
}
|
853 |
},
|
@@ -886,6 +935,7 @@
|
|
886 |
"whisper_large_v3": 0.02107778621423822,
|
887 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 8.433062902024755,
|
888 |
"phi_4_multimodal_instruct": 0.19835914151649442,
|
|
|
889 |
"WavLLM_fairseq": 0.0033159224040994286,
|
890 |
"SALMONN_7B": 0.00046745670226766583,
|
891 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 1.0368044741318085
|
@@ -904,6 +954,7 @@
|
|
904 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 16.710526315789473
|
905 |
},
|
906 |
"gpt4o_judge": {
|
|
|
907 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.736842105263158
|
908 |
}
|
909 |
},
|
@@ -927,6 +978,7 @@
|
|
927 |
"whisper_large_v3": 0.16408986541757878,
|
928 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 35.274306071307024,
|
929 |
"phi_4_multimodal_instruct": 45.295964957544776,
|
|
|
930 |
"WavLLM_fairseq": 31.96381187282953,
|
931 |
"SALMONN_7B": 33.88941292215531,
|
932 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 5.987143868370054
|
@@ -1051,6 +1103,9 @@
|
|
1051 |
"WavLLM_fairseq": 49.06666666666666,
|
1052 |
"SALMONN_7B": 59.766666666666666,
|
1053 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.016666666666666
|
|
|
|
|
|
|
1054 |
}
|
1055 |
},
|
1056 |
"slue_p2_sqa5_test": {
|
@@ -1066,6 +1121,7 @@
|
|
1066 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 82.99019607843137
|
1067 |
},
|
1068 |
"gpt4o_judge": {
|
|
|
1069 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 87.79411764705883
|
1070 |
}
|
1071 |
},
|
@@ -1094,6 +1150,9 @@
|
|
1094 |
"WavLLM_fairseq": 41.57088122605364,
|
1095 |
"SALMONN_7B": 30.536398467432953,
|
1096 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.81992337164751
|
|
|
|
|
|
|
1097 |
}
|
1098 |
},
|
1099 |
"tedlium3_test": {
|
@@ -1150,6 +1209,7 @@
|
|
1150 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 57.800000000000004
|
1151 |
},
|
1152 |
"gpt4o_judge": {
|
|
|
1153 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.80000000000001
|
1154 |
}
|
1155 |
},
|
@@ -1166,6 +1226,7 @@
|
|
1166 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 57.199999999999996
|
1167 |
},
|
1168 |
"gpt4o_judge": {
|
|
|
1169 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.4
|
1170 |
}
|
1171 |
},
|
@@ -1218,6 +1279,21 @@
|
|
1218 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 63.68000000000001
|
1219 |
}
|
1220 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1221 |
"imda_30s_ar_test": {
|
1222 |
"llama3_70b_judge": {
|
1223 |
"Qwen2-Audio-7B-Instruct": 5.106666666666667,
|
|
|
6 |
"Qwen2-Audio-7B-Instruct": 29.187525646286417,
|
7 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 24.640951990151827,
|
8 |
"phi_4_multimodal_instruct": 26.815757078375054,
|
9 |
+
"seallms_audio_7b": 8.658186294624539,
|
10 |
"WavLLM_fairseq": 39.96717275338531,
|
11 |
"SALMONN_7B": 34.222404595814524,
|
12 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 39.32704144439885
|
13 |
},
|
14 |
"gpt4o_judge": {
|
15 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 14.813295034878948,
|
16 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 39.462453836684446
|
17 |
}
|
18 |
},
|
|
|
53 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 3.445086705202312
|
54 |
},
|
55 |
"gpt4o_judge": {
|
56 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 31.641618497109825,
|
57 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.61271676300578
|
58 |
}
|
59 |
},
|
|
|
84 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.0
|
85 |
},
|
86 |
"gpt4o_judge": {
|
87 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 71.6,
|
88 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 67.0
|
89 |
}
|
90 |
},
|
|
|
123 |
"WavLLM_fairseq": 44.3133951137321,
|
124 |
"SALMONN_7B": 50.88458298230834,
|
125 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.44481887110362
|
126 |
+
},
|
127 |
+
"gpt4o_judge": {
|
128 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 57.87700084245998
|
129 |
}
|
130 |
},
|
131 |
"imda_30s_sqa_human_test": {
|
|
|
235 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 63.0
|
236 |
},
|
237 |
"gpt4o_judge": {
|
238 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 74.2,
|
239 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 75.0
|
240 |
}
|
241 |
},
|
|
|
249 |
"WavLLM_fairseq": 59.76095617529881,
|
250 |
"SALMONN_7B": 23.804780876494025,
|
251 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 46.713147410358566
|
252 |
+
},
|
253 |
+
"gpt4o_judge": {
|
254 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 37.45019920318725
|
255 |
}
|
256 |
},
|
257 |
"public_sg_speech_qa_test": {
|
|
|
267 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.94186046511628
|
268 |
},
|
269 |
"gpt4o_judge": {
|
270 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.18604651162791,
|
271 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 73.02325581395348
|
272 |
}
|
273 |
},
|
|
|
304 |
"Qwen2-Audio-7B-Instruct": 64.86264249672958,
|
305 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 88.61894972902262,
|
306 |
"phi_4_multimodal_instruct": 77.58549803774996,
|
307 |
+
"seallms_audio_7b": 67.73313399364605,
|
308 |
"WavLLM_fairseq": 77.64903756307233,
|
309 |
"SALMONN_7B": 66.39506634273968,
|
310 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 83.81984675761541
|
311 |
},
|
312 |
"gpt4o_judge": {
|
313 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 74.99159035694262,
|
314 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 90.12521024107643
|
315 |
}
|
316 |
},
|
|
|
340 |
"WavLLM_fairseq": 51.072796934865906,
|
341 |
"SALMONN_7B": 41.7624521072797,
|
342 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 45.593869731800766
|
343 |
+
},
|
344 |
+
"gpt4o_judge": {
|
345 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 47.356321839080465
|
346 |
}
|
347 |
},
|
348 |
"imda_part4_30s_sqa_human_test": {
|
|
|
358 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 53.8
|
359 |
},
|
360 |
"gpt4o_judge": {
|
361 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 60.0,
|
362 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 61.4
|
363 |
}
|
364 |
},
|
|
|
372 |
"WavLLM_fairseq": 69.61427985227739,
|
373 |
"SALMONN_7B": 88.79770209273697,
|
374 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 42.921624948707425
|
375 |
+
},
|
376 |
+
"gpt4o_judge": {
|
377 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 99.46655724251129
|
378 |
}
|
379 |
},
|
380 |
"imda_gr_dialogue": {
|
|
|
388 |
"WavLLM_fairseq": 46.766666666666666,
|
389 |
"SALMONN_7B": 42.733333333333334,
|
390 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 25.433333333333337
|
391 |
+
},
|
392 |
+
"gpt4o_judge": {
|
393 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 93.86666666666666
|
394 |
}
|
395 |
},
|
396 |
"imda_30s_ds_human_test": {
|
|
|
545 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 37.400000000000006
|
546 |
},
|
547 |
"gpt4o_judge": {
|
548 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 59.2,
|
549 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 47.400000000000006
|
550 |
}
|
551 |
},
|
|
|
560 |
"WavLLM_fairseq": 0.23333333333333336,
|
561 |
"SALMONN_7B": 0.06666666666666667,
|
562 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 9.666666666666666
|
563 |
+
},
|
564 |
+
"gpt4o_judge": {
|
565 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 77.13333333333333
|
566 |
}
|
567 |
},
|
568 |
"iemocap_gender_test": {
|
|
|
576 |
"WavLLM_fairseq": 51.932270916334666,
|
577 |
"SALMONN_7B": 81.31474103585658,
|
578 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 44.22310756972111
|
579 |
+
},
|
580 |
+
"gpt4o_judge": {
|
581 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 98.20717131474103
|
582 |
}
|
583 |
},
|
584 |
"ytb_asr_batch2": {
|
|
|
617 |
"WavLLM_fairseq": 66.31439894319684,
|
618 |
"SALMONN_7B": 50.99075297225891,
|
619 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 85.2928225451343
|
620 |
+
},
|
621 |
+
"gpt4o_judge": {
|
622 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 88.77146631439894
|
623 |
}
|
624 |
},
|
625 |
"dream_tts_mcq_test": {
|
|
|
633 |
"WavLLM_fairseq": 66.5446941975954,
|
634 |
"SALMONN_7B": 56.455828541557764,
|
635 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 86.4610559330894
|
636 |
+
},
|
637 |
+
"gpt4o_judge": {
|
638 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 84.31782540512285
|
639 |
}
|
640 |
},
|
641 |
"imda_part5_30s_ds_human_test": {
|
|
|
651 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 49.0
|
652 |
},
|
653 |
"gpt4o_judge": {
|
654 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.0,
|
655 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.8
|
656 |
}
|
657 |
},
|
|
|
682 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 49.0
|
683 |
},
|
684 |
"gpt4o_judge": {
|
685 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 60.599999999999994,
|
686 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 52.800000000000004
|
687 |
}
|
688 |
},
|
|
|
699 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.0
|
700 |
},
|
701 |
"gpt4o_judge": {
|
702 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 55.199999999999996,
|
703 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 48.2
|
704 |
}
|
705 |
},
|
|
|
722 |
"Qwen2-Audio-7B-Instruct": 40.77727272727273,
|
723 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 3.0954545454545457,
|
724 |
"phi_4_multimodal_instruct": 26.386363636363637,
|
725 |
+
"seallms_audio_7b": 53.20909090909091,
|
726 |
"WavLLM_fairseq": 5.5,
|
727 |
"SALMONN_7B": 37.445454545454545,
|
728 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 2.4727272727272727
|
|
|
739 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.07953048457785493
|
740 |
},
|
741 |
"gpt4o_judge": {
|
742 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 39.29545454545455,
|
743 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.868181818181818
|
744 |
}
|
745 |
},
|
|
|
754 |
"WavLLM_fairseq": 2.6833333333333336,
|
755 |
"SALMONN_7B": 2.5166666666666666,
|
756 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 12.416666666666666
|
757 |
+
},
|
758 |
+
"gpt4o_judge": {
|
759 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 10.116666666666667
|
760 |
}
|
761 |
},
|
762 |
"imda_part6_30s_sqa_test": {
|
|
|
793 |
"whisper_large_v3": 1.600581653970121,
|
794 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 27.620150160643625,
|
795 |
"phi_4_multimodal_instruct": 15.012558278964478,
|
796 |
+
"seallms_audio_7b": 27.583542512329426,
|
797 |
"WavLLM_fairseq": 13.841886973016162,
|
798 |
"SALMONN_7B": 14.102682915273142,
|
799 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 10.930203684508578
|
|
|
812 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 29.47134606841404
|
813 |
},
|
814 |
"gpt4o_judge": {
|
815 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.09333981526495,
|
816 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 28.076410484229232
|
817 |
}
|
818 |
},
|
|
|
856 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 70.8
|
857 |
},
|
858 |
"gpt4o_judge": {
|
859 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 78.60000000000001,
|
860 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 77.8
|
861 |
}
|
862 |
},
|
|
|
896 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 17.380191693290733
|
897 |
},
|
898 |
"gpt4o_judge": {
|
899 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 50.60702875399361,
|
900 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.63258785942492
|
901 |
}
|
902 |
},
|
|
|
935 |
"whisper_large_v3": 0.02107778621423822,
|
936 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 8.433062902024755,
|
937 |
"phi_4_multimodal_instruct": 0.19835914151649442,
|
938 |
+
"seallms_audio_7b": 0.012334972259958572,
|
939 |
"WavLLM_fairseq": 0.0033159224040994286,
|
940 |
"SALMONN_7B": 0.00046745670226766583,
|
941 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 1.0368044741318085
|
|
|
954 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 16.710526315789473
|
955 |
},
|
956 |
"gpt4o_judge": {
|
957 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 46.31578947368421,
|
958 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.736842105263158
|
959 |
}
|
960 |
},
|
|
|
978 |
"whisper_large_v3": 0.16408986541757878,
|
979 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 35.274306071307024,
|
980 |
"phi_4_multimodal_instruct": 45.295964957544776,
|
981 |
+
"seallms_audio_7b": 36.4496678966979,
|
982 |
"WavLLM_fairseq": 31.96381187282953,
|
983 |
"SALMONN_7B": 33.88941292215531,
|
984 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 5.987143868370054
|
|
|
1103 |
"WavLLM_fairseq": 49.06666666666666,
|
1104 |
"SALMONN_7B": 59.766666666666666,
|
1105 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.016666666666666
|
1106 |
+
},
|
1107 |
+
"gpt4o_judge": {
|
1108 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 66.9
|
1109 |
}
|
1110 |
},
|
1111 |
"slue_p2_sqa5_test": {
|
|
|
1121 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 82.99019607843137
|
1122 |
},
|
1123 |
"gpt4o_judge": {
|
1124 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 88.23529411764707,
|
1125 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 87.79411764705883
|
1126 |
}
|
1127 |
},
|
|
|
1150 |
"WavLLM_fairseq": 41.57088122605364,
|
1151 |
"SALMONN_7B": 30.536398467432953,
|
1152 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.81992337164751
|
1153 |
+
},
|
1154 |
+
"gpt4o_judge": {
|
1155 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 36.206896551724135
|
1156 |
}
|
1157 |
},
|
1158 |
"tedlium3_test": {
|
|
|
1209 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 57.800000000000004
|
1210 |
},
|
1211 |
"gpt4o_judge": {
|
1212 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 66.8,
|
1213 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.80000000000001
|
1214 |
}
|
1215 |
},
|
|
|
1226 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 57.199999999999996
|
1227 |
},
|
1228 |
"gpt4o_judge": {
|
1229 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 70.0,
|
1230 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.4
|
1231 |
}
|
1232 |
},
|
|
|
1279 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 63.68000000000001
|
1280 |
}
|
1281 |
},
|
1282 |
+
"gigaspeech2_viet": {
|
1283 |
+
"wer": {
|
1284 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.4960741822016732
|
1285 |
+
}
|
1286 |
+
},
|
1287 |
+
"gigaspeech2_thai": {
|
1288 |
+
"wer": {
|
1289 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.8901628256099774
|
1290 |
+
}
|
1291 |
+
},
|
1292 |
+
"gigaspeech2_indo": {
|
1293 |
+
"wer": {
|
1294 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5087211232500294
|
1295 |
+
}
|
1296 |
+
},
|
1297 |
"imda_30s_ar_test": {
|
1298 |
"llama3_70b_judge": {
|
1299 |
"Qwen2-Audio-7B-Instruct": 5.106666666666667,
|