binwang committed on
Commit
aa729a4
·
verified ·
1 Parent(s): 61dd7eb

Upload organize_model_results.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. organize_model_results.json +112 -108
organize_model_results.json CHANGED
@@ -16,6 +16,118 @@
16
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 88.77146631439894
17
  }
18
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "slue_p2_sqa5_test": {
20
  "llama3_70b_judge": {
21
  "gpt-4o-audio": 89.41176470588235,
@@ -1003,114 +1115,6 @@
1003
  "cascade_whisper_large_v3_llama_3_8b_instruct": 5.987143868370054
1004
  }
1005
  },
1006
- "mmau_mini": {
1007
- "string_match": {
1008
- "Qwen-Audio-Chat": 38.5,
1009
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 60.5,
1010
- "Qwen2-Audio-7B-Instruct": 44.4,
1011
- "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 46.7,
1012
- "gemini-1.5-flash": 31.4,
1013
- "phi_4_multimodal_instruct": 54.50000000000001,
1014
- "seallms_audio_7b": 51.5,
1015
- "SALMONN_7B": 40.5,
1016
- "cascade_whisper_large_v3_llama_3_8b_instruct": 51.0
1017
- },
1018
- "llama3_70b_judge": {
1019
- "Qwen-Audio-Chat": 53.6,
1020
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 64.60000000000001,
1021
- "Qwen2-Audio-7B-Instruct": 58.9,
1022
- "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 52.7,
1023
- "gemini-1.5-flash": 58.199999999999996,
1024
- "phi_4_multimodal_instruct": 59.4,
1025
- "seallms_audio_7b": 60.199999999999996,
1026
- "SALMONN_7B": 48.4,
1027
- "cascade_whisper_large_v3_llama_3_8b_instruct": 55.60000000000001
1028
- },
1029
- "gpt4o_judge": {
1030
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 63.9
1031
- }
1032
- },
1033
- "mmau_mini_music": {
1034
- "string_match": {
1035
- "Qwen-Audio-Chat": 0.4311377245508982,
1036
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6077844311377245,
1037
- "Qwen2-Audio-7B-Instruct": 0.45808383233532934,
1038
- "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.39520958083832336,
1039
- "gemini-1.5-flash": 0.2904191616766467,
1040
- "phi_4_multimodal_instruct": 0.6377245508982036,
1041
- "seallms_audio_7b": 0.6047904191616766,
1042
- "SALMONN_7B": 0.4820359281437126,
1043
- "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5
1044
- },
1045
- "llama3_70b_judge": {
1046
- "Qwen-Audio-Chat": 0.5958083832335329,
1047
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6437125748502994,
1048
- "Qwen2-Audio-7B-Instruct": 0.6017964071856288,
1049
- "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.44011976047904194,
1050
- "gemini-1.5-flash": 0.5868263473053892,
1051
- "phi_4_multimodal_instruct": 0.688622754491018,
1052
- "seallms_audio_7b": 0.6646706586826348,
1053
- "SALMONN_7B": 0.5598802395209581,
1054
- "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5359281437125748
1055
- },
1056
- "gpt4o_judge": {
1057
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6347305389221557
1058
- }
1059
- },
1060
- "mmau_mini_sound": {
1061
- "string_match": {
1062
- "Qwen-Audio-Chat": 0.43543543543543545,
1063
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6606606606606606,
1064
- "Qwen2-Audio-7B-Instruct": 0.4744744744744745,
1065
- "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.45045045045045046,
1066
- "gemini-1.5-flash": 0.3483483483483483,
1067
- "phi_4_multimodal_instruct": 0.5975975975975976,
1068
- "seallms_audio_7b": 0.5165165165165165,
1069
- "SALMONN_7B": 0.4594594594594595,
1070
- "cascade_whisper_large_v3_llama_3_8b_instruct": 0.46546546546546547
1071
- },
1072
- "llama3_70b_judge": {
1073
- "Qwen-Audio-Chat": 0.5945945945945946,
1074
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.7027027027027027,
1075
- "Qwen2-Audio-7B-Instruct": 0.6306306306306306,
1076
- "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.5345345345345346,
1077
- "gemini-1.5-flash": 0.5885885885885885,
1078
- "phi_4_multimodal_instruct": 0.6456456456456456,
1079
- "seallms_audio_7b": 0.6486486486486487,
1080
- "SALMONN_7B": 0.5105105105105106,
1081
- "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5105105105105106
1082
- },
1083
- "gpt4o_judge": {
1084
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6996996996996997
1085
- }
1086
- },
1087
- "mmau_mini_speech": {
1088
- "string_match": {
1089
- "Qwen-Audio-Chat": 0.2882882882882883,
1090
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5465465465465466,
1091
- "Qwen2-Audio-7B-Instruct": 0.3993993993993994,
1092
- "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.5555555555555556,
1093
- "gemini-1.5-flash": 0.3033033033033033,
1094
- "phi_4_multimodal_instruct": 0.3993993993993994,
1095
- "seallms_audio_7b": 0.42342342342342343,
1096
- "SALMONN_7B": 0.2732732732732733,
1097
- "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5645645645645646
1098
- },
1099
- "llama3_70b_judge": {
1100
- "Qwen-Audio-Chat": 0.4174174174174174,
1101
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5915915915915916,
1102
- "Qwen2-Audio-7B-Instruct": 0.5345345345345346,
1103
- "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.6066066066066066,
1104
- "gemini-1.5-flash": 0.5705705705705706,
1105
- "phi_4_multimodal_instruct": 0.44744744744744747,
1106
- "seallms_audio_7b": 0.4924924924924925,
1107
- "SALMONN_7B": 0.3813813813813814,
1108
- "cascade_whisper_large_v3_llama_3_8b_instruct": 0.6216216216216216
1109
- },
1110
- "gpt4o_judge": {
1111
- "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5825825825825826
1112
- }
1113
- },
1114
  "imda_part5_30s_sqa_test": {
1115
  "llama3_70b_judge": {
1116
  "Qwen-Audio-Chat": 61.260000000000005,
 
16
  "MERaLiON-AudioLLM-Whisper-SEA-LION": 88.77146631439894
17
  }
18
  },
19
+ "mmau_mini": {
20
+ "string_match": {
21
+ "gpt-4o-audio": 0.0,
22
+ "Qwen-Audio-Chat": 38.5,
23
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 60.5,
24
+ "Qwen2-Audio-7B-Instruct": 44.4,
25
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 46.7,
26
+ "gemini-1.5-flash": 31.4,
27
+ "phi_4_multimodal_instruct": 54.50000000000001,
28
+ "seallms_audio_7b": 51.5,
29
+ "SALMONN_7B": 40.5,
30
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 51.0
31
+ },
32
+ "llama3_70b_judge": {
33
+ "Qwen-Audio-Chat": 53.6,
34
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 64.60000000000001,
35
+ "Qwen2-Audio-7B-Instruct": 58.9,
36
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 52.7,
37
+ "gemini-1.5-flash": 58.199999999999996,
38
+ "phi_4_multimodal_instruct": 59.4,
39
+ "seallms_audio_7b": 60.199999999999996,
40
+ "SALMONN_7B": 48.4,
41
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 55.60000000000001
42
+ },
43
+ "gpt4o_judge": {
44
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 63.9
45
+ }
46
+ },
47
+ "mmau_mini_music": {
48
+ "string_match": {
49
+ "gpt-4o-audio": 0.0,
50
+ "Qwen-Audio-Chat": 0.4311377245508982,
51
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6077844311377245,
52
+ "Qwen2-Audio-7B-Instruct": 0.45808383233532934,
53
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.39520958083832336,
54
+ "gemini-1.5-flash": 0.2904191616766467,
55
+ "phi_4_multimodal_instruct": 0.6377245508982036,
56
+ "seallms_audio_7b": 0.6047904191616766,
57
+ "SALMONN_7B": 0.4820359281437126,
58
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5
59
+ },
60
+ "llama3_70b_judge": {
61
+ "Qwen-Audio-Chat": 0.5958083832335329,
62
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6437125748502994,
63
+ "Qwen2-Audio-7B-Instruct": 0.6017964071856288,
64
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.44011976047904194,
65
+ "gemini-1.5-flash": 0.5868263473053892,
66
+ "phi_4_multimodal_instruct": 0.688622754491018,
67
+ "seallms_audio_7b": 0.6646706586826348,
68
+ "SALMONN_7B": 0.5598802395209581,
69
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5359281437125748
70
+ },
71
+ "gpt4o_judge": {
72
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6347305389221557
73
+ }
74
+ },
75
+ "mmau_mini_sound": {
76
+ "string_match": {
77
+ "gpt-4o-audio": 0.0,
78
+ "Qwen-Audio-Chat": 0.43543543543543545,
79
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6606606606606606,
80
+ "Qwen2-Audio-7B-Instruct": 0.4744744744744745,
81
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.45045045045045046,
82
+ "gemini-1.5-flash": 0.3483483483483483,
83
+ "phi_4_multimodal_instruct": 0.5975975975975976,
84
+ "seallms_audio_7b": 0.5165165165165165,
85
+ "SALMONN_7B": 0.4594594594594595,
86
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.46546546546546547
87
+ },
88
+ "llama3_70b_judge": {
89
+ "Qwen-Audio-Chat": 0.5945945945945946,
90
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.7027027027027027,
91
+ "Qwen2-Audio-7B-Instruct": 0.6306306306306306,
92
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.5345345345345346,
93
+ "gemini-1.5-flash": 0.5885885885885885,
94
+ "phi_4_multimodal_instruct": 0.6456456456456456,
95
+ "seallms_audio_7b": 0.6486486486486487,
96
+ "SALMONN_7B": 0.5105105105105106,
97
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5105105105105106
98
+ },
99
+ "gpt4o_judge": {
100
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6996996996996997
101
+ }
102
+ },
103
+ "mmau_mini_speech": {
104
+ "string_match": {
105
+ "gpt-4o-audio": 0.0,
106
+ "Qwen-Audio-Chat": 0.2882882882882883,
107
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5465465465465466,
108
+ "Qwen2-Audio-7B-Instruct": 0.3993993993993994,
109
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.5555555555555556,
110
+ "gemini-1.5-flash": 0.3033033033033033,
111
+ "phi_4_multimodal_instruct": 0.3993993993993994,
112
+ "seallms_audio_7b": 0.42342342342342343,
113
+ "SALMONN_7B": 0.2732732732732733,
114
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.5645645645645646
115
+ },
116
+ "llama3_70b_judge": {
117
+ "Qwen-Audio-Chat": 0.4174174174174174,
118
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5915915915915916,
119
+ "Qwen2-Audio-7B-Instruct": 0.5345345345345346,
120
+ "cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.6066066066066066,
121
+ "gemini-1.5-flash": 0.5705705705705706,
122
+ "phi_4_multimodal_instruct": 0.44744744744744747,
123
+ "seallms_audio_7b": 0.4924924924924925,
124
+ "SALMONN_7B": 0.3813813813813814,
125
+ "cascade_whisper_large_v3_llama_3_8b_instruct": 0.6216216216216216
126
+ },
127
+ "gpt4o_judge": {
128
+ "MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5825825825825826
129
+ }
130
+ },
131
  "slue_p2_sqa5_test": {
132
  "llama3_70b_judge": {
133
  "gpt-4o-audio": 89.41176470588235,
 
1115
  "cascade_whisper_large_v3_llama_3_8b_instruct": 5.987143868370054
1116
  }
1117
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1118
  "imda_part5_30s_sqa_test": {
1119
  "llama3_70b_judge": {
1120
  "Qwen-Audio-Chat": 61.260000000000005,