abrek committed · Commit bdbe179 · verified · 1 Parent(s): 403ba24

Add Qwen3-32B results

Files changed (1)
  1. results/zero-shot/Qwen3-32B.json +201 -0
results/zero-shot/Qwen3-32B.json ADDED
@@ -0,0 +1,201 @@
+ {
+   "model": {
+     "model": "Qwen/Qwen3-32B",
+     "architecture": "Qwen3ForCausalLM",
+     "dtype": "bfloat16",
+     "type": "instruction-tuned",
+     "num_parameters": "32b",
+     "api": "hf"
+   },
+   "results": [
+     {
+       "name": "xquad_tr",
+       "task": "extractive_question_answering",
+       "exact_match": 0.07394957983193277,
+       "f1": 0.18168641044790357
+     },
+     {
+       "name": "xcopa_tr",
+       "task": "multiple_choice",
+       "acc": 0.71,
+       "acc_norm": 0.71
+     },
+     {
+       "name": "turkishmmlu",
+       "task": "multiple_choice",
+       "acc": 0.13555555555555557,
+       "acc_norm": 0.13555555555555557
+     },
+     {
+       "name": "turkish_plu",
+       "task": "multiple_choice",
+       "acc": 0.53888,
+       "acc_norm": 0.43808
+     },
+     {
+       "name": "turkish_plu_goal_inference",
+       "task": "multiple_choice",
+       "acc": 0.47192353643966545,
+       "acc_norm": 0.2986857825567503
+     },
+     {
+       "name": "turkish_plu_next_event_prediction",
+       "task": "multiple_choice",
+       "acc": 0.5526717557251909,
+       "acc_norm": 0.29770992366412213
+     },
+     {
+       "name": "turkish_plu_step_inference",
+       "task": "multiple_choice",
+       "acc": 0.4035947712418301,
+       "acc_norm": 0.39869281045751637
+     },
+     {
+       "name": "turkish_plu_step_ordering",
+       "task": "multiple_choice",
+       "acc": 0.6660137120470128,
+       "acc_norm": 0.6660137120470128
+     },
+     {
+       "name": "turkce_atasozleri",
+       "task": "multiple_choice",
+       "acc": 0.8682080924855491,
+       "acc_norm": 0.8682080924855491
+     },
+     {
+       "name": "check_worthiness",
+       "task": "multiple_choice",
+       "acc": 0.5594149908592322,
+       "acc_norm": 0.6238574040219378
+     },
+     {
+       "name": "relevance_judgment",
+       "task": "multiple_choice",
+       "acc": 0.600091407678245,
+       "acc_norm": 0.5781535648994516
+     },
+     {
+       "name": "tquad",
+       "task": "extractive_question_answering",
+       "exact_match": 0.10089686098654709,
+       "f1": 0.2031672244380758
+     },
+     {
+       "name": "sts_tr",
+       "task": "text_classification",
+       "acc": 0.19289340101522842,
+       "acc_norm": 0.11820159535895576
+     },
+     {
+       "name": "offenseval_tr",
+       "task": "text_classification",
+       "acc": 0.23469387755102042,
+       "acc_norm": 0.7970521541950113
+     },
+     {
+       "name": "mnli_tr",
+       "task": "natural_language_inference",
+       "acc": 0.3214,
+       "acc_norm": 0.3213
+     },
+     {
+       "name": "snli_tr",
+       "task": "natural_language_inference",
+       "acc": 0.334,
+       "acc_norm": 0.3234
+     },
+     {
+       "name": "xnli_tr",
+       "task": "natural_language_inference",
+       "acc": 0.3267465069860279,
+       "acc_norm": 0.3333333333333333
+     },
+     {
+       "name": "news_cat",
+       "task": "text_classification",
+       "acc": 0.532,
+       "acc_norm": 0.324
+     },
+     {
+       "name": "ironytr",
+       "task": "text_classification",
+       "acc": 0.515,
+       "acc_norm": 0.5
+     },
+     {
+       "name": "exams_tr",
+       "task": "multiple_choice",
+       "acc": 0.356234096692112,
+       "acc_norm": 0.33587786259541985
+     },
+     {
+       "name": "circumflex_tr",
+       "task": "multiple_choice",
+       "acc": 0.5857142857142857,
+       "acc_norm": 0.5857142857142857
+     },
+     {
+       "name": "bilmecebench",
+       "task": "multiple_choice",
+       "acc": 0.6131221719457014,
+       "acc_norm": 0.6131221719457014
+     },
+     {
+       "name": "belebele_tr",
+       "task": "multiple_choice",
+       "acc": 0.8644444444444445,
+       "acc_norm": 0.8644444444444445
+     },
+     {
+       "name": "turkishmmlu",
+       "task": "multiple_choice",
+       "acc": 0.6677777777777778,
+       "acc_norm": 0.6677777777777778
+     },
+     {
+       "name": "xlsum_tr",
+       "task": "summarization",
+       "rouge1": 0.30984540466217936,
+       "rouge2": 0.1526819990882598,
+       "rougeL": 0.24056293296185868
+     },
+     {
+       "name": "wmt-tr-en-prompt",
+       "task": "machine_translation",
+       "wer": 1.9097642345703865,
+       "bleu": 0.07525786863429962
+     },
+     {
+       "name": "wiki_lingua_tr",
+       "task": "summarization",
+       "rouge1": 0.19307064669127558,
+       "rouge2": 0.07085141067185688,
+       "rougeL": 0.14979216116861716
+     },
+     {
+       "name": "tr-wikihow-summ",
+       "task": "summarization",
+       "rouge1": 0.19704935938448176,
+       "rouge2": 0.06326750543141127,
+       "rougeL": 0.1459271843295593
+     },
+     {
+       "name": "mlsum_tr",
+       "task": "summarization",
+       "rouge1": 0.4022274311113533,
+       "rouge2": 0.255063284434877,
+       "rougeL": 0.33089850060045467
+     },
+     {
+       "name": "gecturk_generation",
+       "task": "grammatical_error_correction",
+       "exact_match": 0.2694400308151572
+     },
+     {
+       "name": "mkqa_tr",
+       "task": "extractive_question_answering",
+       "exact_match": 0.011245930748742231,
+       "f1": 0.044142577167201706
+     }
+   ]
+ }
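
For quick inspection, a minimal sketch follows (assuming Python 3 with only the standard json module, run from the repository root; the summarize helper and its output format are illustrative, not part of this repo) that loads the added results file and prints one summary line per benchmark entry. Metric keys vary by task (acc/acc_norm, exact_match/f1, rouge*, wer/bleu), so it simply prints whatever numeric fields each entry carries.

import json

# Illustrative helper (not part of this repository): load the added results
# file and print one summary line per benchmark entry.
def summarize(path="results/zero-shot/Qwen3-32B.json"):
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    model = data["model"]
    print(f"model: {model['model']} ({model['num_parameters']}, {model['dtype']}, api={model['api']})")
    for entry in data["results"]:
        # Keep everything except the identifying fields; the rest are metric values.
        metrics = {k: v for k, v in entry.items() if k not in ("name", "task")}
        scores = ", ".join(f"{k}={v:.4f}" for k, v in metrics.items())
        print(f"{entry['name']} [{entry['task']}]: {scores}")

if __name__ == "__main__":
    summarize()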