Add Qwen3-32B results
results/zero-shot/Qwen3-32B.json
ADDED
@@ -0,0 +1,201 @@
+{
+  "model": {
+    "model": "Qwen/Qwen3-32B",
+    "architecture": "Qwen3ForCausalLM",
+    "dtype": "bfloat16",
+    "type": "instruction-tuned",
+    "num_parameters": "32b",
+    "api": "hf"
+  },
+  "results": [
+    {
+      "name": "xquad_tr",
+      "task": "extractive_question_answering",
+      "exact_match": 0.07394957983193277,
+      "f1": 0.18168641044790357
+    },
+    {
+      "name": "xcopa_tr",
+      "task": "multiple_choice",
+      "acc": 0.71,
+      "acc_norm": 0.71
+    },
+    {
+      "name": "turkishmmlu",
+      "task": "multiple_choice",
+      "acc": 0.13555555555555557,
+      "acc_norm": 0.13555555555555557
+    },
+    {
+      "name": "turkish_plu",
+      "task": "multiple_choice",
+      "acc": 0.53888,
+      "acc_norm": 0.43808
+    },
+    {
+      "name": "turkish_plu_goal_inference",
+      "task": "multiple_choice",
+      "acc": 0.47192353643966545,
+      "acc_norm": 0.2986857825567503
+    },
+    {
+      "name": "turkish_plu_next_event_prediction",
+      "task": "multiple_choice",
+      "acc": 0.5526717557251909,
+      "acc_norm": 0.29770992366412213
+    },
+    {
+      "name": "turkish_plu_step_inference",
+      "task": "multiple_choice",
+      "acc": 0.4035947712418301,
+      "acc_norm": 0.39869281045751637
+    },
+    {
+      "name": "turkish_plu_step_ordering",
+      "task": "multiple_choice",
+      "acc": 0.6660137120470128,
+      "acc_norm": 0.6660137120470128
+    },
+    {
+      "name": "turkce_atasozleri",
+      "task": "multiple_choice",
+      "acc": 0.8682080924855491,
+      "acc_norm": 0.8682080924855491
+    },
+    {
+      "name": "check_worthiness",
+      "task": "multiple_choice",
+      "acc": 0.5594149908592322,
+      "acc_norm": 0.6238574040219378
+    },
+    {
+      "name": "relevance_judgment",
+      "task": "multiple_choice",
+      "acc": 0.600091407678245,
+      "acc_norm": 0.5781535648994516
+    },
+    {
+      "name": "tquad",
+      "task": "extractive_question_answering",
+      "exact_match": 0.10089686098654709,
+      "f1": 0.2031672244380758
+    },
+    {
+      "name": "sts_tr",
+      "task": "text_classification",
+      "acc": 0.19289340101522842,
+      "acc_norm": 0.11820159535895576
+    },
+    {
+      "name": "offenseval_tr",
+      "task": "text_classification",
+      "acc": 0.23469387755102042,
+      "acc_norm": 0.7970521541950113
+    },
+    {
+      "name": "mnli_tr",
+      "task": "natural_language_inference",
+      "acc": 0.3214,
+      "acc_norm": 0.3213
+    },
+    {
+      "name": "snli_tr",
+      "task": "natural_language_inference",
+      "acc": 0.334,
+      "acc_norm": 0.3234
+    },
+    {
+      "name": "xnli_tr",
+      "task": "natural_language_inference",
+      "acc": 0.3267465069860279,
+      "acc_norm": 0.3333333333333333
+    },
+    {
+      "name": "news_cat",
+      "task": "text_classification",
+      "acc": 0.532,
+      "acc_norm": 0.324
+    },
+    {
+      "name": "ironytr",
+      "task": "text_classification",
+      "acc": 0.515,
+      "acc_norm": 0.5
+    },
+    {
+      "name": "exams_tr",
+      "task": "multiple_choice",
+      "acc": 0.356234096692112,
+      "acc_norm": 0.33587786259541985
+    },
+    {
+      "name": "circumflex_tr",
+      "task": "multiple_choice",
+      "acc": 0.5857142857142857,
+      "acc_norm": 0.5857142857142857
+    },
+    {
+      "name": "bilmecebench",
+      "task": "multiple_choice",
+      "acc": 0.6131221719457014,
+      "acc_norm": 0.6131221719457014
+    },
+    {
+      "name": "belebele_tr",
+      "task": "multiple_choice",
+      "acc": 0.8644444444444445,
+      "acc_norm": 0.8644444444444445
+    },
+    {
+      "name": "turkishmmlu",
+      "task": "multiple_choice",
+      "acc": 0.6677777777777778,
+      "acc_norm": 0.6677777777777778
+    },
+    {
+      "name": "xlsum_tr",
+      "task": "summarization",
+      "rouge1": 0.30984540466217936,
+      "rouge2": 0.1526819990882598,
+      "rougeL": 0.24056293296185868
+    },
+    {
+      "name": "wmt-tr-en-prompt",
+      "task": "machine_translation",
+      "wer": 1.9097642345703865,
+      "bleu": 0.07525786863429962
+    },
+    {
+      "name": "wiki_lingua_tr",
+      "task": "summarization",
+      "rouge1": 0.19307064669127558,
+      "rouge2": 0.07085141067185688,
+      "rougeL": 0.14979216116861716
+    },
+    {
+      "name": "tr-wikihow-summ",
+      "task": "summarization",
+      "rouge1": 0.19704935938448176,
+      "rouge2": 0.06326750543141127,
+      "rougeL": 0.1459271843295593
+    },
+    {
+      "name": "mlsum_tr",
+      "task": "summarization",
+      "rouge1": 0.4022274311113533,
+      "rouge2": 0.255063284434877,
+      "rougeL": 0.33089850060045467
+    },
+    {
+      "name": "gecturk_generation",
+      "task": "grammatical_error_correction",
+      "exact_match": 0.2694400308151572
+    },
+    {
+      "name": "mkqa_tr",
+      "task": "extractive_question_answering",
+      "exact_match": 0.011245930748742231,
+      "f1": 0.044142577167201706
+    }
+  ]
+}
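For anyone consuming this file, here is a minimal sketch of reading the added results and printing one headline metric per task. The field names ("model", "results", "name", "task", "acc", "exact_match", "rouge1", "bleu") come from the JSON above; the metric-priority order and the helper names (primary_metric, main) are illustrative assumptions, not part of this repository.

```python
import json

# Assumed priority when an entry reports several metrics; not defined by the file itself.
METRIC_PRIORITY = ["acc", "exact_match", "rouge1", "bleu"]


def primary_metric(entry: dict):
    """Return (metric_name, value) for the first metric present in a results entry."""
    for metric in METRIC_PRIORITY:
        if metric in entry:
            return metric, entry[metric]
    return None, None


def main(path: str = "results/zero-shot/Qwen3-32B.json") -> None:
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    model = data["model"]
    print(f"Model: {model['model']} ({model['num_parameters']}, {model['dtype']})")

    for entry in data["results"]:
        metric, value = primary_metric(entry)
        if metric is not None:
            print(f"{entry['name']:<40} {entry['task']:<35} {metric}={value:.4f}")


if __name__ == "__main__":
    main()
```

Running it from the repository root would list, for example, belebele_tr with acc=0.8644 and xquad_tr with exact_match=0.0739, matching the values recorded in the diff.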