abrek committed
Commit 3c1441b · verified · 1 Parent(s): 2c0582b

Add TurkishMMLU dataset and results (#2)


- Add TurkishMMLU dataset and results (b949e7e3ca62dc6da4c6623dbee14f0cc1843004)

Files changed (34)
  1. data/datasets.json +8 -0
  2. results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json +6 -0
  3. results/zero-shot/Llama-3.3-70B-Instruct.json +6 -0
  4. results/zero-shot/Ministral-8B-Instruct.json +6 -0
  5. results/zero-shot/Mistral-7B-Instruct-v0.3.json +6 -0
  6. results/zero-shot/Mistral-7B-v0.3.json +6 -0
  7. results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json +6 -0
  8. results/zero-shot/Qwen2.5-0.5B-Instruct.json +6 -0
  9. results/zero-shot/Qwen2.5-0.5B.json +6 -0
  10. results/zero-shot/Qwen2.5-1.5B-Instruct.json +6 -0
  11. results/zero-shot/Qwen2.5-1.5B.json +6 -0
  12. results/zero-shot/Qwen2.5-14B-Instruct.json +6 -0
  13. results/zero-shot/Qwen2.5-14B.json +6 -0
  14. results/zero-shot/Qwen2.5-3B-Instruct.json +6 -0
  15. results/zero-shot/Qwen2.5-3B.json +6 -0
  16. results/zero-shot/Qwen2.5-7B-Instruct.json +6 -0
  17. results/zero-shot/Qwen2.5-7B.json +6 -0
  18. results/zero-shot/aya-23-35B.json +6 -0
  19. results/zero-shot/aya-23-8b.json +6 -0
  20. results/zero-shot/aya-expanse-32b.json +6 -0
  21. results/zero-shot/aya-expanse-8b.json +6 -0
  22. results/zero-shot/aya101.json +6 -0
  23. results/zero-shot/commencis-7b.json +6 -0
  24. results/zero-shot/kanarya-2b.json +6 -0
  25. results/zero-shot/llama-3-8b-instruct.json +7 -0
  26. results/zero-shot/llama-3-8b.json +7 -0
  27. results/zero-shot/llama-3.1-8b-instruct.json +7 -0
  28. results/zero-shot/llama-3.1-8b.json +6 -0
  29. results/zero-shot/llama-3.2-1b.json +7 -0
  30. results/zero-shot/llama-3.2-3b-instruct.json +7 -0
  31. results/zero-shot/llama-3.2-3b.json +7 -0
  32. results/zero-shot/mistral-7b.json +6 -0
  33. results/zero-shot/trendyol-7b.json +6 -0
  34. results/zero-shot/turna.json +6 -0
data/datasets.json CHANGED
@@ -189,5 +189,13 @@
  "url": "https://huggingface.co/datasets/furkanunluturk/turkce-atasozleri",
  "hf_name": "abrek/turkce-atasozleri-lm-evaluation-harness",
  "generative": false
+ },
+ "turkishmmlu": {
+ "name": "TurkishMMLU",
+ "task": "multiple_choice",
+ "description": "TurkishMMLU is a multiple-choice dataset for Turkish Natural Language Processing (NLP) community based on Turkish Highschool Curricula for nine subjects.",
+ "url": "https://huggingface.co/datasets/AYueksel/TurkishMMLU",
+ "hf_name": "AYueksel/TurkishMMLU",
+ "generative": false
  }
}
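
For orientation, here is a minimal sketch (not part of the commit) of how a consumer of `data/datasets.json` could read the new `turkishmmlu` entry and hand its `hf_name` to 🤗 `datasets`. The registry path and the assumption that `AYueksel/TurkishMMLU` loads with a default configuration are mine, not the commit's:

```python
import json

# Read the dataset registry that this commit extends (path as used in this repo).
with open("data/datasets.json") as f:
    registry = json.load(f)

entry = registry["turkishmmlu"]
print(entry["name"], entry["task"], entry["hf_name"])
# -> TurkishMMLU multiple_choice AYueksel/TurkishMMLU

# Hypothetical follow-up: fetch the Hub dataset named by `hf_name`.
# Assumes the dataset exposes a default configuration; pass one explicitly if it does not.
# from datasets import load_dataset
# ds = load_dataset(entry["hf_name"])
```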
results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json CHANGED
@@ -174,6 +174,12 @@
  "task": "multiple_choice",
  "acc": 0.48092485549132946,
  "acc_norm": 0.48092485549132946
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.25555555555555554,
+ "acc_norm": 0.25555555555555554
  }
 ]
}
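
Every results file below gets the same kind of append: a single `{name, task, acc, acc_norm}` object for `turkishmmlu` added at the end of the file's task list, as in the hunk above. As a hedged sketch (not part of the commit), the new scores could be pulled into a quick zero-shot leaderboard like this; the top-level key holding the task list is not visible in these hunks and is assumed to be `"results"`:

```python
import glob
import json

rows = []
for path in sorted(glob.glob("results/zero-shot/*.json")):
    with open(path) as f:
        data = json.load(f)
    # Assumption: each result file keeps its per-task entries in a top-level "results" list;
    # only the tail of that list is visible in the diff hunks.
    for task in data.get("results", []):
        if task.get("name") == "turkishmmlu":
            model = path.rsplit("/", 1)[-1].removesuffix(".json")
            rows.append((model, task["acc"]))

# Print models sorted by TurkishMMLU accuracy, best first.
for model, acc in sorted(rows, key=lambda r: r[1], reverse=True):
    print(f"{model:40s} {acc:.3f}")
```

With the values in this commit, such a table would be led by Llama-3.3-70B-Instruct (0.646) and Qwen2.5-14B-Instruct (0.594).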
results/zero-shot/Llama-3.3-70B-Instruct.json CHANGED
@@ -176,6 +176,12 @@
  "task": "multiple_choice",
  "acc": 0.9254335260115607,
  "acc_norm": 0.9254335260115607
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.646,
+ "acc_norm": 0.646
  }
 ]
}
results/zero-shot/Ministral-8B-Instruct.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.4046242774566474,
  "acc_norm": 0.4046242774566474
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.2644444444444444,
+ "acc_norm": 0.2644444444444444
  }
 ]
}
results/zero-shot/Mistral-7B-Instruct-v0.3.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.3,
  "acc_norm": 0.3
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.19555555555555557,
+ "acc_norm": 0.19555555555555557
  }
 ]
}
results/zero-shot/Mistral-7B-v0.3.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.27572254335260116,
  "acc_norm": 0.27572254335260116
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.2688888888888889,
+ "acc_norm": 0.2688888888888889
  }
 ]
}
results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json CHANGED
@@ -175,6 +175,12 @@
  "task": "multiple_choice",
  "acc": 0.5150289017341041,
  "acc_norm": 0.5150289017341041
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.358,
+ "acc_norm": 0.358
  }
 ]
}
results/zero-shot/Qwen2.5-0.5B-Instruct.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.2832369942196532,
  "acc_norm": 0.2832369942196532
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.2111111111111111,
+ "acc_norm": 0.2111111111111111
  }
 ]
}
results/zero-shot/Qwen2.5-0.5B.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.20346820809248556,
  "acc_norm": 0.20346820809248556
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.17888888888888888,
+ "acc_norm": 0.17888888888888888
  }
 ]
}
results/zero-shot/Qwen2.5-1.5B-Instruct.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.3468208092485549,
  "acc_norm": 0.3468208092485549
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.28888888888888886,
+ "acc_norm": 0.28888888888888886
  }
 ]
}
results/zero-shot/Qwen2.5-1.5B.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.2300578034682081,
  "acc_norm": 0.2300578034682081
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.23,
+ "acc_norm": 0.23
  }
 ]
}
results/zero-shot/Qwen2.5-14B-Instruct.json CHANGED
@@ -175,6 +175,12 @@
  "task": "multiple_choice",
  "acc": 0.7832369942196532,
  "acc_norm": 0.7832369942196532
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.5944444444444444,
+ "acc_norm": 0.5944444444444444
  }
 ]
}
results/zero-shot/Qwen2.5-14B.json CHANGED
@@ -175,6 +175,12 @@
  "task": "multiple_choice",
  "acc": 0.753757225433526,
  "acc_norm": 0.753757225433526
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.5622222222222222,
+ "acc_norm": 0.5622222222222222
  }
 ]
}
results/zero-shot/Qwen2.5-3B-Instruct.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.6011560693641619,
  "acc_norm": 0.6011560693641619
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.37777777777777777,
+ "acc_norm": 0.37777777777777777
  }
 ]
}
results/zero-shot/Qwen2.5-3B.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.4346820809248555,
  "acc_norm": 0.4346820809248555
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.22555555555555556,
+ "acc_norm": 0.22555555555555556
  }
 ]
}
results/zero-shot/Qwen2.5-7B-Instruct.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.7121387283236994,
  "acc_norm": 0.7121387283236994
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.47555555555555556,
+ "acc_norm": 0.47555555555555556
  }
 ]
}
results/zero-shot/Qwen2.5-7B.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.7352601156069364,
  "acc_norm": 0.7352601156069364
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.49333333333333335,
+ "acc_norm": 0.49333333333333335
  }
 ]
}
results/zero-shot/aya-23-35B.json CHANGED
@@ -175,6 +175,12 @@
  "task": "multiple_choice",
  "acc": 0.5687861271676301,
  "acc_norm": 0.5687861271676301
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.4533333333333333,
+ "acc_norm": 0.4533333333333333
  }
 ]
}
results/zero-shot/aya-23-8b.json CHANGED
@@ -169,6 +169,12 @@
  "task": "multiple_choice",
  "acc": 0.44971098265895953,
  "acc_norm": 0.44971098265895953
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.33,
+ "acc_norm": 0.33
  }
 ]
}
results/zero-shot/aya-expanse-32b.json CHANGED
@@ -174,6 +174,12 @@
  "task": "multiple_choice",
  "acc": 0.8236994219653179,
  "acc_norm": 0.8236994219653179
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.5688888888888889,
+ "acc_norm": 0.5688888888888889
  }
 ]
}
results/zero-shot/aya-expanse-8b.json CHANGED
@@ -160,6 +160,12 @@
  "task": "multiple_choice",
  "acc": 0.723121387283237,
  "acc_norm": 0.723121387283237
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.46555555555555556,
+ "acc_norm": 0.46555555555555556
  }
 ]
}
results/zero-shot/aya101.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.009826589595375723,
  "acc_norm": 0.009826589595375723
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.374,
+ "acc_norm": 0.374
  }
 ]
}
results/zero-shot/commencis-7b.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.22658959537572254,
  "acc_norm": 0.22658959537572254
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.24666666666666667,
+ "acc_norm": 0.24666666666666667
  }
 ]
}
results/zero-shot/kanarya-2b.json CHANGED
@@ -172,6 +172,12 @@
  "task": "multiple_choice",
  "acc": 0.0,
  "acc_norm": 0.0
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.18,
+ "acc_norm": 0.18
  }
 ]
}
results/zero-shot/llama-3-8b-instruct.json CHANGED
@@ -168,6 +168,13 @@
  "task": "multiple_choice",
  "acc": 0.6947976878612717,
  "acc_norm": 0.6947976878612717
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.3811111111111111,
+ "acc_norm": 0.3811111111111111
  }
+
 ]
}
results/zero-shot/llama-3-8b.json CHANGED
@@ -167,6 +167,13 @@
  "task": "multiple_choice",
  "acc": 0.44046242774566474,
  "acc_norm": 0.44046242774566474
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.2544444444444444,
+ "acc_norm": 0.2544444444444444
  }
+
 ]
}
results/zero-shot/llama-3.1-8b-instruct.json CHANGED
@@ -167,6 +167,13 @@
  "task": "multiple_choice",
  "acc": 0.7549132947976879,
  "acc_norm": 0.7549132947976879
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.3811111111111111,
+ "acc_norm": 0.3811111111111111
  }
+
 ]
}
results/zero-shot/llama-3.1-8b.json CHANGED
@@ -167,6 +167,12 @@
  "task": "multiple_choice",
  "acc": 0.5410404624277456,
  "acc_norm": 0.5410404624277456
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.3055555555555556,
+ "acc_norm": 0.3055555555555556
  }
 ]
}
results/zero-shot/llama-3.2-1b.json CHANGED
@@ -199,6 +199,13 @@
  "task": "multiple_choice",
  "acc": 0.21676300578034682,
  "acc_norm": 0.21676300578034682
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.18888888888888888,
+ "acc_norm": 0.18888888888888888
  }
+
 ]
}
results/zero-shot/llama-3.2-3b-instruct.json CHANGED
@@ -192,6 +192,13 @@
  "task": "multiple_choice",
  "acc": 0.010982658959537572,
  "acc_norm": 0.010982658959537572
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.34444444444444444,
+ "acc_norm": 0.34444444444444444
  }
+
 ]
}
results/zero-shot/llama-3.2-3b.json CHANGED
@@ -161,6 +161,13 @@
  "task": "multiple_choice",
  "acc": 0.1994219653179191,
  "acc_norm": 0.1994219653179191
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.29,
+ "acc_norm": 0.29
  }
+
 ]
}
results/zero-shot/mistral-7b.json CHANGED
@@ -166,6 +166,12 @@
  "task": "multiple_choice",
  "acc": 0.30809248554913293,
  "acc_norm": 0.30809248554913293
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.20333333333333334,
+ "acc_norm": 0.20333333333333334
  }
 ]
}
results/zero-shot/trendyol-7b.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.0,
  "acc_norm": 0.0
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.2477777777777778,
+ "acc_norm": 0.2477777777777778
  }
 ]
}
results/zero-shot/turna.json CHANGED
@@ -173,6 +173,12 @@
  "task": "multiple_choice",
  "acc": 0.19248554913294796,
  "acc_norm": 0.19248554913294796
+ },
+ {
+ "name": "turkishmmlu",
+ "task": "multiple_choice",
+ "acc": 0.19333333333333333,
+ "acc_norm": 0.19333333333333333
  }
 ]
}