Spaces:
Running
Running
Add TurkishMMLU dataset and results (#2)
Browse files
- Add TurkishMMLU dataset and results (b949e7e3ca62dc6da4c6623dbee14f0cc1843004)
- data/datasets.json +8 -0
- results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json +6 -0
- results/zero-shot/Llama-3.3-70B-Instruct.json +6 -0
- results/zero-shot/Ministral-8B-Instruct.json +6 -0
- results/zero-shot/Mistral-7B-Instruct-v0.3.json +6 -0
- results/zero-shot/Mistral-7B-v0.3.json +6 -0
- results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json +6 -0
- results/zero-shot/Qwen2.5-0.5B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-0.5B.json +6 -0
- results/zero-shot/Qwen2.5-1.5B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-1.5B.json +6 -0
- results/zero-shot/Qwen2.5-14B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-14B.json +6 -0
- results/zero-shot/Qwen2.5-3B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-3B.json +6 -0
- results/zero-shot/Qwen2.5-7B-Instruct.json +6 -0
- results/zero-shot/Qwen2.5-7B.json +6 -0
- results/zero-shot/aya-23-35B.json +6 -0
- results/zero-shot/aya-23-8b.json +6 -0
- results/zero-shot/aya-expanse-32b.json +6 -0
- results/zero-shot/aya-expanse-8b.json +6 -0
- results/zero-shot/aya101.json +6 -0
- results/zero-shot/commencis-7b.json +6 -0
- results/zero-shot/kanarya-2b.json +6 -0
- results/zero-shot/llama-3-8b-instruct.json +7 -0
- results/zero-shot/llama-3-8b.json +7 -0
- results/zero-shot/llama-3.1-8b-instruct.json +7 -0
- results/zero-shot/llama-3.1-8b.json +6 -0
- results/zero-shot/llama-3.2-1b.json +7 -0
- results/zero-shot/llama-3.2-3b-instruct.json +7 -0
- results/zero-shot/llama-3.2-3b.json +7 -0
- results/zero-shot/mistral-7b.json +6 -0
- results/zero-shot/trendyol-7b.json +6 -0
- results/zero-shot/turna.json +6 -0
data/datasets.json
CHANGED
@@ -189,5 +189,13 @@
|
|
189 |
"url": "https://huggingface.co/datasets/furkanunluturk/turkce-atasozleri",
|
190 |
"hf_name": "abrek/turkce-atasozleri-lm-evaluation-harness",
|
191 |
"generative": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
}
|
193 |
}
|
|
|
189 |
"url": "https://huggingface.co/datasets/furkanunluturk/turkce-atasozleri",
|
190 |
"hf_name": "abrek/turkce-atasozleri-lm-evaluation-harness",
|
191 |
"generative": false
|
192 |
+
},
|
193 |
+
"turkishmmlu": {
|
194 |
+
"name": "TurkishMMLU",
|
195 |
+
"task": "multiple_choice",
|
196 |
+
"description": "TurkishMMLU is a multiple-choice dataset for the Turkish Natural Language Processing (NLP) community, based on Turkish high school curricula for nine subjects.",
|
197 |
+
"url": "https://huggingface.co/datasets/AYueksel/TurkishMMLU",
|
198 |
+
"hf_name": "AYueksel/TurkishMMLU",
|
199 |
+
"generative": false
|
200 |
}
|
201 |
}
|
results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json
CHANGED
@@ -174,6 +174,12 @@
|
|
174 |
"task": "multiple_choice",
|
175 |
"acc": 0.48092485549132946,
|
176 |
"acc_norm": 0.48092485549132946
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
}
|
178 |
]
|
179 |
}
|
|
|
174 |
"task": "multiple_choice",
|
175 |
"acc": 0.48092485549132946,
|
176 |
"acc_norm": 0.48092485549132946
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"name": "turkishmmlu",
|
180 |
+
"task": "multiple_choice",
|
181 |
+
"acc": 0.25555555555555554,
|
182 |
+
"acc_norm": 0.25555555555555554
|
183 |
}
|
184 |
]
|
185 |
}
|
results/zero-shot/Llama-3.3-70B-Instruct.json
CHANGED
@@ -176,6 +176,12 @@
|
|
176 |
"task": "multiple_choice",
|
177 |
"acc": 0.9254335260115607,
|
178 |
"acc_norm": 0.9254335260115607
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
}
|
180 |
]
|
181 |
}
|
|
|
176 |
"task": "multiple_choice",
|
177 |
"acc": 0.9254335260115607,
|
178 |
"acc_norm": 0.9254335260115607
|
179 |
+
},
|
180 |
+
{
|
181 |
+
"name": "turkishmmlu",
|
182 |
+
"task": "multiple_choice",
|
183 |
+
"acc": 0.646,
|
184 |
+
"acc_norm": 0.646
|
185 |
}
|
186 |
]
|
187 |
}
|
results/zero-shot/Ministral-8B-Instruct.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.4046242774566474,
|
175 |
"acc_norm": 0.4046242774566474
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.4046242774566474,
|
175 |
"acc_norm": 0.4046242774566474
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.2644444444444444,
|
181 |
+
"acc_norm": 0.2644444444444444
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/Mistral-7B-Instruct-v0.3.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.3,
|
175 |
"acc_norm": 0.3
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.3,
|
175 |
"acc_norm": 0.3
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.19555555555555557,
|
181 |
+
"acc_norm": 0.19555555555555557
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/Mistral-7B-v0.3.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.27572254335260116,
|
175 |
"acc_norm": 0.27572254335260116
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.27572254335260116,
|
175 |
"acc_norm": 0.27572254335260116
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.2688888888888889,
|
181 |
+
"acc_norm": 0.2688888888888889
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json
CHANGED
@@ -175,6 +175,12 @@
|
|
175 |
"task": "multiple_choice",
|
176 |
"acc": 0.5150289017341041,
|
177 |
"acc_norm": 0.5150289017341041
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
}
|
179 |
]
|
180 |
}
|
|
|
175 |
"task": "multiple_choice",
|
176 |
"acc": 0.5150289017341041,
|
177 |
"acc_norm": 0.5150289017341041
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"name": "turkishmmlu",
|
181 |
+
"task": "multiple_choice",
|
182 |
+
"acc": 0.358,
|
183 |
+
"acc_norm": 0.358
|
184 |
}
|
185 |
]
|
186 |
}
|
results/zero-shot/Qwen2.5-0.5B-Instruct.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.2832369942196532,
|
175 |
"acc_norm": 0.2832369942196532
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.2832369942196532,
|
175 |
"acc_norm": 0.2832369942196532
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.2111111111111111,
|
181 |
+
"acc_norm": 0.2111111111111111
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/Qwen2.5-0.5B.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.20346820809248556,
|
175 |
"acc_norm": 0.20346820809248556
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.20346820809248556,
|
175 |
"acc_norm": 0.20346820809248556
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.17888888888888888,
|
181 |
+
"acc_norm": 0.17888888888888888
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/Qwen2.5-1.5B-Instruct.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.3468208092485549,
|
175 |
"acc_norm": 0.3468208092485549
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.3468208092485549,
|
175 |
"acc_norm": 0.3468208092485549
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.28888888888888886,
|
181 |
+
"acc_norm": 0.28888888888888886
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/Qwen2.5-1.5B.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.2300578034682081,
|
175 |
"acc_norm": 0.2300578034682081
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.2300578034682081,
|
175 |
"acc_norm": 0.2300578034682081
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.23,
|
181 |
+
"acc_norm": 0.23
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/Qwen2.5-14B-Instruct.json
CHANGED
@@ -175,6 +175,12 @@
|
|
175 |
"task": "multiple_choice",
|
176 |
"acc": 0.7832369942196532,
|
177 |
"acc_norm": 0.7832369942196532
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
}
|
179 |
]
|
180 |
}
|
|
|
175 |
"task": "multiple_choice",
|
176 |
"acc": 0.7832369942196532,
|
177 |
"acc_norm": 0.7832369942196532
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"name": "turkishmmlu",
|
181 |
+
"task": "multiple_choice",
|
182 |
+
"acc": 0.5944444444444444,
|
183 |
+
"acc_norm": 0.5944444444444444
|
184 |
}
|
185 |
]
|
186 |
}
|
results/zero-shot/Qwen2.5-14B.json
CHANGED
@@ -175,6 +175,12 @@
|
|
175 |
"task": "multiple_choice",
|
176 |
"acc": 0.753757225433526,
|
177 |
"acc_norm": 0.753757225433526
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
}
|
179 |
]
|
180 |
}
|
|
|
175 |
"task": "multiple_choice",
|
176 |
"acc": 0.753757225433526,
|
177 |
"acc_norm": 0.753757225433526
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"name": "turkishmmlu",
|
181 |
+
"task": "multiple_choice",
|
182 |
+
"acc": 0.5622222222222222,
|
183 |
+
"acc_norm": 0.5622222222222222
|
184 |
}
|
185 |
]
|
186 |
}
|
results/zero-shot/Qwen2.5-3B-Instruct.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.6011560693641619,
|
175 |
"acc_norm": 0.6011560693641619
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.6011560693641619,
|
175 |
"acc_norm": 0.6011560693641619
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.37777777777777777,
|
181 |
+
"acc_norm": 0.37777777777777777
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/Qwen2.5-3B.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.4346820809248555,
|
175 |
"acc_norm": 0.4346820809248555
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.4346820809248555,
|
175 |
"acc_norm": 0.4346820809248555
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.22555555555555556,
|
181 |
+
"acc_norm": 0.22555555555555556
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/Qwen2.5-7B-Instruct.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.7121387283236994,
|
175 |
"acc_norm": 0.7121387283236994
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.7121387283236994,
|
175 |
"acc_norm": 0.7121387283236994
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.47555555555555556,
|
181 |
+
"acc_norm": 0.47555555555555556
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/Qwen2.5-7B.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.7352601156069364,
|
175 |
"acc_norm": 0.7352601156069364
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.7352601156069364,
|
175 |
"acc_norm": 0.7352601156069364
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.49333333333333335,
|
181 |
+
"acc_norm": 0.49333333333333335
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/aya-23-35B.json
CHANGED
@@ -175,6 +175,12 @@
|
|
175 |
"task": "multiple_choice",
|
176 |
"acc": 0.5687861271676301,
|
177 |
"acc_norm": 0.5687861271676301
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
}
|
179 |
]
|
180 |
}
|
|
|
175 |
"task": "multiple_choice",
|
176 |
"acc": 0.5687861271676301,
|
177 |
"acc_norm": 0.5687861271676301
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"name": "turkishmmlu",
|
181 |
+
"task": "multiple_choice",
|
182 |
+
"acc": 0.4533333333333333,
|
183 |
+
"acc_norm": 0.4533333333333333
|
184 |
}
|
185 |
]
|
186 |
}
|
results/zero-shot/aya-23-8b.json
CHANGED
@@ -169,6 +169,12 @@
|
|
169 |
"task": "multiple_choice",
|
170 |
"acc": 0.44971098265895953,
|
171 |
"acc_norm": 0.44971098265895953
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
}
|
173 |
]
|
174 |
}
|
|
|
169 |
"task": "multiple_choice",
|
170 |
"acc": 0.44971098265895953,
|
171 |
"acc_norm": 0.44971098265895953
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"name": "turkishmmlu",
|
175 |
+
"task": "multiple_choice",
|
176 |
+
"acc": 0.33,
|
177 |
+
"acc_norm": 0.33
|
178 |
}
|
179 |
]
|
180 |
}
|
results/zero-shot/aya-expanse-32b.json
CHANGED
@@ -174,6 +174,12 @@
|
|
174 |
"task": "multiple_choice",
|
175 |
"acc": 0.8236994219653179,
|
176 |
"acc_norm": 0.8236994219653179
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
}
|
178 |
]
|
179 |
}
|
|
|
174 |
"task": "multiple_choice",
|
175 |
"acc": 0.8236994219653179,
|
176 |
"acc_norm": 0.8236994219653179
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"name": "turkishmmlu",
|
180 |
+
"task": "multiple_choice",
|
181 |
+
"acc": 0.5688888888888889,
|
182 |
+
"acc_norm": 0.5688888888888889
|
183 |
}
|
184 |
]
|
185 |
}
|
results/zero-shot/aya-expanse-8b.json
CHANGED
@@ -160,6 +160,12 @@
|
|
160 |
"task": "multiple_choice",
|
161 |
"acc": 0.723121387283237,
|
162 |
"acc_norm": 0.723121387283237
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
}
|
164 |
]
|
165 |
}
|
|
|
160 |
"task": "multiple_choice",
|
161 |
"acc": 0.723121387283237,
|
162 |
"acc_norm": 0.723121387283237
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"name": "turkishmmlu",
|
166 |
+
"task": "multiple_choice",
|
167 |
+
"acc": 0.46555555555555556,
|
168 |
+
"acc_norm": 0.46555555555555556
|
169 |
}
|
170 |
]
|
171 |
}
|
results/zero-shot/aya101.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.009826589595375723,
|
175 |
"acc_norm": 0.009826589595375723
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.009826589595375723,
|
175 |
"acc_norm": 0.009826589595375723
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.374,
|
181 |
+
"acc_norm": 0.374
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/commencis-7b.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.22658959537572254,
|
175 |
"acc_norm": 0.22658959537572254
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.22658959537572254,
|
175 |
"acc_norm": 0.22658959537572254
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.24666666666666667,
|
181 |
+
"acc_norm": 0.24666666666666667
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/kanarya-2b.json
CHANGED
@@ -172,6 +172,12 @@
|
|
172 |
"task": "multiple_choice",
|
173 |
"acc": 0.0,
|
174 |
"acc_norm": 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
}
|
176 |
]
|
177 |
}
|
|
|
172 |
"task": "multiple_choice",
|
173 |
"acc": 0.0,
|
174 |
"acc_norm": 0.0
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"name": "turkishmmlu",
|
178 |
+
"task": "multiple_choice",
|
179 |
+
"acc": 0.18,
|
180 |
+
"acc_norm": 0.18
|
181 |
}
|
182 |
]
|
183 |
}
|
results/zero-shot/llama-3-8b-instruct.json
CHANGED
@@ -168,6 +168,13 @@
|
|
168 |
"task": "multiple_choice",
|
169 |
"acc": 0.6947976878612717,
|
170 |
"acc_norm": 0.6947976878612717
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
}
|
|
|
172 |
]
|
173 |
}
|
|
|
168 |
"task": "multiple_choice",
|
169 |
"acc": 0.6947976878612717,
|
170 |
"acc_norm": 0.6947976878612717
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"name": "turkishmmlu",
|
174 |
+
"task": "multiple_choice",
|
175 |
+
"acc": 0.3811111111111111,
|
176 |
+
"acc_norm": 0.3811111111111111
|
177 |
}
|
178 |
+
|
179 |
]
|
180 |
}
|
results/zero-shot/llama-3-8b.json
CHANGED
@@ -167,6 +167,13 @@
|
|
167 |
"task": "multiple_choice",
|
168 |
"acc": 0.44046242774566474,
|
169 |
"acc_norm": 0.44046242774566474
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
}
|
|
|
171 |
]
|
172 |
}
|
|
|
167 |
"task": "multiple_choice",
|
168 |
"acc": 0.44046242774566474,
|
169 |
"acc_norm": 0.44046242774566474
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"name": "turkishmmlu",
|
173 |
+
"task": "multiple_choice",
|
174 |
+
"acc": 0.2544444444444444,
|
175 |
+
"acc_norm": 0.2544444444444444
|
176 |
}
|
177 |
+
|
178 |
]
|
179 |
}
|
results/zero-shot/llama-3.1-8b-instruct.json
CHANGED
@@ -167,6 +167,13 @@
|
|
167 |
"task": "multiple_choice",
|
168 |
"acc": 0.7549132947976879,
|
169 |
"acc_norm": 0.7549132947976879
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
}
|
|
|
171 |
]
|
172 |
}
|
|
|
167 |
"task": "multiple_choice",
|
168 |
"acc": 0.7549132947976879,
|
169 |
"acc_norm": 0.7549132947976879
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"name": "turkishmmlu",
|
173 |
+
"task": "multiple_choice",
|
174 |
+
"acc": 0.3811111111111111,
|
175 |
+
"acc_norm": 0.3811111111111111
|
176 |
}
|
177 |
+
|
178 |
]
|
179 |
}
|
results/zero-shot/llama-3.1-8b.json
CHANGED
@@ -167,6 +167,12 @@
|
|
167 |
"task": "multiple_choice",
|
168 |
"acc": 0.5410404624277456,
|
169 |
"acc_norm": 0.5410404624277456
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
}
|
171 |
]
|
172 |
}
|
|
|
167 |
"task": "multiple_choice",
|
168 |
"acc": 0.5410404624277456,
|
169 |
"acc_norm": 0.5410404624277456
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"name": "turkishmmlu",
|
173 |
+
"task": "multiple_choice",
|
174 |
+
"acc": 0.3055555555555556,
|
175 |
+
"acc_norm": 0.3055555555555556
|
176 |
}
|
177 |
]
|
178 |
}
|
results/zero-shot/llama-3.2-1b.json
CHANGED
@@ -199,6 +199,13 @@
|
|
199 |
"task": "multiple_choice",
|
200 |
"acc": 0.21676300578034682,
|
201 |
"acc_norm": 0.21676300578034682
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
}
|
|
|
203 |
]
|
204 |
}
|
|
|
199 |
"task": "multiple_choice",
|
200 |
"acc": 0.21676300578034682,
|
201 |
"acc_norm": 0.21676300578034682
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"name": "turkishmmlu",
|
205 |
+
"task": "multiple_choice",
|
206 |
+
"acc": 0.18888888888888888,
|
207 |
+
"acc_norm": 0.18888888888888888
|
208 |
}
|
209 |
+
|
210 |
]
|
211 |
}
|
results/zero-shot/llama-3.2-3b-instruct.json
CHANGED
@@ -192,6 +192,13 @@
|
|
192 |
"task": "multiple_choice",
|
193 |
"acc": 0.010982658959537572,
|
194 |
"acc_norm": 0.010982658959537572
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
}
|
|
|
196 |
]
|
197 |
}
|
|
|
192 |
"task": "multiple_choice",
|
193 |
"acc": 0.010982658959537572,
|
194 |
"acc_norm": 0.010982658959537572
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"name": "turkishmmlu",
|
198 |
+
"task": "multiple_choice",
|
199 |
+
"acc": 0.34444444444444444,
|
200 |
+
"acc_norm": 0.34444444444444444
|
201 |
}
|
202 |
+
|
203 |
]
|
204 |
}
|
results/zero-shot/llama-3.2-3b.json
CHANGED
@@ -161,6 +161,13 @@
|
|
161 |
"task": "multiple_choice",
|
162 |
"acc": 0.1994219653179191,
|
163 |
"acc_norm": 0.1994219653179191
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
}
|
|
|
165 |
]
|
166 |
}
|
|
|
161 |
"task": "multiple_choice",
|
162 |
"acc": 0.1994219653179191,
|
163 |
"acc_norm": 0.1994219653179191
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"name": "turkishmmlu",
|
167 |
+
"task": "multiple_choice",
|
168 |
+
"acc": 0.29,
|
169 |
+
"acc_norm": 0.29
|
170 |
}
|
171 |
+
|
172 |
]
|
173 |
}
|
results/zero-shot/mistral-7b.json
CHANGED
@@ -166,6 +166,12 @@
|
|
166 |
"task": "multiple_choice",
|
167 |
"acc": 0.30809248554913293,
|
168 |
"acc_norm": 0.30809248554913293
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
}
|
170 |
]
|
171 |
}
|
|
|
166 |
"task": "multiple_choice",
|
167 |
"acc": 0.30809248554913293,
|
168 |
"acc_norm": 0.30809248554913293
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"name": "turkishmmlu",
|
172 |
+
"task": "multiple_choice",
|
173 |
+
"acc": 0.20333333333333334,
|
174 |
+
"acc_norm": 0.20333333333333334
|
175 |
}
|
176 |
]
|
177 |
}
|
results/zero-shot/trendyol-7b.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.0,
|
175 |
"acc_norm": 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.0,
|
175 |
"acc_norm": 0.0
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.2477777777777778,
|
181 |
+
"acc_norm": 0.2477777777777778
|
182 |
}
|
183 |
]
|
184 |
}
|
results/zero-shot/turna.json
CHANGED
@@ -173,6 +173,12 @@
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.19248554913294796,
|
175 |
"acc_norm": 0.19248554913294796
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
]
|
178 |
}
|
|
|
173 |
"task": "multiple_choice",
|
174 |
"acc": 0.19248554913294796,
|
175 |
"acc_norm": 0.19248554913294796
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkishmmlu",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.19333333333333333,
|
181 |
+
"acc_norm": 0.19333333333333333
|
182 |
}
|
183 |
]
|
184 |
}
|