abrek committed on
Commit
2c0582b
·
verified ·
1 Parent(s): 6ee4374

adds turkce_atasozleri dataset and results (#1)

Browse files

- adds turkce_atasozleri dataset and results (5e62ec852c98968ce5a3779433ddc810c073f6f6)

Files changed (35) hide show
  1. data.py +3 -2
  2. data/datasets.json +8 -0
  3. results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json +6 -0
  4. results/zero-shot/Llama-3.3-70B-Instruct.json +6 -0
  5. results/zero-shot/Ministral-8B-Instruct.json +6 -0
  6. results/zero-shot/Mistral-7B-Instruct-v0.3.json +6 -0
  7. results/zero-shot/Mistral-7B-v0.3.json +6 -0
  8. results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json +6 -0
  9. results/zero-shot/Qwen2.5-0.5B-Instruct.json +6 -0
  10. results/zero-shot/Qwen2.5-0.5B.json +6 -0
  11. results/zero-shot/Qwen2.5-1.5B-Instruct.json +6 -0
  12. results/zero-shot/Qwen2.5-1.5B.json +6 -0
  13. results/zero-shot/Qwen2.5-14B-Instruct.json +6 -0
  14. results/zero-shot/Qwen2.5-14B.json +6 -0
  15. results/zero-shot/Qwen2.5-3B-Instruct.json +6 -0
  16. results/zero-shot/Qwen2.5-3B.json +6 -0
  17. results/zero-shot/Qwen2.5-7B-Instruct.json +6 -0
  18. results/zero-shot/Qwen2.5-7B.json +6 -0
  19. results/zero-shot/aya-23-35B.json +6 -0
  20. results/zero-shot/aya-23-8b.json +6 -0
  21. results/zero-shot/aya-expanse-32b.json +6 -0
  22. results/zero-shot/aya-expanse-8b.json +6 -0
  23. results/zero-shot/aya101.json +6 -0
  24. results/zero-shot/commencis-7b.json +6 -0
  25. results/zero-shot/kanarya-2b.json +6 -0
  26. results/zero-shot/llama-3-8b-instruct.json +6 -0
  27. results/zero-shot/llama-3-8b.json +6 -0
  28. results/zero-shot/llama-3.1-8b-instruct.json +6 -0
  29. results/zero-shot/llama-3.1-8b.json +6 -0
  30. results/zero-shot/llama-3.2-1b.json +6 -0
  31. results/zero-shot/llama-3.2-3b-instruct.json +6 -0
  32. results/zero-shot/llama-3.2-3b.json +6 -0
  33. results/zero-shot/mistral-7b.json +6 -0
  34. results/zero-shot/trendyol-7b.json +6 -0
  35. results/zero-shot/turna.json +6 -0
data.py CHANGED
@@ -64,6 +64,7 @@ DATASET_TASK_DICT = {
64
  # other generation
65
  'wmt-tr-en-prompt': Tasks.MACHINE_TRANSLATION,
66
  'gecturk_generation': Tasks.GRAMMATICAL_ERROR_CORRECTION,
 
67
  }
68
 
69
 
@@ -90,8 +91,8 @@ DATASET_GROUPS = {
90
  'description': 'Turkish splits of SQuAD-like datasets XQuAD and TQUAD.',
91
  },
92
  'MCQA': {
93
- 'datasets': ['xcopa_tr', 'exams_tr', 'belebele_tr'] + [x for x in DATASET_TASK_DICT.keys() if x.startswith('turkish_plu')],
94
- 'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele and Turkish PLU.'
95
  },
96
  'TC': {
97
  'datasets': ['sts_tr', 'offenseval_tr', 'news_cat', 'ironytr', ],
 
64
  # other generation
65
  'wmt-tr-en-prompt': Tasks.MACHINE_TRANSLATION,
66
  'gecturk_generation': Tasks.GRAMMATICAL_ERROR_CORRECTION,
67
+ 'turkce_atasozleri': Tasks.MULTIPLE_CHOICE,
68
  }
69
 
70
 
 
91
  'description': 'Turkish splits of SQuAD-like datasets XQuAD and TQUAD.',
92
  },
93
  'MCQA': {
94
+ 'datasets': ['xcopa_tr', 'exams_tr', 'belebele_tr', 'turkce_atasozleri'] + [x for x in DATASET_TASK_DICT.keys() if x.startswith('turkish_plu')],
95
+ 'description': 'Multiple Choice Question Answering datasets: XCOPA, Exams, Belebele, Turkish PLU, and Turkce-Atasozleri.'
96
  },
97
  'TC': {
98
  'datasets': ['sts_tr', 'offenseval_tr', 'news_cat', 'ironytr', ],
data/datasets.json CHANGED
@@ -181,5 +181,13 @@
181
  "url": "https://github.com/GGLAB-KU/gecturk",
182
  "hf_name": "mcemilg/GECTurk-generation",
183
  "generative": true
 
 
 
 
 
 
 
 
184
  }
185
  }
 
181
  "url": "https://github.com/GGLAB-KU/gecturk",
182
  "hf_name": "mcemilg/GECTurk-generation",
183
  "generative": true
184
+ },
185
+ "turkce_atasozleri": {
186
+ "name": "Proverbs",
187
+ "task": "multiple_choice",
188
+ "description": "A dataset for Turkish proverbs and their definitions.",
189
+ "url": "https://huggingface.co/datasets/furkanunluturk/turkce-atasozleri",
190
+ "hf_name": "abrek/turkce-atasozleri-lm-evaluation-harness",
191
+ "generative": false
192
  }
193
  }
results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json CHANGED
@@ -168,6 +168,12 @@
168
  "task": "multiple_choice",
169
  "acc": 0.5144444444444445,
170
  "acc_norm": 0.5144444444444445
 
 
 
 
 
 
171
  }
172
  ]
173
  }
 
168
  "task": "multiple_choice",
169
  "acc": 0.5144444444444445,
170
  "acc_norm": 0.5144444444444445
171
+ },
172
+ {
173
+ "name": "turkce_atasozleri",
174
+ "task": "multiple_choice",
175
+ "acc": 0.48092485549132946,
176
+ "acc_norm": 0.48092485549132946
177
  }
178
  ]
179
  }
results/zero-shot/Llama-3.3-70B-Instruct.json CHANGED
@@ -170,6 +170,12 @@
170
  "rouge1": 0.4242379216426208,
171
  "rouge2": 0.2810704842229309,
172
  "rougeL": 0.3500597320086401
 
 
 
 
 
 
173
  }
174
  ]
175
  }
 
170
  "rouge1": 0.4242379216426208,
171
  "rouge2": 0.2810704842229309,
172
  "rougeL": 0.3500597320086401
173
+ },
174
+ {
175
+ "name": "turkce_atasozleri",
176
+ "task": "multiple_choice",
177
+ "acc": 0.9254335260115607,
178
+ "acc_norm": 0.9254335260115607
179
  }
180
  ]
181
  }
results/zero-shot/Ministral-8B-Instruct.json CHANGED
@@ -167,6 +167,12 @@
167
  "name": "gecturk_generation",
168
  "task": "grammatical_error_correction",
169
  "exact_match": 0.009003803745967548
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "name": "gecturk_generation",
168
  "task": "grammatical_error_correction",
169
  "exact_match": 0.009003803745967548
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.4046242774566474,
175
+ "acc_norm": 0.4046242774566474
176
  }
177
  ]
178
  }
results/zero-shot/Mistral-7B-Instruct-v0.3.json CHANGED
@@ -167,6 +167,12 @@
167
  "name": "gecturk_generation",
168
  "task": "grammatical_error_correction",
169
  "exact_match": 0.009677885309836777
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "name": "gecturk_generation",
168
  "task": "grammatical_error_correction",
169
  "exact_match": 0.009677885309836777
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.3,
175
+ "acc_norm": 0.3
176
  }
177
  ]
178
  }
results/zero-shot/Mistral-7B-v0.3.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.4111111111111111,
169
  "acc_norm": 0.4111111111111111
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.4111111111111111,
169
  "acc_norm": 0.4111111111111111
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.27572254335260116,
175
+ "acc_norm": 0.27572254335260116
176
  }
177
  ]
178
  }
results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json CHANGED
@@ -169,6 +169,12 @@
169
  "name": "gecturk_generation",
170
  "task": "grammatical_error_correction",
171
  "exact_match": 0.036255958399537776
 
 
 
 
 
 
172
  }
173
  ]
174
  }
 
169
  "name": "gecturk_generation",
170
  "task": "grammatical_error_correction",
171
  "exact_match": 0.036255958399537776
172
+ },
173
+ {
174
+ "name": "turkce_atasozleri",
175
+ "task": "multiple_choice",
176
+ "acc": 0.5150289017341041,
177
+ "acc_norm": 0.5150289017341041
178
  }
179
  ]
180
  }
results/zero-shot/Qwen2.5-0.5B-Instruct.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.3,
169
  "acc_norm": 0.3
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.3,
169
  "acc_norm": 0.3
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.2832369942196532,
175
+ "acc_norm": 0.2832369942196532
176
  }
177
  ]
178
  }
results/zero-shot/Qwen2.5-0.5B.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.29888888888888887,
169
  "acc_norm": 0.29888888888888887
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.29888888888888887,
169
  "acc_norm": 0.29888888888888887
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.20346820809248556,
175
+ "acc_norm": 0.20346820809248556
176
  }
177
  ]
178
  }
results/zero-shot/Qwen2.5-1.5B-Instruct.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.5344444444444445,
169
  "acc_norm": 0.5344444444444445
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.5344444444444445,
169
  "acc_norm": 0.5344444444444445
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.3468208092485549,
175
+ "acc_norm": 0.3468208092485549
176
  }
177
  ]
178
  }
results/zero-shot/Qwen2.5-1.5B.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.4666666666666667,
169
  "acc_norm": 0.4666666666666667
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.4666666666666667,
169
  "acc_norm": 0.4666666666666667
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.2300578034682081,
175
+ "acc_norm": 0.2300578034682081
176
  }
177
  ]
178
  }
results/zero-shot/Qwen2.5-14B-Instruct.json CHANGED
@@ -169,6 +169,12 @@
169
  "name": "gecturk_generation",
170
  "task": "grammatical_error_correction",
171
  "exact_match": 0.00130001444460494
 
 
 
 
 
 
172
  }
173
  ]
174
  }
 
169
  "name": "gecturk_generation",
170
  "task": "grammatical_error_correction",
171
  "exact_match": 0.00130001444460494
172
+ },
173
+ {
174
+ "name": "turkce_atasozleri",
175
+ "task": "multiple_choice",
176
+ "acc": 0.7832369942196532,
177
+ "acc_norm": 0.7832369942196532
178
  }
179
  ]
180
  }
results/zero-shot/Qwen2.5-14B.json CHANGED
@@ -169,6 +169,12 @@
169
  "name": "gecturk_generation",
170
  "task": "grammatical_error_correction",
171
  "exact_match": 0.003707448601280755
 
 
 
 
 
 
172
  }
173
  ]
174
  }
 
169
  "name": "gecturk_generation",
170
  "task": "grammatical_error_correction",
171
  "exact_match": 0.003707448601280755
172
+ },
173
+ {
174
+ "name": "turkce_atasozleri",
175
+ "task": "multiple_choice",
176
+ "acc": 0.753757225433526,
177
+ "acc_norm": 0.753757225433526
178
  }
179
  ]
180
  }
results/zero-shot/Qwen2.5-3B-Instruct.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.6744444444444444,
169
  "acc_norm": 0.6744444444444444
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.6744444444444444,
169
  "acc_norm": 0.6744444444444444
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.6011560693641619,
175
+ "acc_norm": 0.6011560693641619
176
  }
177
  ]
178
  }
results/zero-shot/Qwen2.5-3B.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.6188888888888889,
169
  "acc_norm": 0.6188888888888889
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.6188888888888889,
169
  "acc_norm": 0.6188888888888889
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.4346820809248555,
175
+ "acc_norm": 0.4346820809248555
176
  }
177
  ]
178
  }
results/zero-shot/Qwen2.5-7B-Instruct.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.7344444444444445,
169
  "acc_norm": 0.7344444444444445
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.7344444444444445,
169
  "acc_norm": 0.7344444444444445
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.7121387283236994,
175
+ "acc_norm": 0.7121387283236994
176
  }
177
  ]
178
  }
results/zero-shot/Qwen2.5-7B.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.7388888888888889,
169
  "acc_norm": 0.7388888888888889
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.7388888888888889,
169
  "acc_norm": 0.7388888888888889
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.7352601156069364,
175
+ "acc_norm": 0.7352601156069364
176
  }
177
  ]
178
  }
results/zero-shot/aya-23-35B.json CHANGED
@@ -169,6 +169,12 @@
169
  "rouge1": 0.3920969453077054,
170
  "rouge2": 0.25937196554017156,
171
  "rougeL": 0.33144850765201345
 
 
 
 
 
 
172
  }
173
  ]
174
  }
 
169
  "rouge1": 0.3920969453077054,
170
  "rouge2": 0.25937196554017156,
171
  "rougeL": 0.33144850765201345
172
+ },
173
+ {
174
+ "name": "turkce_atasozleri",
175
+ "task": "multiple_choice",
176
+ "acc": 0.5687861271676301,
177
+ "acc_norm": 0.5687861271676301
178
  }
179
  ]
180
  }
results/zero-shot/aya-23-8b.json CHANGED
@@ -163,6 +163,12 @@
163
  "rouge1": 0.2855728817569547,
164
  "rouge2": 0.14081555638864124,
165
  "rougeL": 0.23467303626936886
 
 
 
 
 
 
166
  }
167
  ]
168
  }
 
163
  "rouge1": 0.2855728817569547,
164
  "rouge2": 0.14081555638864124,
165
  "rougeL": 0.23467303626936886
166
+ },
167
+ {
168
+ "name": "turkce_atasozleri",
169
+ "task": "multiple_choice",
170
+ "acc": 0.44971098265895953,
171
+ "acc_norm": 0.44971098265895953
172
  }
173
  ]
174
  }
results/zero-shot/aya-expanse-32b.json CHANGED
@@ -168,6 +168,12 @@
168
  "name": "gecturk_generation",
169
  "task": "grammatical_error_correction",
170
  "exact_match": 0
 
 
 
 
 
 
171
  }
172
  ]
173
  }
 
168
  "name": "gecturk_generation",
169
  "task": "grammatical_error_correction",
170
  "exact_match": 0
171
+ },
172
+ {
173
+ "name": "turkce_atasozleri",
174
+ "task": "multiple_choice",
175
+ "acc": 0.8236994219653179,
176
+ "acc_norm": 0.8236994219653179
177
  }
178
  ]
179
  }
results/zero-shot/aya-expanse-8b.json CHANGED
@@ -154,6 +154,12 @@
154
  "task": "extractive_question_answering",
155
  "exact_match": 0.2495798319327731,
156
  "f1": 0.4735125568867167
 
 
 
 
 
 
157
  }
158
  ]
159
  }
 
154
  "task": "extractive_question_answering",
155
  "exact_match": 0.2495798319327731,
156
  "f1": 0.4735125568867167
157
+ },
158
+ {
159
+ "name": "turkce_atasozleri",
160
+ "task": "multiple_choice",
161
+ "acc": 0.723121387283237,
162
+ "acc_norm": 0.723121387283237
163
  }
164
  ]
165
  }
results/zero-shot/aya101.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.2288888888888889,
169
  "acc_norm": 0.2288888888888889
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.2288888888888889,
169
  "acc_norm": 0.2288888888888889
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.009826589595375723,
175
+ "acc_norm": 0.009826589595375723
176
  }
177
  ]
178
  }
results/zero-shot/commencis-7b.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.3233333333333333,
169
  "acc_norm": 0.3233333333333333
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.3233333333333333,
169
  "acc_norm": 0.3233333333333333
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.22658959537572254,
175
+ "acc_norm": 0.22658959537572254
176
  }
177
  ]
178
  }
results/zero-shot/kanarya-2b.json CHANGED
@@ -166,6 +166,12 @@
166
  "task": "extractive_question_answering",
167
  "exact_match": 0.008403361344537815,
168
  "f1": 0.027799180278171867
 
 
 
 
 
 
169
  }
170
  ]
171
  }
 
166
  "task": "extractive_question_answering",
167
  "exact_match": 0.008403361344537815,
168
  "f1": 0.027799180278171867
169
+ },
170
+ {
171
+ "name": "turkce_atasozleri",
172
+ "task": "multiple_choice",
173
+ "acc": 0.0,
174
+ "acc_norm": 0.0
175
  }
176
  ]
177
  }
results/zero-shot/llama-3-8b-instruct.json CHANGED
@@ -162,6 +162,12 @@
162
  "rouge1": 0.29619456321037296,
163
  "rouge2": 0.13520487191226377,
164
  "rougeL": 0.220446635816053
 
 
 
 
 
 
165
  }
166
  ]
167
  }
 
162
  "rouge1": 0.29619456321037296,
163
  "rouge2": 0.13520487191226377,
164
  "rougeL": 0.220446635816053
165
+ },
166
+ {
167
+ "name": "turkce_atasozleri",
168
+ "task": "multiple_choice",
169
+ "acc": 0.6947976878612717,
170
+ "acc_norm": 0.6947976878612717
171
  }
172
  ]
173
  }
results/zero-shot/llama-3-8b.json CHANGED
@@ -161,6 +161,12 @@
161
  "rouge1": 0.2615001361521869,
162
  "rouge2": 0.11093149007661907,
163
  "rougeL": 0.20321693263972507
 
 
 
 
 
 
164
  }
165
  ]
166
  }
 
161
  "rouge1": 0.2615001361521869,
162
  "rouge2": 0.11093149007661907,
163
  "rougeL": 0.20321693263972507
164
+ },
165
+ {
166
+ "name": "turkce_atasozleri",
167
+ "task": "multiple_choice",
168
+ "acc": 0.44046242774566474,
169
+ "acc_norm": 0.44046242774566474
170
  }
171
  ]
172
  }
results/zero-shot/llama-3.1-8b-instruct.json CHANGED
@@ -161,6 +161,12 @@
161
  "rouge1": 0.2805962791068744,
162
  "rouge2": 0.12421139697660691,
163
  "rougeL": 0.21080710839195932
 
 
 
 
 
 
164
  }
165
  ]
166
  }
 
161
  "rouge1": 0.2805962791068744,
162
  "rouge2": 0.12421139697660691,
163
  "rougeL": 0.21080710839195932
164
+ },
165
+ {
166
+ "name": "turkce_atasozleri",
167
+ "task": "multiple_choice",
168
+ "acc": 0.7549132947976879,
169
+ "acc_norm": 0.7549132947976879
170
  }
171
  ]
172
  }
results/zero-shot/llama-3.1-8b.json CHANGED
@@ -161,6 +161,12 @@
161
  "rouge1": 0.2805962791068744,
162
  "rouge2": 0.12421139697660691,
163
  "rougeL": 0.21080710839195932
 
 
 
 
 
 
164
  }
165
  ]
166
  }
 
161
  "rouge1": 0.2805962791068744,
162
  "rouge2": 0.12421139697660691,
163
  "rougeL": 0.21080710839195932
164
+ },
165
+ {
166
+ "name": "turkce_atasozleri",
167
+ "task": "multiple_choice",
168
+ "acc": 0.5410404624277456,
169
+ "acc_norm": 0.5410404624277456
170
  }
171
  ]
172
  }
results/zero-shot/llama-3.2-1b.json CHANGED
@@ -193,6 +193,12 @@
193
  "rouge1": 0.2429304790539497,
194
  "rouge2": 0.09668008744707143,
195
  "rougeL": 0.18327092913535944
 
 
 
 
 
 
196
  }
197
  ]
198
  }
 
193
  "rouge1": 0.2429304790539497,
194
  "rouge2": 0.09668008744707143,
195
  "rougeL": 0.18327092913535944
196
+ },
197
+ {
198
+ "name": "turkce_atasozleri",
199
+ "task": "multiple_choice",
200
+ "acc": 0.21676300578034682,
201
+ "acc_norm": 0.21676300578034682
202
  }
203
  ]
204
  }
results/zero-shot/llama-3.2-3b-instruct.json CHANGED
@@ -186,6 +186,12 @@
186
  "rouge1": 0.2616423061938248,
187
  "rouge2": 0.11064039063859936,
188
  "rougeL": 0.19686955120787036
 
 
 
 
 
 
189
  }
190
  ]
191
  }
 
186
  "rouge1": 0.2616423061938248,
187
  "rouge2": 0.11064039063859936,
188
  "rougeL": 0.19686955120787036
189
+ },
190
+ {
191
+ "name": "turkce_atasozleri",
192
+ "task": "multiple_choice",
193
+ "acc": 0.010982658959537572,
194
+ "acc_norm": 0.010982658959537572
195
  }
196
  ]
197
  }
results/zero-shot/llama-3.2-3b.json CHANGED
@@ -155,6 +155,12 @@
155
  "rouge1": 0.2718989612690207,
156
  "rouge2": 0.12031087767355472,
157
  "rougeL": 0.21504760362022454
 
 
 
 
 
 
158
  }
159
  ]
160
  }
 
155
  "rouge1": 0.2718989612690207,
156
  "rouge2": 0.12031087767355472,
157
  "rougeL": 0.21504760362022454
158
+ },
159
+ {
160
+ "name": "turkce_atasozleri",
161
+ "task": "multiple_choice",
162
+ "acc": 0.1994219653179191,
163
+ "acc_norm": 0.1994219653179191
164
  }
165
  ]
166
  }
results/zero-shot/mistral-7b.json CHANGED
@@ -160,6 +160,12 @@
160
  "rouge1": 0.02720399421152351,
161
  "rouge2": 0.012032606076011431,
162
  "rougeL": 0.02311080687545987
 
 
 
 
 
 
163
  }
164
  ]
165
  }
 
160
  "rouge1": 0.02720399421152351,
161
  "rouge2": 0.012032606076011431,
162
  "rougeL": 0.02311080687545987
163
+ },
164
+ {
165
+ "name": "turkce_atasozleri",
166
+ "task": "multiple_choice",
167
+ "acc": 0.30809248554913293,
168
+ "acc_norm": 0.30809248554913293
169
  }
170
  ]
171
  }
results/zero-shot/trendyol-7b.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.3622222222222222,
169
  "acc_norm": 0.3622222222222222
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.3622222222222222,
169
  "acc_norm": 0.3622222222222222
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.0,
175
+ "acc_norm": 0.0
176
  }
177
  ]
178
  }
results/zero-shot/turna.json CHANGED
@@ -167,6 +167,12 @@
167
  "task": "multiple_choice",
168
  "acc": 0.22555555555555556,
169
  "acc_norm": 0.22555555555555556
 
 
 
 
 
 
170
  }
171
  ]
172
  }
 
167
  "task": "multiple_choice",
168
  "acc": 0.22555555555555556,
169
  "acc_norm": 0.22555555555555556
170
+ },
171
+ {
172
+ "name": "turkce_atasozleri",
173
+ "task": "multiple_choice",
174
+ "acc": 0.19248554913294796,
175
+ "acc_norm": 0.19248554913294796
176
  }
177
  ]
178
  }