Spaces:
Running
Running
Fix missing PLU Results
Browse files- results/zero-shot/aya-23-8b.json +30 -24
- results/zero-shot/aya-expanse-8b.json +30 -24
- results/zero-shot/llama-3-8b-instruct.json +30 -24
- results/zero-shot/llama-3-8b.json +30 -24
- results/zero-shot/llama-3.1-8b-instruct.json +30 -24
- results/zero-shot/llama-3.1-8b.json +30 -24
- results/zero-shot/llama-3.2-1b.json +30 -24
- results/zero-shot/llama-3.2-3b-instruct.json +30 -24
- results/zero-shot/llama-3.2-3b.json +30 -24
results/zero-shot/aya-23-8b.json
CHANGED
@@ -83,30 +83,6 @@
|
|
83 |
"exact_match": 0.2062780269058296,
|
84 |
"f1": 0.4653972244152745
|
85 |
},
|
86 |
-
{
|
87 |
-
"name": "turkish_plu_goal_inference",
|
88 |
-
"task": "multiple_choice",
|
89 |
-
"acc": 0.3918757467144564,
|
90 |
-
"acc_norm": 0.3859020310633214
|
91 |
-
},
|
92 |
-
{
|
93 |
-
"name": "turkish_plu_next_event_prediction",
|
94 |
-
"task": "multiple_choice",
|
95 |
-
"acc": 0.4687022900763359,
|
96 |
-
"acc_norm": 0.5374045801526718
|
97 |
-
},
|
98 |
-
{
|
99 |
-
"name": "turkish_plu_step_inference",
|
100 |
-
"task": "multiple_choice",
|
101 |
-
"acc": 0.33986928104575165,
|
102 |
-
"acc_norm": 0.45098039215686275
|
103 |
-
},
|
104 |
-
{
|
105 |
-
"name": "turkish_plu_step_ordering",
|
106 |
-
"task": "multiple_choice",
|
107 |
-
"acc": 0.6180215475024485,
|
108 |
-
"acc_norm": 0.6180215475024485
|
109 |
-
},
|
110 |
{
|
111 |
"name": "xcopa_tr",
|
112 |
"task": "multiple_choice",
|
@@ -187,6 +163,36 @@
|
|
187 |
"task": "multiple_choice",
|
188 |
"acc": 0.5857142857142857,
|
189 |
"acc_norm": 0.5857142857142857
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
}
|
191 |
]
|
192 |
}
|
|
|
83 |
"exact_match": 0.2062780269058296,
|
84 |
"f1": 0.4653972244152745
|
85 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
{
|
87 |
"name": "xcopa_tr",
|
88 |
"task": "multiple_choice",
|
|
|
163 |
"task": "multiple_choice",
|
164 |
"acc": 0.5857142857142857,
|
165 |
"acc_norm": 0.5857142857142857
|
166 |
+
},
|
167 |
+
{
|
168 |
+
"name": "turkish_plu",
|
169 |
+
"task": "multiple_choice",
|
170 |
+
"acc": 0.4928,
|
171 |
+
"acc_norm": 0.40416
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"name": "turkish_plu_goal_inference",
|
175 |
+
"task": "multiple_choice",
|
176 |
+
"acc": 0.42054958183990443,
|
177 |
+
"acc_norm": 0.2724014336917563
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"name": "turkish_plu_next_event_prediction",
|
181 |
+
"task": "multiple_choice",
|
182 |
+
"acc": 0.48854961832061067,
|
183 |
+
"acc_norm": 0.2732824427480916
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"name": "turkish_plu_step_inference",
|
187 |
+
"task": "multiple_choice",
|
188 |
+
"acc": 0.37254901960784315,
|
189 |
+
"acc_norm": 0.35294117647058826
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"name": "turkish_plu_step_ordering",
|
193 |
+
"task": "multiple_choice",
|
194 |
+
"acc": 0.6268364348677767,
|
195 |
+
"acc_norm": 0.6268364348677767
|
196 |
}
|
197 |
]
|
198 |
}
|
results/zero-shot/aya-expanse-8b.json
CHANGED
@@ -93,30 +93,6 @@
|
|
93 |
"exact_match": 0.13452914798206278,
|
94 |
"f1": 0.435087842533856
|
95 |
},
|
96 |
-
{
|
97 |
-
"name": "turkish_plu_goal_inference",
|
98 |
-
"task": "multiple_choice",
|
99 |
-
"acc": 0.4062126642771804,
|
100 |
-
"acc_norm": 0.3930704898446834
|
101 |
-
},
|
102 |
-
{
|
103 |
-
"name": "turkish_plu_next_event_prediction",
|
104 |
-
"task": "multiple_choice",
|
105 |
-
"acc": 0.4900763358778626,
|
106 |
-
"acc_norm": 0.5465648854961832
|
107 |
-
},
|
108 |
-
{
|
109 |
-
"name": "turkish_plu_step_inference",
|
110 |
-
"task": "multiple_choice",
|
111 |
-
"acc": 0.3464052287581699,
|
112 |
-
"acc_norm": 0.4395424836601307
|
113 |
-
},
|
114 |
-
{
|
115 |
-
"name": "turkish_plu_step_ordering",
|
116 |
-
"task": "multiple_choice",
|
117 |
-
"acc": 0.5935357492654261,
|
118 |
-
"acc_norm": 0.5935357492654261
|
119 |
-
},
|
120 |
{
|
121 |
"name": "wiki_lingua_tr",
|
122 |
"task": "summarization",
|
@@ -185,6 +161,36 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.5428571428571428,
|
187 |
"acc_norm": 0.5428571428571428
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
93 |
"exact_match": 0.13452914798206278,
|
94 |
"f1": 0.435087842533856
|
95 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
{
|
97 |
"name": "wiki_lingua_tr",
|
98 |
"task": "summarization",
|
|
|
161 |
"task": "multiple_choice",
|
162 |
"acc": 0.5428571428571428,
|
163 |
"acc_norm": 0.5428571428571428
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"name": "turkish_plu",
|
167 |
+
"task": "multiple_choice",
|
168 |
+
"acc": 0.50208,
|
169 |
+
"acc_norm": 0.40704
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"name": "turkish_plu_goal_inference",
|
173 |
+
"task": "multiple_choice",
|
174 |
+
"acc": 0.43010752688172044,
|
175 |
+
"acc_norm": 0.27956989247311825
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkish_plu_next_event_prediction",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.5114503816793893,
|
181 |
+
"acc_norm": 0.27938931297709924
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"name": "turkish_plu_step_inference",
|
185 |
+
"task": "multiple_choice",
|
186 |
+
"acc": 0.4035947712418301,
|
187 |
+
"acc_norm": 0.37254901960784315
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "turkish_plu_step_ordering",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.614103819784525,
|
193 |
+
"acc_norm": 0.614103819784525
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/llama-3-8b-instruct.json
CHANGED
@@ -82,30 +82,6 @@
|
|
82 |
"exact_match": 0.1289237668161435,
|
83 |
"f1": 0.4134057883004977
|
84 |
},
|
85 |
-
{
|
86 |
-
"name": "turkish_plu_goal_inference",
|
87 |
-
"task": "multiple_choice",
|
88 |
-
"acc": 0.38829151732377537,
|
89 |
-
"acc_norm": 0.43130227001194743
|
90 |
-
},
|
91 |
-
{
|
92 |
-
"name": "turkish_plu_next_event_prediction",
|
93 |
-
"task": "multiple_choice",
|
94 |
-
"acc": 0.4549618320610687,
|
95 |
-
"acc_norm": 0.517557251908397
|
96 |
-
},
|
97 |
-
{
|
98 |
-
"name": "turkish_plu_step_inference",
|
99 |
-
"task": "multiple_choice",
|
100 |
-
"acc": 0.3137254901960784,
|
101 |
-
"acc_norm": 0.44281045751633985
|
102 |
-
},
|
103 |
-
{
|
104 |
-
"name": "turkish_plu_step_ordering",
|
105 |
-
"task": "multiple_choice",
|
106 |
-
"acc": 0.6160626836434868,
|
107 |
-
"acc_norm": 0.6160626836434868
|
108 |
-
},
|
109 |
{
|
110 |
"name": "xcopa_tr",
|
111 |
"task": "multiple_choice",
|
@@ -186,6 +162,36 @@
|
|
186 |
"task": "multiple_choice",
|
187 |
"acc": 0.6142857142857143,
|
188 |
"acc_norm": 0.6142857142857143
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
}
|
190 |
]
|
191 |
}
|
|
|
82 |
"exact_match": 0.1289237668161435,
|
83 |
"f1": 0.4134057883004977
|
84 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
{
|
86 |
"name": "xcopa_tr",
|
87 |
"task": "multiple_choice",
|
|
|
162 |
"task": "multiple_choice",
|
163 |
"acc": 0.6142857142857143,
|
164 |
"acc_norm": 0.6142857142857143
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"name": "turkish_plu",
|
168 |
+
"task": "multiple_choice",
|
169 |
+
"acc": 0.47136,
|
170 |
+
"acc_norm": 0.4032
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"name": "turkish_plu_goal_inference",
|
174 |
+
"task": "multiple_choice",
|
175 |
+
"acc": 0.3763440860215054,
|
176 |
+
"acc_norm": 0.26642771804062126
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"name": "turkish_plu_next_event_prediction",
|
180 |
+
"task": "multiple_choice",
|
181 |
+
"acc": 0.46564885496183206,
|
182 |
+
"acc_norm": 0.2595419847328244
|
183 |
+
},
|
184 |
+
{
|
185 |
+
"name": "turkish_plu_step_inference",
|
186 |
+
"task": "multiple_choice",
|
187 |
+
"acc": 0.3349673202614379,
|
188 |
+
"acc_norm": 0.35784313725490197
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"name": "turkish_plu_step_ordering",
|
192 |
+
"task": "multiple_choice",
|
193 |
+
"acc": 0.6346718903036239,
|
194 |
+
"acc_norm": 0.6346718903036239
|
195 |
}
|
196 |
]
|
197 |
}
|
results/zero-shot/llama-3-8b.json
CHANGED
@@ -81,30 +81,6 @@
|
|
81 |
"exact_match": 0.28475336322869954,
|
82 |
"f1": 0.5013148868557868
|
83 |
},
|
84 |
-
{
|
85 |
-
"name": "turkish_plu_goal_inference",
|
86 |
-
"task": "multiple_choice",
|
87 |
-
"acc": 0.38948626045400236,
|
88 |
-
"acc_norm": 0.4169653524492234
|
89 |
-
},
|
90 |
-
{
|
91 |
-
"name": "turkish_plu_next_event_prediction",
|
92 |
-
"task": "multiple_choice",
|
93 |
-
"acc": 0.4488549618320611,
|
94 |
-
"acc_norm": 0.5328244274809161
|
95 |
-
},
|
96 |
-
{
|
97 |
-
"name": "turkish_plu_step_inference",
|
98 |
-
"task": "multiple_choice",
|
99 |
-
"acc": 0.32189542483660133,
|
100 |
-
"acc_norm": 0.47058823529411764
|
101 |
-
},
|
102 |
-
{
|
103 |
-
"name": "turkish_plu_step_ordering",
|
104 |
-
"task": "multiple_choice",
|
105 |
-
"acc": 0.6278158667972575,
|
106 |
-
"acc_norm": 0.6278158667972575
|
107 |
-
},
|
108 |
{
|
109 |
"name": "xcopa_tr",
|
110 |
"task": "multiple_choice",
|
@@ -185,6 +161,36 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.5428571428571428,
|
187 |
"acc_norm": 0.5428571428571428
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
81 |
"exact_match": 0.28475336322869954,
|
82 |
"f1": 0.5013148868557868
|
83 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
{
|
85 |
"name": "xcopa_tr",
|
86 |
"task": "multiple_choice",
|
|
|
161 |
"task": "multiple_choice",
|
162 |
"acc": 0.5428571428571428,
|
163 |
"acc_norm": 0.5428571428571428
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"name": "turkish_plu",
|
167 |
+
"task": "multiple_choice",
|
168 |
+
"acc": 0.46496,
|
169 |
+
"acc_norm": 0.39616
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"name": "turkish_plu_goal_inference",
|
173 |
+
"task": "multiple_choice",
|
174 |
+
"acc": 0.36917562724014336,
|
175 |
+
"acc_norm": 0.25925925925925924
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkish_plu_next_event_prediction",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.46106870229007635,
|
181 |
+
"acc_norm": 0.26259541984732826
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"name": "turkish_plu_step_inference",
|
185 |
+
"task": "multiple_choice",
|
186 |
+
"acc": 0.3284313725490196,
|
187 |
+
"acc_norm": 0.33986928104575165
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "turkish_plu_step_ordering",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.6278158667972575,
|
193 |
+
"acc_norm": 0.6278158667972575
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/llama-3.1-8b-instruct.json
CHANGED
@@ -81,30 +81,6 @@
|
|
81 |
"exact_match": 0.23318385650224216,
|
82 |
"f1": 0.5062272078338648
|
83 |
},
|
84 |
-
{
|
85 |
-
"name": "turkish_plu_goal_inference",
|
86 |
-
"task": "multiple_choice",
|
87 |
-
"acc": 0.40860215053763443,
|
88 |
-
"acc_norm": 0.45997610513739545
|
89 |
-
},
|
90 |
-
{
|
91 |
-
"name": "turkish_plu_next_event_prediction",
|
92 |
-
"task": "multiple_choice",
|
93 |
-
"acc": 0.4442748091603053,
|
94 |
-
"acc_norm": 0.5419847328244275
|
95 |
-
},
|
96 |
-
{
|
97 |
-
"name": "turkish_plu_step_inference",
|
98 |
-
"task": "multiple_choice",
|
99 |
-
"acc": 0.33169934640522875,
|
100 |
-
"acc_norm": 0.4624183006535948
|
101 |
-
},
|
102 |
-
{
|
103 |
-
"name": "turkish_plu_step_ordering",
|
104 |
-
"task": "multiple_choice",
|
105 |
-
"acc": 0.633692458374143,
|
106 |
-
"acc_norm": 0.633692458374143
|
107 |
-
},
|
108 |
{
|
109 |
"name": "xcopa_tr",
|
110 |
"task": "multiple_choice",
|
@@ -185,6 +161,36 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.6428571428571429,
|
187 |
"acc_norm": 0.6428571428571429
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
81 |
"exact_match": 0.23318385650224216,
|
82 |
"f1": 0.5062272078338648
|
83 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
{
|
85 |
"name": "xcopa_tr",
|
86 |
"task": "multiple_choice",
|
|
|
161 |
"task": "multiple_choice",
|
162 |
"acc": 0.6428571428571429,
|
163 |
"acc_norm": 0.6428571428571429
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"name": "turkish_plu",
|
167 |
+
"task": "multiple_choice",
|
168 |
+
"acc": 0.4848,
|
169 |
+
"acc_norm": 0.40896
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"name": "turkish_plu_goal_inference",
|
173 |
+
"task": "multiple_choice",
|
174 |
+
"acc": 0.40860215053763443,
|
175 |
+
"acc_norm": 0.27718040621266427
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkish_plu_next_event_prediction",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.44580152671755724,
|
181 |
+
"acc_norm": 0.2549618320610687
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"name": "turkish_plu_step_inference",
|
185 |
+
"task": "multiple_choice",
|
186 |
+
"acc": 0.3431372549019608,
|
187 |
+
"acc_norm": 0.33986928104575165
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "turkish_plu_step_ordering",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.6571988246816847,
|
193 |
+
"acc_norm": 0.6571988246816847
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/llama-3.1-8b.json
CHANGED
@@ -81,30 +81,6 @@
|
|
81 |
"exact_match": 0.2757847533632287,
|
82 |
"f1": 0.5178366277473359
|
83 |
},
|
84 |
-
{
|
85 |
-
"name": "turkish_plu_goal_inference",
|
86 |
-
"task": "multiple_choice",
|
87 |
-
"acc": 0.4145758661887694,
|
88 |
-
"acc_norm": 0.4324970131421744
|
89 |
-
},
|
90 |
-
{
|
91 |
-
"name": "turkish_plu_next_event_prediction",
|
92 |
-
"task": "multiple_choice",
|
93 |
-
"acc": 0.4488549618320611,
|
94 |
-
"acc_norm": 0.5358778625954198
|
95 |
-
},
|
96 |
-
{
|
97 |
-
"name": "turkish_plu_step_inference",
|
98 |
-
"task": "multiple_choice",
|
99 |
-
"acc": 0.3382352941176471,
|
100 |
-
"acc_norm": 0.4738562091503268
|
101 |
-
},
|
102 |
-
{
|
103 |
-
"name": "turkish_plu_step_ordering",
|
104 |
-
"task": "multiple_choice",
|
105 |
-
"acc": 0.6425073457394711,
|
106 |
-
"acc_norm": 0.6425073457394711
|
107 |
-
},
|
108 |
{
|
109 |
"name": "xcopa_tr",
|
110 |
"task": "multiple_choice",
|
@@ -185,6 +161,36 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.5857142857142857,
|
187 |
"acc_norm": 0.5857142857142857
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
81 |
"exact_match": 0.2757847533632287,
|
82 |
"f1": 0.5178366277473359
|
83 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
{
|
85 |
"name": "xcopa_tr",
|
86 |
"task": "multiple_choice",
|
|
|
161 |
"task": "multiple_choice",
|
162 |
"acc": 0.5857142857142857,
|
163 |
"acc_norm": 0.5857142857142857
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"name": "turkish_plu",
|
167 |
+
"task": "multiple_choice",
|
168 |
+
"acc": 0.47552,
|
169 |
+
"acc_norm": 0.39872
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"name": "turkish_plu_goal_inference",
|
173 |
+
"task": "multiple_choice",
|
174 |
+
"acc": 0.38829151732377537,
|
175 |
+
"acc_norm": 0.2628434886499403
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkish_plu_next_event_prediction",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.4595419847328244,
|
181 |
+
"acc_norm": 0.26106870229007634
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"name": "turkish_plu_step_inference",
|
185 |
+
"task": "multiple_choice",
|
186 |
+
"acc": 0.35130718954248363,
|
187 |
+
"acc_norm": 0.3431372549019608
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "turkish_plu_step_ordering",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.6317335945151812,
|
193 |
+
"acc_norm": 0.6317335945151812
|
194 |
}
|
195 |
]
|
196 |
}
|
results/zero-shot/llama-3.2-1b.json
CHANGED
@@ -93,30 +93,6 @@
|
|
93 |
"exact_match": 0.06278026905829596,
|
94 |
"f1": 0.21486130318406463
|
95 |
},
|
96 |
-
{
|
97 |
-
"name": "turkish_plu_goal_inference",
|
98 |
-
"task": "multiple_choice",
|
99 |
-
"acc": 0.35842293906810035,
|
100 |
-
"acc_norm": 0.4026284348864994
|
101 |
-
},
|
102 |
-
{
|
103 |
-
"name": "turkish_plu_next_event_prediction",
|
104 |
-
"task": "multiple_choice",
|
105 |
-
"acc": 0.3709923664122137,
|
106 |
-
"acc_norm": 0.467175572519084
|
107 |
-
},
|
108 |
-
{
|
109 |
-
"name": "turkish_plu_step_inference",
|
110 |
-
"task": "multiple_choice",
|
111 |
-
"acc": 0.27941176470588236,
|
112 |
-
"acc_norm": 0.41830065359477125
|
113 |
-
},
|
114 |
-
{
|
115 |
-
"name": "turkish_plu_step_ordering",
|
116 |
-
"task": "multiple_choice",
|
117 |
-
"acc": 0.5759059745347699,
|
118 |
-
"acc_norm": 0.5759059745347699
|
119 |
-
},
|
120 |
{
|
121 |
"name": "wiki_lingua_tr",
|
122 |
"task": "summarization",
|
@@ -217,6 +193,36 @@
|
|
217 |
"task": "multiple_choice",
|
218 |
"acc": 0.5285714285714286,
|
219 |
"acc_norm": 0.5285714285714286
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
}
|
221 |
]
|
222 |
}
|
|
|
93 |
"exact_match": 0.06278026905829596,
|
94 |
"f1": 0.21486130318406463
|
95 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
{
|
97 |
"name": "wiki_lingua_tr",
|
98 |
"task": "summarization",
|
|
|
193 |
"task": "multiple_choice",
|
194 |
"acc": 0.5285714285714286,
|
195 |
"acc_norm": 0.5285714285714286
|
196 |
+
},
|
197 |
+
{
|
198 |
+
"name": "turkish_plu",
|
199 |
+
"task": "multiple_choice",
|
200 |
+
"acc": 0.4208,
|
201 |
+
"acc_norm": 0.368
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"name": "turkish_plu_goal_inference",
|
205 |
+
"task": "multiple_choice",
|
206 |
+
"acc": 0.36200716845878134,
|
207 |
+
"acc_norm": 0.23894862604540024
|
208 |
+
},
|
209 |
+
{
|
210 |
+
"name": "turkish_plu_next_event_prediction",
|
211 |
+
"task": "multiple_choice",
|
212 |
+
"acc": 0.37251908396946565,
|
213 |
+
"acc_norm": 0.2366412213740458
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"name": "turkish_plu_step_inference",
|
217 |
+
"task": "multiple_choice",
|
218 |
+
"acc": 0.29248366013071897,
|
219 |
+
"acc_norm": 0.3366013071895425
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"name": "turkish_plu_step_ordering",
|
223 |
+
"task": "multiple_choice",
|
224 |
+
"acc": 0.5768854064642507,
|
225 |
+
"acc_norm": 0.5768854064642507
|
226 |
}
|
227 |
]
|
228 |
}
|
results/zero-shot/llama-3.2-3b-instruct.json
CHANGED
@@ -93,30 +93,6 @@
|
|
93 |
"exact_match": 0.18721973094170405,
|
94 |
"f1": 0.5109898180473623
|
95 |
},
|
96 |
-
{
|
97 |
-
"name": "turkish_plu_goal_inference",
|
98 |
-
"task": "multiple_choice",
|
99 |
-
"acc": 0.3321385902031063,
|
100 |
-
"acc_norm": 0.3548387096774194
|
101 |
-
},
|
102 |
-
{
|
103 |
-
"name": "turkish_plu_next_event_prediction",
|
104 |
-
"task": "multiple_choice",
|
105 |
-
"acc": 0.3648854961832061,
|
106 |
-
"acc_norm": 0.4488549618320611
|
107 |
-
},
|
108 |
-
{
|
109 |
-
"name": "turkish_plu_step_inference",
|
110 |
-
"task": "multiple_choice",
|
111 |
-
"acc": 0.24183006535947713,
|
112 |
-
"acc_norm": 0.3758169934640523
|
113 |
-
},
|
114 |
-
{
|
115 |
-
"name": "turkish_plu_step_ordering",
|
116 |
-
"task": "multiple_choice",
|
117 |
-
"acc": 0.5710088148873653,
|
118 |
-
"acc_norm": 0.5710088148873653
|
119 |
-
},
|
120 |
{
|
121 |
"name": "wiki_lingua_tr",
|
122 |
"task": "summarization",
|
@@ -217,6 +193,36 @@
|
|
217 |
"task": "multiple_choice",
|
218 |
"acc": 0.5428571428571428,
|
219 |
"acc_norm": 0.5428571428571428
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
}
|
221 |
]
|
222 |
}
|
|
|
93 |
"exact_match": 0.18721973094170405,
|
94 |
"f1": 0.5109898180473623
|
95 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
{
|
97 |
"name": "wiki_lingua_tr",
|
98 |
"task": "summarization",
|
|
|
193 |
"task": "multiple_choice",
|
194 |
"acc": 0.5428571428571428,
|
195 |
"acc_norm": 0.5428571428571428
|
196 |
+
},
|
197 |
+
{
|
198 |
+
"name": "turkish_plu",
|
199 |
+
"task": "multiple_choice",
|
200 |
+
"acc": 0.44,
|
201 |
+
"acc_norm": 0.3952
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"name": "turkish_plu_goal_inference",
|
205 |
+
"task": "multiple_choice",
|
206 |
+
"acc": 0.3548387096774194,
|
207 |
+
"acc_norm": 0.26045400238948624
|
208 |
+
},
|
209 |
+
{
|
210 |
+
"name": "turkish_plu_next_event_prediction",
|
211 |
+
"task": "multiple_choice",
|
212 |
+
"acc": 0.3938931297709924,
|
213 |
+
"acc_norm": 0.24427480916030533
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"name": "turkish_plu_step_inference",
|
217 |
+
"task": "multiple_choice",
|
218 |
+
"acc": 0.2777777777777778,
|
219 |
+
"acc_norm": 0.3382352941176471
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"name": "turkish_plu_step_ordering",
|
223 |
+
"task": "multiple_choice",
|
224 |
+
"acc": 0.6366307541625857,
|
225 |
+
"acc_norm": 0.6366307541625857
|
226 |
}
|
227 |
]
|
228 |
}
|
results/zero-shot/llama-3.2-3b.json
CHANGED
@@ -81,30 +81,6 @@
|
|
81 |
"exact_match": 0.21188340807174888,
|
82 |
"f1": 0.4583574684635767
|
83 |
},
|
84 |
-
{
|
85 |
-
"name": "turkish_plu_goal_inference",
|
86 |
-
"task": "multiple_choice",
|
87 |
-
"acc": 0.3906810035842294,
|
88 |
-
"acc_norm": 0.3906810035842294
|
89 |
-
},
|
90 |
-
{
|
91 |
-
"name": "turkish_plu_next_event_prediction",
|
92 |
-
"task": "multiple_choice",
|
93 |
-
"acc": 0.4122137404580153,
|
94 |
-
"acc_norm": 0.5389312977099237
|
95 |
-
},
|
96 |
-
{
|
97 |
-
"name": "turkish_plu_step_inference",
|
98 |
-
"task": "multiple_choice",
|
99 |
-
"acc": 0.30718954248366015,
|
100 |
-
"acc_norm": 0.4493464052287582
|
101 |
-
},
|
102 |
-
{
|
103 |
-
"name": "turkish_plu_step_ordering",
|
104 |
-
"task": "multiple_choice",
|
105 |
-
"acc": 0.5974534769833496,
|
106 |
-
"acc_norm": 0.5974534769833496
|
107 |
-
},
|
108 |
{
|
109 |
"name": "xcopa_tr",
|
110 |
"task": "multiple_choice",
|
@@ -185,6 +161,36 @@
|
|
185 |
"task": "multiple_choice",
|
186 |
"acc": 0.5714285714285714,
|
187 |
"acc_norm": 0.5714285714285714
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
}
|
189 |
]
|
190 |
}
|
|
|
81 |
"exact_match": 0.21188340807174888,
|
82 |
"f1": 0.4583574684635767
|
83 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
{
|
85 |
"name": "xcopa_tr",
|
86 |
"task": "multiple_choice",
|
|
|
161 |
"task": "multiple_choice",
|
162 |
"acc": 0.5714285714285714,
|
163 |
"acc_norm": 0.5714285714285714
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"name": "turkish_plu",
|
167 |
+
"task": "multiple_choice",
|
168 |
+
"acc": 0.45408,
|
169 |
+
"acc_norm": 0.38752
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"name": "turkish_plu_goal_inference",
|
173 |
+
"task": "multiple_choice",
|
174 |
+
"acc": 0.4002389486260454,
|
175 |
+
"acc_norm": 0.25925925925925924
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"name": "turkish_plu_next_event_prediction",
|
179 |
+
"task": "multiple_choice",
|
180 |
+
"acc": 0.43206106870229005,
|
181 |
+
"acc_norm": 0.26106870229007634
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"name": "turkish_plu_step_inference",
|
185 |
+
"task": "multiple_choice",
|
186 |
+
"acc": 0.315359477124183,
|
187 |
+
"acc_norm": 0.35130718954248363
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"name": "turkish_plu_step_ordering",
|
191 |
+
"task": "multiple_choice",
|
192 |
+
"acc": 0.5954946131243879,
|
193 |
+
"acc_norm": 0.5954946131243879
|
194 |
}
|
195 |
]
|
196 |
}
|