abrek commited on
Commit
9dbe8e9
·
verified ·
1 Parent(s): 92895b2

Fix missing PLU Results

Browse files
results/zero-shot/aya-23-8b.json CHANGED
@@ -83,30 +83,6 @@
83
  "exact_match": 0.2062780269058296,
84
  "f1": 0.4653972244152745
85
  },
86
- {
87
- "name": "turkish_plu_goal_inference",
88
- "task": "multiple_choice",
89
- "acc": 0.3918757467144564,
90
- "acc_norm": 0.3859020310633214
91
- },
92
- {
93
- "name": "turkish_plu_next_event_prediction",
94
- "task": "multiple_choice",
95
- "acc": 0.4687022900763359,
96
- "acc_norm": 0.5374045801526718
97
- },
98
- {
99
- "name": "turkish_plu_step_inference",
100
- "task": "multiple_choice",
101
- "acc": 0.33986928104575165,
102
- "acc_norm": 0.45098039215686275
103
- },
104
- {
105
- "name": "turkish_plu_step_ordering",
106
- "task": "multiple_choice",
107
- "acc": 0.6180215475024485,
108
- "acc_norm": 0.6180215475024485
109
- },
110
  {
111
  "name": "xcopa_tr",
112
  "task": "multiple_choice",
@@ -187,6 +163,36 @@
187
  "task": "multiple_choice",
188
  "acc": 0.5857142857142857,
189
  "acc_norm": 0.5857142857142857
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  }
191
  ]
192
  }
 
83
  "exact_match": 0.2062780269058296,
84
  "f1": 0.4653972244152745
85
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  {
87
  "name": "xcopa_tr",
88
  "task": "multiple_choice",
 
163
  "task": "multiple_choice",
164
  "acc": 0.5857142857142857,
165
  "acc_norm": 0.5857142857142857
166
+ },
167
+ {
168
+ "name": "turkish_plu",
169
+ "task": "multiple_choice",
170
+ "acc": 0.4928,
171
+ "acc_norm": 0.40416
172
+ },
173
+ {
174
+ "name": "turkish_plu_goal_inference",
175
+ "task": "multiple_choice",
176
+ "acc": 0.42054958183990443,
177
+ "acc_norm": 0.2724014336917563
178
+ },
179
+ {
180
+ "name": "turkish_plu_next_event_prediction",
181
+ "task": "multiple_choice",
182
+ "acc": 0.48854961832061067,
183
+ "acc_norm": 0.2732824427480916
184
+ },
185
+ {
186
+ "name": "turkish_plu_step_inference",
187
+ "task": "multiple_choice",
188
+ "acc": 0.37254901960784315,
189
+ "acc_norm": 0.35294117647058826
190
+ },
191
+ {
192
+ "name": "turkish_plu_step_ordering",
193
+ "task": "multiple_choice",
194
+ "acc": 0.6268364348677767,
195
+ "acc_norm": 0.6268364348677767
196
  }
197
  ]
198
  }
results/zero-shot/aya-expanse-8b.json CHANGED
@@ -93,30 +93,6 @@
93
  "exact_match": 0.13452914798206278,
94
  "f1": 0.435087842533856
95
  },
96
- {
97
- "name": "turkish_plu_goal_inference",
98
- "task": "multiple_choice",
99
- "acc": 0.4062126642771804,
100
- "acc_norm": 0.3930704898446834
101
- },
102
- {
103
- "name": "turkish_plu_next_event_prediction",
104
- "task": "multiple_choice",
105
- "acc": 0.4900763358778626,
106
- "acc_norm": 0.5465648854961832
107
- },
108
- {
109
- "name": "turkish_plu_step_inference",
110
- "task": "multiple_choice",
111
- "acc": 0.3464052287581699,
112
- "acc_norm": 0.4395424836601307
113
- },
114
- {
115
- "name": "turkish_plu_step_ordering",
116
- "task": "multiple_choice",
117
- "acc": 0.5935357492654261,
118
- "acc_norm": 0.5935357492654261
119
- },
120
  {
121
  "name": "wiki_lingua_tr",
122
  "task": "summarization",
@@ -185,6 +161,36 @@
185
  "task": "multiple_choice",
186
  "acc": 0.5428571428571428,
187
  "acc_norm": 0.5428571428571428
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  }
189
  ]
190
  }
 
93
  "exact_match": 0.13452914798206278,
94
  "f1": 0.435087842533856
95
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  {
97
  "name": "wiki_lingua_tr",
98
  "task": "summarization",
 
161
  "task": "multiple_choice",
162
  "acc": 0.5428571428571428,
163
  "acc_norm": 0.5428571428571428
164
+ },
165
+ {
166
+ "name": "turkish_plu",
167
+ "task": "multiple_choice",
168
+ "acc": 0.50208,
169
+ "acc_norm": 0.40704
170
+ },
171
+ {
172
+ "name": "turkish_plu_goal_inference",
173
+ "task": "multiple_choice",
174
+ "acc": 0.43010752688172044,
175
+ "acc_norm": 0.27956989247311825
176
+ },
177
+ {
178
+ "name": "turkish_plu_next_event_prediction",
179
+ "task": "multiple_choice",
180
+ "acc": 0.5114503816793893,
181
+ "acc_norm": 0.27938931297709924
182
+ },
183
+ {
184
+ "name": "turkish_plu_step_inference",
185
+ "task": "multiple_choice",
186
+ "acc": 0.4035947712418301,
187
+ "acc_norm": 0.37254901960784315
188
+ },
189
+ {
190
+ "name": "turkish_plu_step_ordering",
191
+ "task": "multiple_choice",
192
+ "acc": 0.614103819784525,
193
+ "acc_norm": 0.614103819784525
194
  }
195
  ]
196
  }
results/zero-shot/llama-3-8b-instruct.json CHANGED
@@ -82,30 +82,6 @@
82
  "exact_match": 0.1289237668161435,
83
  "f1": 0.4134057883004977
84
  },
85
- {
86
- "name": "turkish_plu_goal_inference",
87
- "task": "multiple_choice",
88
- "acc": 0.38829151732377537,
89
- "acc_norm": 0.43130227001194743
90
- },
91
- {
92
- "name": "turkish_plu_next_event_prediction",
93
- "task": "multiple_choice",
94
- "acc": 0.4549618320610687,
95
- "acc_norm": 0.517557251908397
96
- },
97
- {
98
- "name": "turkish_plu_step_inference",
99
- "task": "multiple_choice",
100
- "acc": 0.3137254901960784,
101
- "acc_norm": 0.44281045751633985
102
- },
103
- {
104
- "name": "turkish_plu_step_ordering",
105
- "task": "multiple_choice",
106
- "acc": 0.6160626836434868,
107
- "acc_norm": 0.6160626836434868
108
- },
109
  {
110
  "name": "xcopa_tr",
111
  "task": "multiple_choice",
@@ -186,6 +162,36 @@
186
  "task": "multiple_choice",
187
  "acc": 0.6142857142857143,
188
  "acc_norm": 0.6142857142857143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  }
190
  ]
191
  }
 
82
  "exact_match": 0.1289237668161435,
83
  "f1": 0.4134057883004977
84
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  {
86
  "name": "xcopa_tr",
87
  "task": "multiple_choice",
 
162
  "task": "multiple_choice",
163
  "acc": 0.6142857142857143,
164
  "acc_norm": 0.6142857142857143
165
+ },
166
+ {
167
+ "name": "turkish_plu",
168
+ "task": "multiple_choice",
169
+ "acc": 0.47136,
170
+ "acc_norm": 0.4032
171
+ },
172
+ {
173
+ "name": "turkish_plu_goal_inference",
174
+ "task": "multiple_choice",
175
+ "acc": 0.3763440860215054,
176
+ "acc_norm": 0.26642771804062126
177
+ },
178
+ {
179
+ "name": "turkish_plu_next_event_prediction",
180
+ "task": "multiple_choice",
181
+ "acc": 0.46564885496183206,
182
+ "acc_norm": 0.2595419847328244
183
+ },
184
+ {
185
+ "name": "turkish_plu_step_inference",
186
+ "task": "multiple_choice",
187
+ "acc": 0.3349673202614379,
188
+ "acc_norm": 0.35784313725490197
189
+ },
190
+ {
191
+ "name": "turkish_plu_step_ordering",
192
+ "task": "multiple_choice",
193
+ "acc": 0.6346718903036239,
194
+ "acc_norm": 0.6346718903036239
195
  }
196
  ]
197
  }
results/zero-shot/llama-3-8b.json CHANGED
@@ -81,30 +81,6 @@
81
  "exact_match": 0.28475336322869954,
82
  "f1": 0.5013148868557868
83
  },
84
- {
85
- "name": "turkish_plu_goal_inference",
86
- "task": "multiple_choice",
87
- "acc": 0.38948626045400236,
88
- "acc_norm": 0.4169653524492234
89
- },
90
- {
91
- "name": "turkish_plu_next_event_prediction",
92
- "task": "multiple_choice",
93
- "acc": 0.4488549618320611,
94
- "acc_norm": 0.5328244274809161
95
- },
96
- {
97
- "name": "turkish_plu_step_inference",
98
- "task": "multiple_choice",
99
- "acc": 0.32189542483660133,
100
- "acc_norm": 0.47058823529411764
101
- },
102
- {
103
- "name": "turkish_plu_step_ordering",
104
- "task": "multiple_choice",
105
- "acc": 0.6278158667972575,
106
- "acc_norm": 0.6278158667972575
107
- },
108
  {
109
  "name": "xcopa_tr",
110
  "task": "multiple_choice",
@@ -185,6 +161,36 @@
185
  "task": "multiple_choice",
186
  "acc": 0.5428571428571428,
187
  "acc_norm": 0.5428571428571428
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  }
189
  ]
190
  }
 
81
  "exact_match": 0.28475336322869954,
82
  "f1": 0.5013148868557868
83
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  {
85
  "name": "xcopa_tr",
86
  "task": "multiple_choice",
 
161
  "task": "multiple_choice",
162
  "acc": 0.5428571428571428,
163
  "acc_norm": 0.5428571428571428
164
+ },
165
+ {
166
+ "name": "turkish_plu",
167
+ "task": "multiple_choice",
168
+ "acc": 0.46496,
169
+ "acc_norm": 0.39616
170
+ },
171
+ {
172
+ "name": "turkish_plu_goal_inference",
173
+ "task": "multiple_choice",
174
+ "acc": 0.36917562724014336,
175
+ "acc_norm": 0.25925925925925924
176
+ },
177
+ {
178
+ "name": "turkish_plu_next_event_prediction",
179
+ "task": "multiple_choice",
180
+ "acc": 0.46106870229007635,
181
+ "acc_norm": 0.26259541984732826
182
+ },
183
+ {
184
+ "name": "turkish_plu_step_inference",
185
+ "task": "multiple_choice",
186
+ "acc": 0.3284313725490196,
187
+ "acc_norm": 0.33986928104575165
188
+ },
189
+ {
190
+ "name": "turkish_plu_step_ordering",
191
+ "task": "multiple_choice",
192
+ "acc": 0.6278158667972575,
193
+ "acc_norm": 0.6278158667972575
194
  }
195
  ]
196
  }
results/zero-shot/llama-3.1-8b-instruct.json CHANGED
@@ -81,30 +81,6 @@
81
  "exact_match": 0.23318385650224216,
82
  "f1": 0.5062272078338648
83
  },
84
- {
85
- "name": "turkish_plu_goal_inference",
86
- "task": "multiple_choice",
87
- "acc": 0.40860215053763443,
88
- "acc_norm": 0.45997610513739545
89
- },
90
- {
91
- "name": "turkish_plu_next_event_prediction",
92
- "task": "multiple_choice",
93
- "acc": 0.4442748091603053,
94
- "acc_norm": 0.5419847328244275
95
- },
96
- {
97
- "name": "turkish_plu_step_inference",
98
- "task": "multiple_choice",
99
- "acc": 0.33169934640522875,
100
- "acc_norm": 0.4624183006535948
101
- },
102
- {
103
- "name": "turkish_plu_step_ordering",
104
- "task": "multiple_choice",
105
- "acc": 0.633692458374143,
106
- "acc_norm": 0.633692458374143
107
- },
108
  {
109
  "name": "xcopa_tr",
110
  "task": "multiple_choice",
@@ -185,6 +161,36 @@
185
  "task": "multiple_choice",
186
  "acc": 0.6428571428571429,
187
  "acc_norm": 0.6428571428571429
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  }
189
  ]
190
  }
 
81
  "exact_match": 0.23318385650224216,
82
  "f1": 0.5062272078338648
83
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  {
85
  "name": "xcopa_tr",
86
  "task": "multiple_choice",
 
161
  "task": "multiple_choice",
162
  "acc": 0.6428571428571429,
163
  "acc_norm": 0.6428571428571429
164
+ },
165
+ {
166
+ "name": "turkish_plu",
167
+ "task": "multiple_choice",
168
+ "acc": 0.4848,
169
+ "acc_norm": 0.40896
170
+ },
171
+ {
172
+ "name": "turkish_plu_goal_inference",
173
+ "task": "multiple_choice",
174
+ "acc": 0.40860215053763443,
175
+ "acc_norm": 0.27718040621266427
176
+ },
177
+ {
178
+ "name": "turkish_plu_next_event_prediction",
179
+ "task": "multiple_choice",
180
+ "acc": 0.44580152671755724,
181
+ "acc_norm": 0.2549618320610687
182
+ },
183
+ {
184
+ "name": "turkish_plu_step_inference",
185
+ "task": "multiple_choice",
186
+ "acc": 0.3431372549019608,
187
+ "acc_norm": 0.33986928104575165
188
+ },
189
+ {
190
+ "name": "turkish_plu_step_ordering",
191
+ "task": "multiple_choice",
192
+ "acc": 0.6571988246816847,
193
+ "acc_norm": 0.6571988246816847
194
  }
195
  ]
196
  }
results/zero-shot/llama-3.1-8b.json CHANGED
@@ -81,30 +81,6 @@
81
  "exact_match": 0.2757847533632287,
82
  "f1": 0.5178366277473359
83
  },
84
- {
85
- "name": "turkish_plu_goal_inference",
86
- "task": "multiple_choice",
87
- "acc": 0.4145758661887694,
88
- "acc_norm": 0.4324970131421744
89
- },
90
- {
91
- "name": "turkish_plu_next_event_prediction",
92
- "task": "multiple_choice",
93
- "acc": 0.4488549618320611,
94
- "acc_norm": 0.5358778625954198
95
- },
96
- {
97
- "name": "turkish_plu_step_inference",
98
- "task": "multiple_choice",
99
- "acc": 0.3382352941176471,
100
- "acc_norm": 0.4738562091503268
101
- },
102
- {
103
- "name": "turkish_plu_step_ordering",
104
- "task": "multiple_choice",
105
- "acc": 0.6425073457394711,
106
- "acc_norm": 0.6425073457394711
107
- },
108
  {
109
  "name": "xcopa_tr",
110
  "task": "multiple_choice",
@@ -185,6 +161,36 @@
185
  "task": "multiple_choice",
186
  "acc": 0.5857142857142857,
187
  "acc_norm": 0.5857142857142857
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  }
189
  ]
190
  }
 
81
  "exact_match": 0.2757847533632287,
82
  "f1": 0.5178366277473359
83
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  {
85
  "name": "xcopa_tr",
86
  "task": "multiple_choice",
 
161
  "task": "multiple_choice",
162
  "acc": 0.5857142857142857,
163
  "acc_norm": 0.5857142857142857
164
+ },
165
+ {
166
+ "name": "turkish_plu",
167
+ "task": "multiple_choice",
168
+ "acc": 0.47552,
169
+ "acc_norm": 0.39872
170
+ },
171
+ {
172
+ "name": "turkish_plu_goal_inference",
173
+ "task": "multiple_choice",
174
+ "acc": 0.38829151732377537,
175
+ "acc_norm": 0.2628434886499403
176
+ },
177
+ {
178
+ "name": "turkish_plu_next_event_prediction",
179
+ "task": "multiple_choice",
180
+ "acc": 0.4595419847328244,
181
+ "acc_norm": 0.26106870229007634
182
+ },
183
+ {
184
+ "name": "turkish_plu_step_inference",
185
+ "task": "multiple_choice",
186
+ "acc": 0.35130718954248363,
187
+ "acc_norm": 0.3431372549019608
188
+ },
189
+ {
190
+ "name": "turkish_plu_step_ordering",
191
+ "task": "multiple_choice",
192
+ "acc": 0.6317335945151812,
193
+ "acc_norm": 0.6317335945151812
194
  }
195
  ]
196
  }
results/zero-shot/llama-3.2-1b.json CHANGED
@@ -93,30 +93,6 @@
93
  "exact_match": 0.06278026905829596,
94
  "f1": 0.21486130318406463
95
  },
96
- {
97
- "name": "turkish_plu_goal_inference",
98
- "task": "multiple_choice",
99
- "acc": 0.35842293906810035,
100
- "acc_norm": 0.4026284348864994
101
- },
102
- {
103
- "name": "turkish_plu_next_event_prediction",
104
- "task": "multiple_choice",
105
- "acc": 0.3709923664122137,
106
- "acc_norm": 0.467175572519084
107
- },
108
- {
109
- "name": "turkish_plu_step_inference",
110
- "task": "multiple_choice",
111
- "acc": 0.27941176470588236,
112
- "acc_norm": 0.41830065359477125
113
- },
114
- {
115
- "name": "turkish_plu_step_ordering",
116
- "task": "multiple_choice",
117
- "acc": 0.5759059745347699,
118
- "acc_norm": 0.5759059745347699
119
- },
120
  {
121
  "name": "wiki_lingua_tr",
122
  "task": "summarization",
@@ -217,6 +193,36 @@
217
  "task": "multiple_choice",
218
  "acc": 0.5285714285714286,
219
  "acc_norm": 0.5285714285714286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  }
221
  ]
222
  }
 
93
  "exact_match": 0.06278026905829596,
94
  "f1": 0.21486130318406463
95
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  {
97
  "name": "wiki_lingua_tr",
98
  "task": "summarization",
 
193
  "task": "multiple_choice",
194
  "acc": 0.5285714285714286,
195
  "acc_norm": 0.5285714285714286
196
+ },
197
+ {
198
+ "name": "turkish_plu",
199
+ "task": "multiple_choice",
200
+ "acc": 0.4208,
201
+ "acc_norm": 0.368
202
+ },
203
+ {
204
+ "name": "turkish_plu_goal_inference",
205
+ "task": "multiple_choice",
206
+ "acc": 0.36200716845878134,
207
+ "acc_norm": 0.23894862604540024
208
+ },
209
+ {
210
+ "name": "turkish_plu_next_event_prediction",
211
+ "task": "multiple_choice",
212
+ "acc": 0.37251908396946565,
213
+ "acc_norm": 0.2366412213740458
214
+ },
215
+ {
216
+ "name": "turkish_plu_step_inference",
217
+ "task": "multiple_choice",
218
+ "acc": 0.29248366013071897,
219
+ "acc_norm": 0.3366013071895425
220
+ },
221
+ {
222
+ "name": "turkish_plu_step_ordering",
223
+ "task": "multiple_choice",
224
+ "acc": 0.5768854064642507,
225
+ "acc_norm": 0.5768854064642507
226
  }
227
  ]
228
  }
results/zero-shot/llama-3.2-3b-instruct.json CHANGED
@@ -93,30 +93,6 @@
93
  "exact_match": 0.18721973094170405,
94
  "f1": 0.5109898180473623
95
  },
96
- {
97
- "name": "turkish_plu_goal_inference",
98
- "task": "multiple_choice",
99
- "acc": 0.3321385902031063,
100
- "acc_norm": 0.3548387096774194
101
- },
102
- {
103
- "name": "turkish_plu_next_event_prediction",
104
- "task": "multiple_choice",
105
- "acc": 0.3648854961832061,
106
- "acc_norm": 0.4488549618320611
107
- },
108
- {
109
- "name": "turkish_plu_step_inference",
110
- "task": "multiple_choice",
111
- "acc": 0.24183006535947713,
112
- "acc_norm": 0.3758169934640523
113
- },
114
- {
115
- "name": "turkish_plu_step_ordering",
116
- "task": "multiple_choice",
117
- "acc": 0.5710088148873653,
118
- "acc_norm": 0.5710088148873653
119
- },
120
  {
121
  "name": "wiki_lingua_tr",
122
  "task": "summarization",
@@ -217,6 +193,36 @@
217
  "task": "multiple_choice",
218
  "acc": 0.5428571428571428,
219
  "acc_norm": 0.5428571428571428
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  }
221
  ]
222
  }
 
93
  "exact_match": 0.18721973094170405,
94
  "f1": 0.5109898180473623
95
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  {
97
  "name": "wiki_lingua_tr",
98
  "task": "summarization",
 
193
  "task": "multiple_choice",
194
  "acc": 0.5428571428571428,
195
  "acc_norm": 0.5428571428571428
196
+ },
197
+ {
198
+ "name": "turkish_plu",
199
+ "task": "multiple_choice",
200
+ "acc": 0.44,
201
+ "acc_norm": 0.3952
202
+ },
203
+ {
204
+ "name": "turkish_plu_goal_inference",
205
+ "task": "multiple_choice",
206
+ "acc": 0.3548387096774194,
207
+ "acc_norm": 0.26045400238948624
208
+ },
209
+ {
210
+ "name": "turkish_plu_next_event_prediction",
211
+ "task": "multiple_choice",
212
+ "acc": 0.3938931297709924,
213
+ "acc_norm": 0.24427480916030533
214
+ },
215
+ {
216
+ "name": "turkish_plu_step_inference",
217
+ "task": "multiple_choice",
218
+ "acc": 0.2777777777777778,
219
+ "acc_norm": 0.3382352941176471
220
+ },
221
+ {
222
+ "name": "turkish_plu_step_ordering",
223
+ "task": "multiple_choice",
224
+ "acc": 0.6366307541625857,
225
+ "acc_norm": 0.6366307541625857
226
  }
227
  ]
228
  }
results/zero-shot/llama-3.2-3b.json CHANGED
@@ -81,30 +81,6 @@
81
  "exact_match": 0.21188340807174888,
82
  "f1": 0.4583574684635767
83
  },
84
- {
85
- "name": "turkish_plu_goal_inference",
86
- "task": "multiple_choice",
87
- "acc": 0.3906810035842294,
88
- "acc_norm": 0.3906810035842294
89
- },
90
- {
91
- "name": "turkish_plu_next_event_prediction",
92
- "task": "multiple_choice",
93
- "acc": 0.4122137404580153,
94
- "acc_norm": 0.5389312977099237
95
- },
96
- {
97
- "name": "turkish_plu_step_inference",
98
- "task": "multiple_choice",
99
- "acc": 0.30718954248366015,
100
- "acc_norm": 0.4493464052287582
101
- },
102
- {
103
- "name": "turkish_plu_step_ordering",
104
- "task": "multiple_choice",
105
- "acc": 0.5974534769833496,
106
- "acc_norm": 0.5974534769833496
107
- },
108
  {
109
  "name": "xcopa_tr",
110
  "task": "multiple_choice",
@@ -185,6 +161,36 @@
185
  "task": "multiple_choice",
186
  "acc": 0.5714285714285714,
187
  "acc_norm": 0.5714285714285714
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  }
189
  ]
190
  }
 
81
  "exact_match": 0.21188340807174888,
82
  "f1": 0.4583574684635767
83
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  {
85
  "name": "xcopa_tr",
86
  "task": "multiple_choice",
 
161
  "task": "multiple_choice",
162
  "acc": 0.5714285714285714,
163
  "acc_norm": 0.5714285714285714
164
+ },
165
+ {
166
+ "name": "turkish_plu",
167
+ "task": "multiple_choice",
168
+ "acc": 0.45408,
169
+ "acc_norm": 0.38752
170
+ },
171
+ {
172
+ "name": "turkish_plu_goal_inference",
173
+ "task": "multiple_choice",
174
+ "acc": 0.4002389486260454,
175
+ "acc_norm": 0.25925925925925924
176
+ },
177
+ {
178
+ "name": "turkish_plu_next_event_prediction",
179
+ "task": "multiple_choice",
180
+ "acc": 0.43206106870229005,
181
+ "acc_norm": 0.26106870229007634
182
+ },
183
+ {
184
+ "name": "turkish_plu_step_inference",
185
+ "task": "multiple_choice",
186
+ "acc": 0.315359477124183,
187
+ "acc_norm": 0.35130718954248363
188
+ },
189
+ {
190
+ "name": "turkish_plu_step_ordering",
191
+ "task": "multiple_choice",
192
+ "acc": 0.5954946131243879,
193
+ "acc_norm": 0.5954946131243879
194
  }
195
  ]
196
  }