mihaimasala committed on
Commit
0f70588
·
verified ·
1 Parent(s): 227838b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +475 -474
README.md CHANGED
@@ -4,480 +4,6 @@ language:
4
  - ro
5
  base_model:
6
  - OpenLLM-Ro/RoLlama2-7b-Base
7
- model-index:
8
- - name: OpenLLM-Ro/RoLlama2-7b-Instruct-2024-05-14
9
- results:
10
- - task:
11
- type: text-generation
12
- dataset:
13
- name: RoMT-Bench
14
- type: RoMT-Bench
15
- metrics:
16
- - name: Score
17
- type: Score
18
- value: 3.86
19
- - task:
20
- type: text-generation
21
- dataset:
22
- name: RoCulturaBench
23
- type: RoCulturaBench
24
- metrics:
25
- - name: Score
26
- type: Score
27
- value: 3.77
28
- - task:
29
- type: text-generation
30
- dataset:
31
- name: Romanian_Academic_Benchmarks
32
- type: Romanian_Academic_Benchmarks
33
- metrics:
34
- - name: Average accuracy
35
- type: accuracy
36
- value: 45.71
37
- - task:
38
- type: text-generation
39
- dataset:
40
- name: OpenLLM-Ro/ro_arc_challenge
41
- type: OpenLLM-Ro/ro_arc_challenge
42
- metrics:
43
- - name: Average accuracy
44
- type: accuracy
45
- value: 43.66
46
- - task:
47
- type: text-generation
48
- dataset:
49
- name: OpenLLM-Ro/ro_mmlu
50
- type: OpenLLM-Ro/ro_mmlu
51
- metrics:
52
- - name: Average accuracy
53
- type: accuracy
54
- value: 39.7
55
- - task:
56
- type: text-generation
57
- dataset:
58
- name: OpenLLM-Ro/ro_winogrande
59
- type: OpenLLM-Ro/ro_winogrande
60
- metrics:
61
- - name: Average accuracy
62
- type: accuracy
63
- value: 70.34
64
- - task:
65
- type: text-generation
66
- dataset:
67
- name: OpenLLM-Ro/ro_hellaswag
68
- type: OpenLLM-Ro/ro_hellaswag
69
- metrics:
70
- - name: Average accuracy
71
- type: accuracy
72
- value: 57.36
73
- - task:
74
- type: text-generation
75
- dataset:
76
- name: OpenLLM-Ro/ro_gsm8k
77
- type: OpenLLM-Ro/ro_gsm8k
78
- metrics:
79
- - name: Average accuracy
80
- type: accuracy
81
- value: 18.78
82
- - task:
83
- type: text-generation
84
- dataset:
85
- name: OpenLLM-Ro/ro_truthfulqa
86
- type: OpenLLM-Ro/ro_truthfulqa
87
- metrics:
88
- - name: Average accuracy
89
- type: accuracy
90
- value: 44.44
91
- - task:
92
- type: text-generation
93
- dataset:
94
- name: LaRoSeDa_binary
95
- type: LaRoSeDa_binary
96
- metrics:
97
- - name: Average macro-f1
98
- type: macro-f1
99
- value: 97.48
100
- - task:
101
- type: text-generation
102
- dataset:
103
- name: LaRoSeDa_multiclass
104
- type: LaRoSeDa_multiclass
105
- metrics:
106
- - name: Average macro-f1
107
- type: macro-f1
108
- value: 65.26
109
- - task:
110
- type: text-generation
111
- dataset:
112
- name: LaRoSeDa_binary_finetuned
113
- type: LaRoSeDa_binary_finetuned
114
- metrics:
115
- - name: Average macro-f1
116
- type: macro-f1
117
- value: 98.83
118
- - task:
119
- type: text-generation
120
- dataset:
121
- name: LaRoSeDa_multiclass_finetuned
122
- type: LaRoSeDa_multiclass_finetuned
123
- metrics:
124
- - name: Average macro-f1
125
- type: macro-f1
126
- value: 87.28
127
- - task:
128
- type: text-generation
129
- dataset:
130
- name: WMT_EN-RO
131
- type: WMT_EN-RO
132
- metrics:
133
- - name: Average bleu
134
- type: bleu
135
- value: 27.38
136
- - task:
137
- type: text-generation
138
- dataset:
139
- name: WMT_RO-EN
140
- type: WMT_RO-EN
141
- metrics:
142
- - name: Average bleu
143
- type: bleu
144
- value: 10.32
145
- - task:
146
- type: text-generation
147
- dataset:
148
- name: WMT_EN-RO_finetuned
149
- type: WMT_EN-RO_finetuned
150
- metrics:
151
- - name: Average bleu
152
- type: bleu
153
- value: 27.59
154
- - task:
155
- type: text-generation
156
- dataset:
157
- name: WMT_RO-EN_finetuned
158
- type: WMT_RO-EN_finetuned
159
- metrics:
160
- - name: Average bleu
161
- type: bleu
162
- value: 40.13
163
- - task:
164
- type: text-generation
165
- dataset:
166
- name: XQuAD
167
- type: XQuAD
168
- metrics:
169
- - name: Average exact_match
170
- type: exact_match
171
- value: 44.52
172
- - task:
173
- type: text-generation
174
- dataset:
175
- name: XQuAD
176
- type: XQuAD
177
- metrics:
178
- - name: Average f1
179
- type: f1
180
- value: 64.75
181
- - task:
182
- type: text-generation
183
- dataset:
184
- name: XQuAD_finetuned
185
- type: XQuAD_finetuned
186
- metrics:
187
- - name: Average exact_match
188
- type: exact_match
189
- value: 54.96
190
- - task:
191
- type: text-generation
192
- dataset:
193
- name: XQuAD_finetuned
194
- type: XQuAD_finetuned
195
- metrics:
196
- - name: Average f1
197
- type: f1
198
- value: 70.2
199
- - task:
200
- type: text-generation
201
- dataset:
202
- name: STS
203
- type: STS
204
- metrics:
205
- - name: Average spearman
206
- type: spearman
207
- value: 65.5
208
- - task:
209
- type: text-generation
210
- dataset:
211
- name: STS
212
- type: STS
213
- metrics:
214
- - name: Average pearson
215
- type: pearson
216
- value: 67.79
217
- - task:
218
- type: text-generation
219
- dataset:
220
- name: STS_finetuned
221
- type: STS_finetuned
222
- metrics:
223
- - name: Average spearman
224
- type: spearman
225
- value: 84.44
226
- - task:
227
- type: text-generation
228
- dataset:
229
- name: STS_finetuned
230
- type: STS_finetuned
231
- metrics:
232
- - name: Average pearson
233
- type: pearson
234
- value: 84.76
235
- - task:
236
- type: text-generation
237
- dataset:
238
- name: RoMT-Bench
239
- type: RoMT-Bench
240
- metrics:
241
- - name: First turn
242
- type: Score
243
- value: 4.67
244
- - name: Second turn
245
- type: Score
246
- value: 3.04
247
- - task:
248
- type: text-generation
249
- dataset:
250
- name: OpenLLM-Ro/ro_arc_challenge
251
- type: OpenLLM-Ro/ro_arc_challenge
252
- metrics:
253
- - name: 0-shot
254
- type: accuracy
255
- value: 41.73
256
- - name: 1-shot
257
- type: accuracy
258
- value: 42.16
259
- - name: 3-shot
260
- type: accuracy
261
- value: 43.53
262
- - name: 5-shot
263
- type: accuracy
264
- value: 44.9
265
- - name: 10-shot
266
- type: accuracy
267
- value: 44.99
268
- - name: 25-shot
269
- type: accuracy
270
- value: 44.64
271
- - task:
272
- type: text-generation
273
- dataset:
274
- name: OpenLLM-Ro/ro_mmlu
275
- type: OpenLLM-Ro/ro_mmlu
276
- metrics:
277
- - name: 0-shot
278
- type: accuracy
279
- value: 38.54
280
- - name: 1-shot
281
- type: accuracy
282
- value: 39.36
283
- - name: 3-shot
284
- type: accuracy
285
- value: 40.82
286
- - name: 5-shot
287
- type: accuracy
288
- value: 40.07
289
- - task:
290
- type: text-generation
291
- dataset:
292
- name: OpenLLM-Ro/ro_winogrande
293
- type: OpenLLM-Ro/ro_winogrande
294
- metrics:
295
- - name: 0-shot
296
- type: accuracy
297
- value: 72.61
298
- - name: 1-shot
299
- type: accuracy
300
- value: 69.93
301
- - name: 3-shot
302
- type: accuracy
303
- value: 70.4
304
- - name: 5-shot
305
- type: accuracy
306
- value: 68.43
307
- - task:
308
- type: text-generation
309
- dataset:
310
- name: OpenLLM-Ro/ro_hellaswag
311
- type: OpenLLM-Ro/ro_hellaswag
312
- metrics:
313
- - name: 0-shot
314
- type: accuracy
315
- value: 56.9
316
- - name: 1-shot
317
- type: accuracy
318
- value: 57.07
319
- - name: 3-shot
320
- type: accuracy
321
- value: 57.56
322
- - name: 5-shot
323
- type: accuracy
324
- value: 57.35
325
- - name: 10-shot
326
- type: accuracy
327
- value: 57.93
328
- - task:
329
- type: text-generation
330
- dataset:
331
- name: OpenLLM-Ro/ro_gsm8k
332
- type: OpenLLM-Ro/ro_gsm8k
333
- metrics:
334
- - name: 0-shot
335
- type: accuracy
336
- value: 11.22
337
- - name: 1-shot
338
- type: accuracy
339
- value: 21.38
340
- - name: 3-shot
341
- type: accuracy
342
- value: 23.73
343
- - task:
344
- type: text-generation
345
- dataset:
346
- name: LaRoSeDa_binary
347
- type: LaRoSeDa_binary
348
- metrics:
349
- - name: 0-shot
350
- type: macro-f1
351
- value: 97.67
352
- - name: 1-shot
353
- type: macro-f1
354
- value: 96.77
355
- - name: 3-shot
356
- type: macro-f1
357
- value: 97.6
358
- - name: 5-shot
359
- type: macro-f1
360
- value: 97.87
361
- - task:
362
- type: text-generation
363
- dataset:
364
- name: LaRoSeDa_multiclass
365
- type: LaRoSeDa_multiclass
366
- metrics:
367
- - name: 0-shot
368
- type: macro-f1
369
- value: 61.82
370
- - name: 1-shot
371
- type: macro-f1
372
- value: 58.84
373
- - name: 3-shot
374
- type: macro-f1
375
- value: 68.67
376
- - name: 5-shot
377
- type: macro-f1
378
- value: 71.71
379
- - task:
380
- type: text-generation
381
- dataset:
382
- name: WMT_EN-RO
383
- type: WMT_EN-RO
384
- metrics:
385
- - name: 0-shot
386
- type: bleu
387
- value: 19.71
388
- - name: 1-shot
389
- type: bleu
390
- value: 29.62
391
- - name: 3-shot
392
- type: bleu
393
- value: 30.11
394
- - name: 5-shot
395
- type: bleu
396
- value: 30.1
397
- - task:
398
- type: text-generation
399
- dataset:
400
- name: WMT_RO-EN
401
- type: WMT_RO-EN
402
- metrics:
403
- - name: 0-shot
404
- type: bleu
405
- value: 1.86
406
- - name: 1-shot
407
- type: bleu
408
- value: 4.41
409
- - name: 3-shot
410
- type: bleu
411
- value: 14.95
412
- - name: 5-shot
413
- type: bleu
414
- value: 20.07
415
- - task:
416
- type: text-generation
417
- dataset:
418
- name: XQuAD_EM
419
- type: XQuAD_EM
420
- metrics:
421
- - name: 0-shot
422
- type: exact_match
423
- value: 34.87
424
- - name: 1-shot
425
- type: exact_match
426
- value: 44.96
427
- - name: 3-shot
428
- type: exact_match
429
- value: 48.4
430
- - name: 5-shot
431
- type: exact_match
432
- value: 49.83
433
- - task:
434
- type: text-generation
435
- dataset:
436
- name: XQuAD_F1
437
- type: XQuAD_F1
438
- metrics:
439
- - name: 0-shot
440
- type: f1
441
- value: 58.07
442
- - name: 1-shot
443
- type: f1
444
- value: 63.93
445
- - name: 3-shot
446
- type: f1
447
- value: 67.89
448
- - name: 5-shot
449
- type: f1
450
- value: 69.1
451
- - task:
452
- type: text-generation
453
- dataset:
454
- name: STS
455
- type: STS
456
- metrics:
457
- - name: 0-shot
458
- type: spearman
459
- value: 61.14
460
- - name: 1-shot
461
- type: spearman
462
- value: 66.91
463
- - name: 3-shot
464
- type: spearman
465
- value: 68.46
466
- - task:
467
- type: text-generation
468
- dataset:
469
- name: STS
470
- type: STS
471
- metrics:
472
- - name: 0-shot
473
- type: pearson
474
- value: 61.88
475
- - name: 1-shot
476
- type: pearson
477
- value: 70.04
478
- - name: 3-shot
479
- type: pearson
480
- value: 71.46
481
  datasets:
482
  - OpenLLM-Ro/ro_sft_alpaca
483
  - OpenLLM-Ro/ro_sft_alpaca_gpt4
@@ -486,6 +12,481 @@ datasets:
486
  - OpenLLM-Ro/ro_sft_norobots
487
  - OpenLLM-Ro/ro_sft_orca
488
  - OpenLLM-Ro/ro_sft_camel
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  ---
490
 
491
  # Model Card for Model ID
 
4
  - ro
5
  base_model:
6
  - OpenLLM-Ro/RoLlama2-7b-Base
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  datasets:
8
  - OpenLLM-Ro/ro_sft_alpaca
9
  - OpenLLM-Ro/ro_sft_alpaca_gpt4
 
12
  - OpenLLM-Ro/ro_sft_norobots
13
  - OpenLLM-Ro/ro_sft_orca
14
  - OpenLLM-Ro/ro_sft_camel
15
+ model-index:
16
+ - name: OpenLLM-Ro/RoLlama2-7b-Instruct-2024-05-14
17
+ results:
18
+ - task:
19
+ type: text-generation
20
+ dataset:
21
+ name: RoMT-Bench
22
+ type: RoMT-Bench
23
+ metrics:
24
+ - name: Score
25
+ type: Score
26
+ value: 3.86
27
+ - task:
28
+ type: text-generation
29
+ dataset:
30
+ name: RoCulturaBench
31
+ type: RoCulturaBench
32
+ metrics:
33
+ - name: Score
34
+ type: Score
35
+ value: 3.77
36
+ - task:
37
+ type: text-generation
38
+ dataset:
39
+ name: Romanian_Academic_Benchmarks
40
+ type: Romanian_Academic_Benchmarks
41
+ metrics:
42
+ - name: Average accuracy
43
+ type: accuracy
44
+ value: 45.71
45
+ - task:
46
+ type: text-generation
47
+ dataset:
48
+ name: OpenLLM-Ro/ro_arc_challenge
49
+ type: OpenLLM-Ro/ro_arc_challenge
50
+ metrics:
51
+ - name: Average accuracy
52
+ type: accuracy
53
+ value: 43.66
54
+ - task:
55
+ type: text-generation
56
+ dataset:
57
+ name: OpenLLM-Ro/ro_mmlu
58
+ type: OpenLLM-Ro/ro_mmlu
59
+ metrics:
60
+ - name: Average accuracy
61
+ type: accuracy
62
+ value: 39.70
63
+ - task:
64
+ type: text-generation
65
+ dataset:
66
+ name: OpenLLM-Ro/ro_winogrande
67
+ type: OpenLLM-Ro/ro_winogrande
68
+ metrics:
69
+ - name: Average accuracy
70
+ type: accuracy
71
+ value: 70.34
72
+ - task:
73
+ type: text-generation
74
+ dataset:
75
+ name: OpenLLM-Ro/ro_hellaswag
76
+ type: OpenLLM-Ro/ro_hellaswag
77
+ metrics:
78
+ - name: Average accuracy
79
+ type: accuracy
80
+ value: 57.36
81
+ - task:
82
+ type: text-generation
83
+ dataset:
84
+ name: OpenLLM-Ro/ro_gsm8k
85
+ type: OpenLLM-Ro/ro_gsm8k
86
+ metrics:
87
+ - name: Average accuracy
88
+ type: accuracy
89
+ value: 18.78
90
+ - task:
91
+ type: text-generation
92
+ dataset:
93
+ name: OpenLLM-Ro/ro_truthfulqa
94
+ type: OpenLLM-Ro/ro_truthfulqa
95
+ metrics:
96
+ - name: Average accuracy
97
+ type: accuracy
98
+ value: 44.44
99
+ - task:
100
+ type: text-generation
101
+ dataset:
102
+ name: LaRoSeDa_binary
103
+ type: LaRoSeDa_binary
104
+ metrics:
105
+ - name: Average macro-f1
106
+ type: macro-f1
107
+ value: 97.48
108
+ - task:
109
+ type: text-generation
110
+ dataset:
111
+ name: LaRoSeDa_multiclass
112
+ type: LaRoSeDa_multiclass
113
+ metrics:
114
+ - name: Average macro-f1
115
+ type: macro-f1
116
+ value: 65.26
117
+ - task:
118
+ type: text-generation
119
+ dataset:
120
+ name: LaRoSeDa_binary_finetuned
121
+ type: LaRoSeDa_binary_finetuned
122
+ metrics:
123
+ - name: Average macro-f1
124
+ type: macro-f1
125
+ value: 98.83
126
+ - task:
127
+ type: text-generation
128
+ dataset:
129
+ name: LaRoSeDa_multiclass_finetuned
130
+ type: LaRoSeDa_multiclass_finetuned
131
+ metrics:
132
+ - name: Average macro-f1
133
+ type: macro-f1
134
+ value: 87.28
135
+ - task:
136
+ type: text-generation
137
+ dataset:
138
+ name: WMT_EN-RO
139
+ type: WMT_EN-RO
140
+ metrics:
141
+ - name: Average bleu
142
+ type: bleu
143
+ value: 27.38
144
+ - task:
145
+ type: text-generation
146
+ dataset:
147
+ name: WMT_RO-EN
148
+ type: WMT_RO-EN
149
+ metrics:
150
+ - name: Average bleu
151
+ type: bleu
152
+ value: 10.32
153
+ - task:
154
+ type: text-generation
155
+ dataset:
156
+ name: WMT_EN-RO_finetuned
157
+ type: WMT_EN-RO_finetuned
158
+ metrics:
159
+ - name: Average bleu
160
+ type: bleu
161
+ value: 27.59
162
+ - task:
163
+ type: text-generation
164
+ dataset:
165
+ name: WMT_RO-EN_finetuned
166
+ type: WMT_RO-EN_finetuned
167
+ metrics:
168
+ - name: Average bleu
169
+ type: bleu
170
+ value: 40.13
171
+ - task:
172
+ type: text-generation
173
+ dataset:
174
+ name: XQuAD
175
+ type: XQuAD
176
+ metrics:
177
+ - name: Average exact_match
178
+ type: exact_match
179
+ value: 44.52
180
+ - task:
181
+ type: text-generation
182
+ dataset:
183
+ name: XQuAD
184
+ type: XQuAD
185
+ metrics:
186
+ - name: Average f1
187
+ type: f1
188
+ value: 64.75
189
+ - task:
190
+ type: text-generation
191
+ dataset:
192
+ name: XQuAD_finetuned
193
+ type: XQuAD_finetuned
194
+ metrics:
195
+ - name: Average exact_match
196
+ type: exact_match
197
+ value: 54.96
198
+ - task:
199
+ type: text-generation
200
+ dataset:
201
+ name: XQuAD_finetuned
202
+ type: XQuAD_finetuned
203
+ metrics:
204
+ - name: Average f1
205
+ type: f1
206
+ value: 70.20
207
+ - task:
208
+ type: text-generation
209
+ dataset:
210
+ name: STS
211
+ type: STS
212
+ metrics:
213
+ - name: Average spearman
214
+ type: spearman
215
+ value: 65.50
216
+ - task:
217
+ type: text-generation
218
+ dataset:
219
+ name: STS
220
+ type: STS
221
+ metrics:
222
+ - name: Average pearson
223
+ type: pearson
224
+ value: 67.79
225
+ - task:
226
+ type: text-generation
227
+ dataset:
228
+ name: STS_finetuned
229
+ type: STS_finetuned
230
+ metrics:
231
+ - name: Average spearman
232
+ type: spearman
233
+ value: 84.44
234
+ - task:
235
+ type: text-generation
236
+ dataset:
237
+ name: STS_finetuned
238
+ type: STS_finetuned
239
+ metrics:
240
+ - name: Average pearson
241
+ type: pearson
242
+ value: 84.76
243
+ - task:
244
+ type: text-generation
245
+ dataset:
246
+ name: RoMT-Bench
247
+ type: RoMT-Bench
248
+ metrics:
249
+ - name: First turn
250
+ type: Score
251
+ value: 4.67
252
+ - name: Second turn
253
+ type: Score
254
+ value: 3.04
255
+ - task:
256
+ type: text-generation
257
+ dataset:
258
+ name: OpenLLM-Ro/ro_arc_challenge
259
+ type: OpenLLM-Ro/ro_arc_challenge
260
+ metrics:
261
+ - name: 0-shot
262
+ type: accuracy
263
+ value: 41.73
264
+ - name: 1-shot
265
+ type: accuracy
266
+ value: 42.16
267
+ - name: 3-shot
268
+ type: accuracy
269
+ value: 43.53
270
+ - name: 5-shot
271
+ type: accuracy
272
+ value: 44.90
273
+ - name: 10-shot
274
+ type: accuracy
275
+ value: 44.99
276
+ - name: 25-shot
277
+ type: accuracy
278
+ value: 44.64
279
+ - task:
280
+ type: text-generation
281
+ dataset:
282
+ name: OpenLLM-Ro/ro_mmlu
283
+ type: OpenLLM-Ro/ro_mmlu
284
+ metrics:
285
+ - name: 0-shot
286
+ type: accuracy
287
+ value: 38.54
288
+ - name: 1-shot
289
+ type: accuracy
290
+ value: 39.36
291
+ - name: 3-shot
292
+ type: accuracy
293
+ value: 40.82
294
+ - name: 5-shot
295
+ type: accuracy
296
+ value: 40.07
297
+ - task:
298
+ type: text-generation
299
+ dataset:
300
+ name: OpenLLM-Ro/ro_winogrande
301
+ type: OpenLLM-Ro/ro_winogrande
302
+ metrics:
303
+ - name: 0-shot
304
+ type: accuracy
305
+ value: 72.61
306
+ - name: 1-shot
307
+ type: accuracy
308
+ value: 69.93
309
+ - name: 3-shot
310
+ type: accuracy
311
+ value: 70.40
312
+ - name: 5-shot
313
+ type: accuracy
314
+ value: 68.43
315
+ - task:
316
+ type: text-generation
317
+ dataset:
318
+ name: OpenLLM-Ro/ro_hellaswag
319
+ type: OpenLLM-Ro/ro_hellaswag
320
+ metrics:
321
+ - name: 0-shot
322
+ type: accuracy
323
+ value: 56.90
324
+ - name: 1-shot
325
+ type: accuracy
326
+ value: 57.07
327
+ - name: 3-shot
328
+ type: accuracy
329
+ value: 57.56
330
+ - name: 5-shot
331
+ type: accuracy
332
+ value: 57.35
333
+ - name: 10-shot
334
+ type: accuracy
335
+ value: 57.93
336
+ - task:
337
+ type: text-generation
338
+ dataset:
339
+ name: OpenLLM-Ro/ro_gsm8k
340
+ type: OpenLLM-Ro/ro_gsm8k
341
+ metrics:
342
+ - name: 0-shot
343
+ type: accuracy
344
+ value: 11.22
345
+ - name: 1-shot
346
+ type: accuracy
347
+ value: 21.38
348
+ - name: 3-shot
349
+ type: accuracy
350
+ value: 23.73
351
+ - task:
352
+ type: text-generation
353
+ dataset:
354
+ name: LaRoSeDa_binary
355
+ type: LaRoSeDa_binary
356
+ metrics:
357
+ - name: 0-shot
358
+ type: macro-f1
359
+ value: 97.67
360
+ - name: 1-shot
361
+ type: macro-f1
362
+ value: 96.77
363
+ - name: 3-shot
364
+ type: macro-f1
365
+ value: 97.60
366
+ - name: 5-shot
367
+ type: macro-f1
368
+ value: 97.87
369
+ - task:
370
+ type: text-generation
371
+ dataset:
372
+ name: LaRoSeDa_multiclass
373
+ type: LaRoSeDa_multiclass
374
+ metrics:
375
+ - name: 0-shot
376
+ type: macro-f1
377
+ value: 61.82
378
+ - name: 1-shot
379
+ type: macro-f1
380
+ value: 58.84
381
+ - name: 3-shot
382
+ type: macro-f1
383
+ value: 68.67
384
+ - name: 5-shot
385
+ type: macro-f1
386
+ value: 71.71
387
+ - task:
388
+ type: text-generation
389
+ dataset:
390
+ name: WMT_EN-RO
391
+ type: WMT_EN-RO
392
+ metrics:
393
+ - name: 0-shot
394
+ type: bleu
395
+ value: 19.71
396
+ - name: 1-shot
397
+ type: bleu
398
+ value: 29.62
399
+ - name: 3-shot
400
+ type: bleu
401
+ value: 30.11
402
+ - name: 5-shot
403
+ type: bleu
404
+ value: 30.10
405
+ - task:
406
+ type: text-generation
407
+ dataset:
408
+ name: WMT_RO-EN
409
+ type: WMT_RO-EN
410
+ metrics:
411
+ - name: 0-shot
412
+ type: bleu
413
+ value: 1.86
414
+ - name: 1-shot
415
+ type: bleu
416
+ value: 4.41
417
+ - name: 3-shot
418
+ type: bleu
419
+ value: 14.95
420
+ - name: 5-shot
421
+ type: bleu
422
+ value: 20.07
423
+ - task:
424
+ type: text-generation
425
+ dataset:
426
+ name: XQuAD_EM
427
+ type: XQuAD_EM
428
+ metrics:
429
+ - name: 0-shot
430
+ type: exact_match
431
+ value: 34.87
432
+ - name: 1-shot
433
+ type: exact_match
434
+ value: 44.96
435
+ - name: 3-shot
436
+ type: exact_match
437
+ value: 48.40
438
+ - name: 5-shot
439
+ type: exact_match
440
+ value: 49.83
441
+ - task:
442
+ type: text-generation
443
+ dataset:
444
+ name: XQuAD_F1
445
+ type: XQuAD_F1
446
+ metrics:
447
+ - name: 0-shot
448
+ type: f1
449
+ value: 58.07
450
+ - name: 1-shot
451
+ type: f1
452
+ value: 63.93
453
+ - name: 3-shot
454
+ type: f1
455
+ value: 67.89
456
+ - name: 5-shot
457
+ type: f1
458
+ value: 69.10
459
+ - task:
460
+ type: text-generation
461
+ dataset:
462
+ name: STS_Spearman
463
+ type: STS_Spearman
464
+ metrics:
465
+ - name: 1-shot
466
+ type: spearman
467
+ value: 61.14
468
+ - name: 3-shot
469
+ type: spearman
470
+ value: 66.91
471
+ - name: 5-shot
472
+ type: spearman
473
+ value: 68.46
474
+ - task:
475
+ type: text-generation
476
+ dataset:
477
+ name: STS_Pearson
478
+ type: STS_Pearson
479
+ metrics:
480
+ - name: 1-shot
481
+ type: pearson
482
+ value: 61.88
483
+ - name: 3-shot
484
+ type: pearson
485
+ value: 70.04
486
+ - name: 5-shot
487
+ type: pearson
488
+ value: 71.46
489
+
490
  ---
491
 
492
  # Model Card for Model ID