tiedeman committed on
Commit cd73848 · 1 Parent(s): b2c0b05

Initial commit

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.spm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,1357 @@
+ ---
+ library_name: transformers
+ language:
+ - bal
+ - de
+ - diq
+ - en
+ - es
+ - fa
+ - fr
+ - glk
+ - jdt
+ - ku
+ - lrc
+ - mzn
+ - os
+ - pal
+ - ps
+ - pt
+ - tg
+ - tly
+ - zza
+ language_bcp47:
+ - ku_Latn
+
+ tags:
+ - translation
+ - opus-mt-tc-bible
+
+ license: apache-2.0
+ model-index:
+ - name: opus-mt-tc-bible-big-ira-deu_eng_fra_por_spa
+   results:
+   - task:
+       name: Translation ckb-deu
+       type: translation
+       args: ckb-deu
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: ckb-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 11.7
+     - name: chr-F
+       type: chrf
+       value: 0.40369
+   - task:
+       name: Translation ckb-eng
+       type: translation
+       args: ckb-eng
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: ckb-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.5
+     - name: chr-F
+       type: chrf
+       value: 0.48447
+   - task:
+       name: Translation ckb-fra
+       type: translation
+       args: ckb-fra
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: ckb-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 17.1
+     - name: chr-F
+       type: chrf
+       value: 0.44026
+   - task:
+       name: Translation ckb-por
+       type: translation
+       args: ckb-por
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: ckb-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 16.4
+     - name: chr-F
+       type: chrf
+       value: 0.43192
+   - task:
+       name: Translation ckb-spa
+       type: translation
+       args: ckb-spa
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: ckb-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 11.7
+     - name: chr-F
+       type: chrf
+       value: 0.38880
+   - task:
+       name: Translation kmr-eng
+       type: translation
+       args: kmr-eng
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: kmr-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 12.4
+     - name: chr-F
+       type: chrf
+       value: 0.37372
+   - task:
+       name: Translation pes-deu
+       type: translation
+       args: pes-deu
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: pes-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.5
+     - name: chr-F
+       type: chrf
+       value: 0.51542
+   - task:
+       name: Translation pes-eng
+       type: translation
+       args: pes-eng
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: pes-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 34.9
+     - name: chr-F
+       type: chrf
+       value: 0.61372
+   - task:
+       name: Translation pes-fra
+       type: translation
+       args: pes-fra
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: pes-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 29.2
+     - name: chr-F
+       type: chrf
+       value: 0.56347
+   - task:
+       name: Translation pes-por
+       type: translation
+       args: pes-por
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: pes-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 28.5
+     - name: chr-F
+       type: chrf
+       value: 0.55676
+   - task:
+       name: Translation pes-spa
+       type: translation
+       args: pes-spa
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: pes-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 19.8
+     - name: chr-F
+       type: chrf
+       value: 0.48334
+   - task:
+       name: Translation prs-deu
+       type: translation
+       args: prs-deu
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: prs-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.2
+     - name: chr-F
+       type: chrf
+       value: 0.50562
+   - task:
+       name: Translation prs-eng
+       type: translation
+       args: prs-eng
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: prs-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 35.1
+     - name: chr-F
+       type: chrf
+       value: 0.60716
+   - task:
+       name: Translation prs-fra
+       type: translation
+       args: prs-fra
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: prs-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 27.8
+     - name: chr-F
+       type: chrf
+       value: 0.54769
+   - task:
+       name: Translation prs-por
+       type: translation
+       args: prs-por
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: prs-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 27.2
+     - name: chr-F
+       type: chrf
+       value: 0.54073
+   - task:
+       name: Translation prs-spa
+       type: translation
+       args: prs-spa
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: prs-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 18.6
+     - name: chr-F
+       type: chrf
+       value: 0.46850
+   - task:
+       name: Translation tgk-deu
+       type: translation
+       args: tgk-deu
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: tgk-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 14.2
+     - name: chr-F
+       type: chrf
+       value: 0.43115
+   - task:
+       name: Translation tgk-eng
+       type: translation
+       args: tgk-eng
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: tgk-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 25.6
+     - name: chr-F
+       type: chrf
+       value: 0.53705
+   - task:
+       name: Translation tgk-fra
+       type: translation
+       args: tgk-fra
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: tgk-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 20.7
+     - name: chr-F
+       type: chrf
+       value: 0.48902
+   - task:
+       name: Translation tgk-por
+       type: translation
+       args: tgk-por
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: tgk-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 20.7
+     - name: chr-F
+       type: chrf
+       value: 0.48519
+   - task:
+       name: Translation tgk-spa
+       type: translation
+       args: tgk-spa
+     dataset:
+       name: flores200-devtest
+       type: flores200-devtest
+       args: tgk-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 15.7
+     - name: chr-F
+       type: chrf
+       value: 0.43563
+   - task:
+       name: Translation ckb-deu
+       type: translation
+       args: ckb-deu
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: ckb deu devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 11.6
+     - name: chr-F
+       type: chrf
+       value: 0.40117
+   - task:
+       name: Translation ckb-eng
+       type: translation
+       args: ckb-eng
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: ckb eng devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.6
+     - name: chr-F
+       type: chrf
+       value: 0.48321
+   - task:
+       name: Translation ckb-fra
+       type: translation
+       args: ckb-fra
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: ckb fra devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 17.2
+     - name: chr-F
+       type: chrf
+       value: 0.44260
+   - task:
+       name: Translation ckb-por
+       type: translation
+       args: ckb-por
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: ckb por devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 16.2
+     - name: chr-F
+       type: chrf
+       value: 0.43179
+   - task:
+       name: Translation fas-eng
+       type: translation
+       args: fas-eng
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: fas eng devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 34.4
+     - name: chr-F
+       type: chrf
+       value: 0.61134
+   - task:
+       name: Translation pus-eng
+       type: translation
+       args: pus-eng
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: pus eng devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 22.7
+     - name: chr-F
+       type: chrf
+       value: 0.49556
+   - task:
+       name: Translation pus-fra
+       type: translation
+       args: pus-fra
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: pus fra devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 17.8
+     - name: chr-F
+       type: chrf
+       value: 0.45248
+   - task:
+       name: Translation tgk-eng
+       type: translation
+       args: tgk-eng
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: tgk eng devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 25.4
+     - name: chr-F
+       type: chrf
+       value: 0.53630
+   - task:
+       name: Translation tgk-fra
+       type: translation
+       args: tgk-fra
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: tgk fra devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.0
+     - name: chr-F
+       type: chrf
+       value: 0.49084
+   - task:
+       name: Translation tgk-spa
+       type: translation
+       args: tgk-spa
+     dataset:
+       name: flores101-devtest
+       type: flores_101
+       args: tgk spa devtest
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 15.5
+     - name: chr-F
+       type: chrf
+       value: 0.43524
+   - task:
+       name: Translation fas-deu
+       type: translation
+       args: fas-deu
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: fas-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 16.7
+     - name: chr-F
+       type: chrf
+       value: 0.47408
+   - task:
+       name: Translation fas-eng
+       type: translation
+       args: fas-eng
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: fas-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 26.4
+     - name: chr-F
+       type: chrf
+       value: 0.55350
+   - task:
+       name: Translation fas-fra
+       type: translation
+       args: fas-fra
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: fas-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 22.1
+     - name: chr-F
+       type: chrf
+       value: 0.50311
+   - task:
+       name: Translation fas-por
+       type: translation
+       args: fas-por
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: fas-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 19.1
+     - name: chr-F
+       type: chrf
+       value: 0.48005
+   - task:
+       name: Translation fas-spa
+       type: translation
+       args: fas-spa
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: fas-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 23.6
+     - name: chr-F
+       type: chrf
+       value: 0.50973
+   - task:
+       name: Translation kmr-eng
+       type: translation
+       args: kmr-eng
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: kmr-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 12.8
+     - name: chr-F
+       type: chrf
+       value: 0.38189
+   - task:
+       name: Translation prs-deu
+       type: translation
+       args: prs-deu
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: prs-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 14.9
+     - name: chr-F
+       type: chrf
+       value: 0.45191
+   - task:
+       name: Translation prs-eng
+       type: translation
+       args: prs-eng
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: prs-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 26.6
+     - name: chr-F
+       type: chrf
+       value: 0.54761
+   - task:
+       name: Translation prs-fra
+       type: translation
+       args: prs-fra
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: prs-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 19.9
+     - name: chr-F
+       type: chrf
+       value: 0.47819
+   - task:
+       name: Translation prs-por
+       type: translation
+       args: prs-por
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: prs-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 17.4
+     - name: chr-F
+       type: chrf
+       value: 0.46241
+   - task:
+       name: Translation prs-spa
+       type: translation
+       args: prs-spa
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: prs-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 21.4
+     - name: chr-F
+       type: chrf
+       value: 0.48712
+   - task:
+       name: Translation pus-eng
+       type: translation
+       args: pus-eng
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: pus-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 17.4
+     - name: chr-F
+       type: chrf
+       value: 0.43901
+   - task:
+       name: Translation pus-fra
+       type: translation
+       args: pus-fra
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: pus-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 12.4
+     - name: chr-F
+       type: chrf
+       value: 0.39661
+   - task:
+       name: Translation pus-por
+       type: translation
+       args: pus-por
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: pus-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 11.4
+     - name: chr-F
+       type: chrf
+       value: 0.38694
+   - task:
+       name: Translation pus-spa
+       type: translation
+       args: pus-spa
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: pus-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 14.1
+     - name: chr-F
+       type: chrf
+       value: 0.40812
+   - task:
+       name: Translation tgk_Cyrl-deu
+       type: translation
+       args: tgk_Cyrl-deu
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: tgk_Cyrl-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 10.7
+     - name: chr-F
+       type: chrf
+       value: 0.38740
+   - task:
+       name: Translation tgk_Cyrl-eng
+       type: translation
+       args: tgk_Cyrl-eng
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: tgk_Cyrl-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 18.6
+     - name: chr-F
+       type: chrf
+       value: 0.46839
+   - task:
+       name: Translation tgk_Cyrl-fra
+       type: translation
+       args: tgk_Cyrl-fra
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: tgk_Cyrl-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 15.1
+     - name: chr-F
+       type: chrf
+       value: 0.42569
+   - task:
+       name: Translation tgk_Cyrl-por
+       type: translation
+       args: tgk_Cyrl-por
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: tgk_Cyrl-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 13.7
+     - name: chr-F
+       type: chrf
+       value: 0.41632
+   - task:
+       name: Translation tgk_Cyrl-spa
+       type: translation
+       args: tgk_Cyrl-spa
+     dataset:
+       name: ntrex128
+       type: ntrex128
+       args: tgk_Cyrl-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 16.8
+     - name: chr-F
+       type: chrf
+       value: 0.43763
+   - task:
+       name: Translation fas-deu
+       type: translation
+       args: fas-deu
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: fas-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 36.1
+     - name: chr-F
+       type: chrf
+       value: 0.59737
+   - task:
+       name: Translation fas-eng
+       type: translation
+       args: fas-eng
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: fas-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 35.8
+     - name: chr-F
+       type: chrf
+       value: 0.59871
+   - task:
+       name: Translation fas-fra
+       type: translation
+       args: fas-fra
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: fas-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 36.3
+     - name: chr-F
+       type: chrf
+       value: 0.58095
+   - task:
+       name: Translation kur_Latn-deu
+       type: translation
+       args: kur_Latn-deu
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: kur_Latn-deu
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 24.9
+     - name: chr-F
+       type: chrf
+       value: 0.40276
+   - task:
+       name: Translation multi-multi
+       type: translation
+       args: multi-multi
+     dataset:
+       name: tatoeba-test-v2020-07-28-v2023-09-26
+       type: tatoeba_mt
+       args: multi-multi
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 34.0
+     - name: chr-F
+       type: chrf
+       value: 0.56042
+   - task:
+       name: Translation pes-eng
+       type: translation
+       args: pes-eng
+     dataset:
+       name: tatoeba-test-v2021-08-07
+       type: tatoeba_mt
+       args: pes-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 42.3
+     - name: chr-F
+       type: chrf
+       value: 0.60717
+   - task:
+       name: Translation ckb-eng
+       type: translation
+       args: ckb-eng
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: ckb-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 40.1
+     - name: chr-F
+       type: chrf
+       value: 0.61905
+   - task:
+       name: Translation ckb-fra
+       type: translation
+       args: ckb-fra
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: ckb-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 19.7
+     - name: chr-F
+       type: chrf
+       value: 0.45070
+   - task:
+       name: Translation ckb-por
+       type: translation
+       args: ckb-por
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: ckb-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 22.9
+     - name: chr-F
+       type: chrf
+       value: 0.49617
+   - task:
+       name: Translation ckb-spa
+       type: translation
+       args: ckb-spa
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: ckb-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 24.9
+     - name: chr-F
+       type: chrf
+       value: 0.50543
+   - task:
+       name: Translation fas-eng
+       type: translation
+       args: fas-eng
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: fas-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 37.3
+     - name: chr-F
+       type: chrf
+       value: 0.64016
+   - task:
+       name: Translation fas-fra
+       type: translation
+       args: fas-fra
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: fas-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 26.1
+     - name: chr-F
+       type: chrf
+       value: 0.53319
+   - task:
+       name: Translation fas-por
+       type: translation
+       args: fas-por
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: fas-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 30.6
+     - name: chr-F
+       type: chrf
+       value: 0.58008
+   - task:
+       name: Translation fas-spa
+       type: translation
+       args: fas-spa
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: fas-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 33.3
+     - name: chr-F
+       type: chrf
+       value: 0.59239
+   - task:
+       name: Translation prs-eng
+       type: translation
+       args: prs-eng
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: prs-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 34.8
+     - name: chr-F
+       type: chrf
+       value: 0.61702
+   - task:
+       name: Translation prs-fra
+       type: translation
+       args: prs-fra
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: prs-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 24.0
+     - name: chr-F
+       type: chrf
+       value: 0.51218
+   - task:
+       name: Translation prs-por
+       type: translation
+       args: prs-por
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: prs-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 28.6
+     - name: chr-F
+       type: chrf
+       value: 0.55888
+   - task:
+       name: Translation prs-spa
+       type: translation
+       args: prs-spa
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: prs-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 31.1
+     - name: chr-F
+       type: chrf
+       value: 0.57494
+   - task:
+       name: Translation pus-eng
+       type: translation
+       args: pus-eng
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: pus-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 32.1
+     - name: chr-F
+       type: chrf
+       value: 0.57586
+   - task:
+       name: Translation pus-fra
+       type: translation
+       args: pus-fra
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: pus-fra
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 19.2
+     - name: chr-F
+       type: chrf
+       value: 0.46091
+   - task:
+       name: Translation pus-por
+       type: translation
+       args: pus-por
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: pus-por
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 24.1
+     - name: chr-F
+       type: chrf
+       value: 0.51033
+   - task:
+       name: Translation pus-spa
+       type: translation
+       args: pus-spa
+     dataset:
+       name: tico19-test
+       type: tico19-test
+       args: pus-spa
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 25.9
+     - name: chr-F
+       type: chrf
+       value: 0.51857
+   - task:
+       name: Translation pus-eng
+       type: translation
+       args: pus-eng
+     dataset:
+       name: newstest2020
+       type: wmt-2020-news
+       args: pus-eng
+     metrics:
+     - name: BLEU
+       type: bleu
+       value: 13.1
+     - name: chr-F
+       type: chrf
+       value: 0.37487
+ ---
+ # opus-mt-tc-bible-big-ira-deu_eng_fra_por_spa
+
+ ## Table of Contents
+ - [Model Details](#model-details)
+ - [Uses](#uses)
+ - [Risks, Limitations and Biases](#risks-limitations-and-biases)
+ - [How to Get Started With the Model](#how-to-get-started-with-the-model)
+ - [Training](#training)
+ - [Evaluation](#evaluation)
+ - [Citation Information](#citation-information)
+ - [Acknowledgements](#acknowledgements)
+
+ ## Model Details
+
+ Neural machine translation model for translating from Iranian languages (ira) to German, English, French, Portuguese and Spanish (deu+eng+fra+por+spa).
+
+ This model is part of the [OPUS-MT project](https://github.com/Helsinki-NLP/Opus-MT), an effort to make neural machine translation models widely available and accessible for many languages in the world. All models are originally trained using the amazing framework of [Marian NMT](https://marian-nmt.github.io/), an efficient NMT implementation written in pure C++. The models have been converted to PyTorch using the `transformers` library by Hugging Face. Training data is taken from [OPUS](https://opus.nlpl.eu/) and training pipelines use the procedures of [OPUS-MT-train](https://github.com/Helsinki-NLP/Opus-MT-train).
+
+ **Model Description:**
+ - **Developed by:** Language Technology Research Group at the University of Helsinki
+ - **Model Type:** Translation (transformer-big)
+ - **Release:** 2024-05-30
+ - **License:** Apache-2.0
+ - **Language(s):**
+   - Source Language(s): bal ckb diq fas glk jdt kmr kur lrc mzn oss pal pes prs pus sdh tgk tly zza
+   - Target Language(s): deu eng fra por spa
+   - Valid Target Language Labels: >>deu<< >>eng<< >>fra<< >>por<< >>spa<< >>xxx<<
+ - **Original Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/ira-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
+ - **Resources for more information:**
+   - [OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/ira-deu%2Beng%2Bfra%2Bpor%2Bspa/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
+   - [OPUS-MT-train GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
+   - [More information about MarianNMT models in the transformers library](https://huggingface.co/docs/transformers/model_doc/marian)
+   - [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge/)
+   - [HPLT bilingual data v1 (as part of the Tatoeba Translation Challenge dataset)](https://hplt-project.org/datasets/v1)
+   - [A massively parallel Bible corpus](https://aclanthology.org/L14-1215/)
+
+ This is a multilingual translation model with multiple target languages. A sentence-initial language token of the form `>>id<<` (id = a valid target language ID) is required, e.g. `>>deu<<`; a small illustration follows below.
+
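+ A minimal sketch of how such a token can be prepended to raw input. The `tag_for_target` helper is hypothetical and only illustrates the `>>id<<` convention described above:
+
+ ```python
+ # Hypothetical helper: prepend the >>id<< target-language token this model expects.
+ def tag_for_target(sentences, target_lang="deu"):
+     return [f">>{target_lang}<< {s}" for s in sentences]
+
+ print(tag_for_target(["How are you?"], target_lang="fra"))
+ # ['>>fra<< How are you?']
+ ```
+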
+ ## Uses
+
+ This model can be used for translation and text-to-text generation.
+
+ ## Risks, Limitations and Biases
+
+ **CONTENT WARNING: Readers should be aware that the model is trained on various public data sets that may contain content that is disturbing, offensive, and can propagate historical and current stereotypes.**
+
+ Significant research has explored bias and fairness issues with language models (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).
+
+ ## How to Get Started With the Model
+
+ A short code example:
+
+ ```python
+ from transformers import MarianMTModel, MarianTokenizer
+
+ src_text = [
+     ">>deu<< Replace this with text in an accepted source language.",
+     ">>spa<< This is the second sentence."
+ ]
+
+ # Hub ID of the released checkpoint (also used in the pipeline example below).
+ model_name = "Helsinki-NLP/opus-mt-tc-bible-big-ira-deu_eng_fra_por_spa"
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
+ model = MarianMTModel.from_pretrained(model_name)
+ translated = model.generate(**tokenizer(src_text, return_tensors="pt", padding=True))
+
+ for t in translated:
+     print(tokenizer.decode(t, skip_special_tokens=True))
+ ```
+
+ You can also use OPUS-MT models with the `transformers` pipelines, for example:
+
+ ```python
+ from transformers import pipeline
+ pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-ira-deu_eng_fra_por_spa")
+ print(pipe(">>deu<< Replace this with text in an accepted source language."))
+ ```
+
+ ## Training
+
+ - **Data**: opusTCv20230926max50+bt+jhubc ([source](https://github.com/Helsinki-NLP/Tatoeba-Challenge))
+ - **Pre-processing**: SentencePiece (spm32k,spm32k); see the sketch after this list
+ - **Model Type:** transformer-big
+ - **Original MarianNMT Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/ira-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
+ - **Training Scripts**: [GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
+
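+ As a rough illustration of that pre-processing step, the source-side SentencePiece model shipped in this repository (`source.spm`) can be loaded directly. This is a sketch, assuming the `sentencepiece` package and a local copy of the file; it is not the training pipeline itself:
+
+ ```python
+ # Sketch: inspect the spm32k SentencePiece model from this repository.
+ # Assumes `pip install sentencepiece` and source.spm in the working directory.
+ import sentencepiece as spm
+
+ sp = spm.SentencePieceProcessor(model_file="source.spm")
+
+ # Segment a sentence into the subword pieces the Marian model consumes.
+ print(sp.encode("Replace this with text in an accepted source language.", out_type=str))
+ print(sp.vocab_size())  # about 32k pieces, matching the spm32k setting above
+ ```
+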
+ ## Evaluation
+
+ * [Model scores at the OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/ira-deu%2Beng%2Bfra%2Bpor%2Bspa/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
+ * test set translations: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/ira-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt)
+ * test set scores: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/ira-deu+eng+fra+por+spa/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt) (BLEU and chr-F; see the sketch below)
+ * benchmark results: [benchmark_results.txt](benchmark_results.txt)
+ * benchmark output: [benchmark_translations.zip](benchmark_translations.zip)
+
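+ Corpus-level BLEU and chr-F scores of the kind reported below are typically computed with the `sacrebleu` toolkit. A minimal sketch, assuming `pip install sacrebleu` (the example strings are invented, not drawn from the test sets):
+
+ ```python
+ # Sketch: score system output against references with sacrebleu.
+ from sacrebleu.metrics import BLEU, CHRF
+
+ hypotheses = ["Das ist ein Test."]     # system output, one entry per segment
+ references = [["Dies ist ein Test."]]  # one reference stream, parallel to hypotheses
+
+ print(BLEU().corpus_score(hypotheses, references))   # BLEU
+ print(CHRF().corpus_score(hypotheses, references))   # chr-F
+ ```
+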
+ | langpair | testset | chr-F | BLEU | #sent | #words |
+ |----------|---------|-------|-------|-------|--------|
+ | fas-deu | tatoeba-test-v2021-08-07 | 0.59737 | 36.1 | 3185 | 25590 |
+ | fas-eng | tatoeba-test-v2021-08-07 | 0.59871 | 35.8 | 3762 | 31480 |
+ | fas-fra | tatoeba-test-v2021-08-07 | 0.58095 | 36.3 | 376 | 3377 |
+ | kur_Latn-deu | tatoeba-test-v2021-08-07 | 0.40276 | 24.9 | 223 | 1323 |
+ | pes-eng | tatoeba-test-v2021-08-07 | 0.60717 | 42.3 | 3757 | 31411 |
+ | ckb-deu | flores101-devtest | 0.40117 | 11.6 | 1012 | 25094 |
+ | ckb-eng | flores101-devtest | 0.48321 | 21.6 | 1012 | 24721 |
+ | ckb-fra | flores101-devtest | 0.44260 | 17.2 | 1012 | 28343 |
+ | ckb-por | flores101-devtest | 0.43179 | 16.2 | 1012 | 26519 |
+ | fas-eng | flores101-devtest | 0.61134 | 34.4 | 1012 | 24721 |
+ | pus-eng | flores101-devtest | 0.49556 | 22.7 | 1012 | 24721 |
+ | pus-fra | flores101-devtest | 0.45248 | 17.8 | 1012 | 28343 |
+ | tgk-eng | flores101-devtest | 0.53630 | 25.4 | 1012 | 24721 |
+ | tgk-fra | flores101-devtest | 0.49084 | 21.0 | 1012 | 28343 |
+ | tgk-spa | flores101-devtest | 0.43524 | 15.5 | 1012 | 29199 |
+ | ckb-deu | flores200-devtest | 0.40369 | 11.7 | 1012 | 25094 |
+ | ckb-eng | flores200-devtest | 0.48447 | 21.5 | 1012 | 24721 |
+ | ckb-fra | flores200-devtest | 0.44026 | 17.1 | 1012 | 28343 |
+ | ckb-por | flores200-devtest | 0.43192 | 16.4 | 1012 | 26519 |
+ | pes-deu | flores200-devtest | 0.51542 | 21.5 | 1012 | 25094 |
+ | pes-eng | flores200-devtest | 0.61372 | 34.9 | 1012 | 24721 |
+ | pes-fra | flores200-devtest | 0.56347 | 29.2 | 1012 | 28343 |
+ | pes-por | flores200-devtest | 0.55676 | 28.5 | 1012 | 26519 |
+ | pes-spa | flores200-devtest | 0.48334 | 19.8 | 1012 | 29199 |
+ | prs-deu | flores200-devtest | 0.50562 | 21.2 | 1012 | 25094 |
+ | prs-eng | flores200-devtest | 0.60716 | 35.1 | 1012 | 24721 |
+ | prs-fra | flores200-devtest | 0.54769 | 27.8 | 1012 | 28343 |
+ | prs-por | flores200-devtest | 0.54073 | 27.2 | 1012 | 26519 |
+ | prs-spa | flores200-devtest | 0.46850 | 18.6 | 1012 | 29199 |
+ | tgk-deu | flores200-devtest | 0.43115 | 14.2 | 1012 | 25094 |
+ | tgk-eng | flores200-devtest | 0.53705 | 25.6 | 1012 | 24721 |
+ | tgk-fra | flores200-devtest | 0.48902 | 20.7 | 1012 | 28343 |
+ | tgk-por | flores200-devtest | 0.48519 | 20.7 | 1012 | 26519 |
+ | tgk-spa | flores200-devtest | 0.43563 | 15.7 | 1012 | 29199 |
+ | fas-deu | ntrex128 | 0.47408 | 16.7 | 1997 | 48761 |
+ | fas-eng | ntrex128 | 0.55350 | 26.4 | 1997 | 47673 |
+ | fas-fra | ntrex128 | 0.50311 | 22.1 | 1997 | 53481 |
+ | fas-por | ntrex128 | 0.48005 | 19.1 | 1997 | 51631 |
+ | fas-spa | ntrex128 | 0.50973 | 23.6 | 1997 | 54107 |
+ | prs-deu | ntrex128 | 0.45191 | 14.9 | 1997 | 48761 |
+ | prs-eng | ntrex128 | 0.54761 | 26.6 | 1997 | 47673 |
+ | prs-fra | ntrex128 | 0.47819 | 19.9 | 1997 | 53481 |
+ | prs-por | ntrex128 | 0.46241 | 17.4 | 1997 | 51631 |
+ | prs-spa | ntrex128 | 0.48712 | 21.4 | 1997 | 54107 |
+ | pus-eng | ntrex128 | 0.43901 | 17.4 | 1997 | 47673 |
+ | pus-spa | ntrex128 | 0.40812 | 14.1 | 1997 | 54107 |
+ | tgk_Cyrl-eng | ntrex128 | 0.46839 | 18.6 | 1997 | 47673 |
+ | tgk_Cyrl-fra | ntrex128 | 0.42569 | 15.1 | 1997 | 53481 |
+ | tgk_Cyrl-por | ntrex128 | 0.41632 | 13.7 | 1997 | 51631 |
+ | tgk_Cyrl-spa | ntrex128 | 0.43763 | 16.8 | 1997 | 54107 |
+ | ckb-eng | tico19-test | 0.61905 | 40.1 | 2100 | 56315 |
+ | ckb-fra | tico19-test | 0.45070 | 19.7 | 2100 | 64661 |
+ | ckb-por | tico19-test | 0.49617 | 22.9 | 2100 | 62729 |
+ | ckb-spa | tico19-test | 0.50543 | 24.9 | 2100 | 66563 |
+ | fas-eng | tico19-test | 0.64016 | 37.3 | 2100 | 56315 |
+ | fas-fra | tico19-test | 0.53319 | 26.1 | 2100 | 64661 |
+ | fas-por | tico19-test | 0.58008 | 30.6 | 2100 | 62729 |
+ | fas-spa | tico19-test | 0.59239 | 33.3 | 2100 | 66563 |
+ | prs-eng | tico19-test | 0.61702 | 34.8 | 2100 | 56824 |
+ | prs-fra | tico19-test | 0.51218 | 24.0 | 2100 | 64661 |
+ | prs-por | tico19-test | 0.55888 | 28.6 | 2100 | 62729 |
+ | prs-spa | tico19-test | 0.57494 | 31.1 | 2100 | 66563 |
+ | pus-eng | tico19-test | 0.57586 | 32.1 | 2100 | 56315 |
+ | pus-fra | tico19-test | 0.46091 | 19.2 | 2100 | 64661 |
+ | pus-por | tico19-test | 0.51033 | 24.1 | 2100 | 62729 |
+ | pus-spa | tico19-test | 0.51857 | 25.9 | 2100 | 66563 |
+
+ ## Citation Information
+
+ * Publications: [Democratizing neural machine translation with OPUS-MT](https://doi.org/10.1007/s10579-023-09704-w), [OPUS-MT – Building open translation services for the World](https://aclanthology.org/2020.eamt-1.61/) and [The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual MT](https://aclanthology.org/2020.wmt-1.139/) (please cite these publications if you use this model)
+
+ ```bibtex
+ @article{tiedemann2023democratizing,
+   title={Democratizing neural machine translation with {OPUS-MT}},
+   author={Tiedemann, J{\"o}rg and Aulamo, Mikko and Bakshandaeva, Daria and Boggia, Michele and Gr{\"o}nroos, Stig-Arne and Nieminen, Tommi and Raganato, Alessandro and Scherrer, Yves and Vazquez, Raul and Virpioja, Sami},
+   journal={Language Resources and Evaluation},
+   number={58},
+   pages={713--755},
+   year={2023},
+   publisher={Springer Nature},
+   issn={1574-0218},
+   doi={10.1007/s10579-023-09704-w}
+ }
+
+ @inproceedings{tiedemann-thottingal-2020-opus,
+   title = "{OPUS}-{MT} {--} Building open translation services for the World",
+   author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh},
+   booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
+   month = nov,
+   year = "2020",
+   address = "Lisboa, Portugal",
+   publisher = "European Association for Machine Translation",
+   url = "https://aclanthology.org/2020.eamt-1.61",
+   pages = "479--480",
+ }
+
+ @inproceedings{tiedemann-2020-tatoeba,
+   title = "The Tatoeba Translation Challenge {--} Realistic Data Sets for Low Resource and Multilingual {MT}",
+   author = {Tiedemann, J{\"o}rg},
+   booktitle = "Proceedings of the Fifth Conference on Machine Translation",
+   month = nov,
+   year = "2020",
+   address = "Online",
+   publisher = "Association for Computational Linguistics",
+   url = "https://aclanthology.org/2020.wmt-1.139",
+   pages = "1174--1182",
+ }
+ ```
+
+ ## Acknowledgements
+
+ The work is supported by the [HPLT project](https://hplt-project.org/), funded by the European Union’s Horizon Europe research and innovation programme under grant agreement No 101070350. We are also grateful for the generous computational resources and IT infrastructure provided by [CSC -- IT Center for Science](https://www.csc.fi/), Finland, and the [EuroHPC supercomputer LUMI](https://www.lumi-supercomputer.eu/).
+
+ ## Model conversion info
+
+ * transformers version: 4.45.1
+ * OPUS-MT git hash: 0882077
+ * port time: Tue Oct 8 11:54:09 EEST 2024
+ * port machine: LM0-400-22516.local
benchmark_results.txt ADDED
@@ -0,0 +1,89 @@
+ multi-multi tatoeba-test-v2020-07-28-v2023-09-26 0.56042 34.0 8920 71316
+ ckb-deu flores101-devtest 0.40117 11.6 1012 25094
+ ckb-eng flores101-devtest 0.48321 21.6 1012 24721
+ ckb-fra flores101-devtest 0.44260 17.2 1012 28343
+ ckb-por flores101-devtest 0.43179 16.2 1012 26519
+ fas-eng flores101-devtest 0.61134 34.4 1012 24721
+ pus-eng flores101-devtest 0.49556 22.7 1012 24721
+ pus-fra flores101-devtest 0.45248 17.8 1012 28343
+ tgk-eng flores101-devtest 0.53630 25.4 1012 24721
+ tgk-fra flores101-devtest 0.49084 21.0 1012 28343
+ tgk-spa flores101-devtest 0.43524 15.5 1012 29199
+ ckb-deu flores200-devtest 0.40369 11.7 1012 25094
+ ckb-eng flores200-devtest 0.48447 21.5 1012 24721
+ ckb-fra flores200-devtest 0.44026 17.1 1012 28343
+ ckb-por flores200-devtest 0.43192 16.4 1012 26519
+ ckb-spa flores200-devtest 0.38880 11.7 1012 29199
+ kmr-deu flores200-devtest 0.32168 6.7 1012 25094
+ kmr-eng flores200-devtest 0.37372 12.4 1012 24721
+ kmr-fra flores200-devtest 0.33796 8.6 1012 28343
+ kmr-por flores200-devtest 0.33604 9.6 1012 26519
+ kmr-spa flores200-devtest 0.32062 7.7 1012 29199
+ pes-deu flores200-devtest 0.51542 21.5 1012 25094
+ pes-eng flores200-devtest 0.61372 34.9 1012 24721
+ pes-fra flores200-devtest 0.56347 29.2 1012 28343
+ pes-por flores200-devtest 0.55676 28.5 1012 26519
+ pes-spa flores200-devtest 0.48334 19.8 1012 29199
+ prs-deu flores200-devtest 0.50562 21.2 1012 25094
+ prs-eng flores200-devtest 0.60716 35.1 1012 24721
+ prs-fra flores200-devtest 0.54769 27.8 1012 28343
+ prs-por flores200-devtest 0.54073 27.2 1012 26519
+ prs-spa flores200-devtest 0.46850 18.6 1012 29199
+ tgk-deu flores200-devtest 0.43115 14.2 1012 25094
+ tgk-eng flores200-devtest 0.53705 25.6 1012 24721
+ tgk-fra flores200-devtest 0.48902 20.7 1012 28343
+ tgk-por flores200-devtest 0.48519 20.7 1012 26519
+ tgk-spa flores200-devtest 0.43563 15.7 1012 29199
+ pus-eng newstest2020 0.37487 13.1 2719 53382
+ fas-deu ntrex128 0.47408 16.7 1997 48761
+ fas-eng ntrex128 0.55350 26.4 1997 47673
+ fas-fra ntrex128 0.50311 22.1 1997 53481
+ fas-por ntrex128 0.48005 19.1 1997 51631
+ fas-spa ntrex128 0.50973 23.6 1997 54107
+ kmr-deu ntrex128 0.32165 6.8 1997 48761
+ kmr-eng ntrex128 0.38189 12.8 1997 47673
+ kmr-fra ntrex128 0.34023 9.6 1997 53481
+ kmr-por ntrex128 0.33306 8.6 1997 51631
+ kmr-spa ntrex128 0.000 0.0 1997 0
+ prs-deu ntrex128 0.45191 14.9 1997 48761
+ prs-eng ntrex128 0.54761 26.6 1997 47673
+ prs-fra ntrex128 0.47819 19.9 1997 53481
+ prs-por ntrex128 0.46241 17.4 1997 51631
+ prs-spa ntrex128 0.48712 21.4 1997 54107
+ pus-deu ntrex128 0.38295 9.8 1997 48761
+ pus-eng ntrex128 0.43901 17.4 1997 47673
+ pus-fra ntrex128 0.39661 12.4 1997 53481
+ pus-por ntrex128 0.38694 11.4 1997 51631
+ pus-spa ntrex128 0.40812 14.1 1997 54107
+ tgk_Cyrl-deu ntrex128 0.38740 10.7 1997 48761
+ tgk_Cyrl-eng ntrex128 0.46839 18.6 1997 47673
+ tgk_Cyrl-fra ntrex128 0.42569 15.1 1997 53481
+ tgk_Cyrl-por ntrex128 0.41632 13.7 1997 51631
+ tgk_Cyrl-spa ntrex128 0.43763 16.8 1997 54107
+ zza-eng tatoeba-test-v2020-07-28 0.12456 0.9 523 3120
+ fas-fra tatoeba-test-v2021-03-30 0.58236 36.6 383 3442
+ pes-eng tatoeba-test-v2021-03-30 0.60756 42.8 3763 31439
+ zza-eng tatoeba-test-v2021-03-30 0.12511 0.9 533 3182
+ fas-deu tatoeba-test-v2021-08-07 0.59737 36.1 3185 25590
+ fas-eng tatoeba-test-v2021-08-07 0.59871 35.8 3762 31480
+ fas-fra tatoeba-test-v2021-08-07 0.58095 36.3 376 3377
+ kur_Latn-deu tatoeba-test-v2021-08-07 0.40276 24.9 223 1323
+ kur_Latn-eng tatoeba-test-v2021-08-07 0.000 0.0 290 0
+ pes-eng tatoeba-test-v2021-08-07 0.60717 42.3 3757 31411
+ zza-eng tatoeba-test-v2021-08-07 0.12989 1.4 529 3162
+ ckb-eng tico19-test 0.61905 40.1 2100 56315
+ ckb-fra tico19-test 0.45070 19.7 2100 64661
+ ckb-por tico19-test 0.49617 22.9 2100 62729
+ ckb-spa tico19-test 0.50543 24.9 2100 66563
+ fas-eng tico19-test 0.64016 37.3 2100 56315
+ fas-fra tico19-test 0.53319 26.1 2100 64661
+ fas-por tico19-test 0.58008 30.6 2100 62729
+ fas-spa tico19-test 0.59239 33.3 2100 66563
+ prs-eng tico19-test 0.61702 34.8 2100 56824
+ prs-fra tico19-test 0.51218 24.0 2100 64661
+ prs-por tico19-test 0.55888 28.6 2100 62729
+ prs-spa tico19-test 0.57494 31.1 2100 66563
+ pus-eng tico19-test 0.57586 32.1 2100 56315
+ pus-fra tico19-test 0.46091 19.2 2100 64661
+ pus-por tico19-test 0.51033 24.1 2100 62729
+ pus-spa tico19-test 0.51857 25.9 2100 66563
benchmark_translations.zip ADDED
File without changes
config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "pytorch-models/opus-mt-tc-bible-big-ira-deu_eng_fra_por_spa",
+   "activation_dropout": 0.0,
+   "activation_function": "relu",
+   "architectures": [
+     "MarianMTModel"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "classifier_dropout": 0.0,
+   "d_model": 1024,
+   "decoder_attention_heads": 16,
+   "decoder_ffn_dim": 4096,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 6,
+   "decoder_start_token_id": 60955,
+   "decoder_vocab_size": 60956,
+   "dropout": 0.1,
+   "encoder_attention_heads": 16,
+   "encoder_ffn_dim": 4096,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 6,
+   "eos_token_id": 502,
+   "forced_eos_token_id": null,
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "max_length": null,
+   "max_position_embeddings": 1024,
+   "model_type": "marian",
+   "normalize_embedding": false,
+   "num_beams": null,
+   "num_hidden_layers": 6,
+   "pad_token_id": 60955,
+   "scale_embedding": true,
+   "share_encoder_decoder_embeddings": true,
+   "static_position_embeddings": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.1",
+   "use_cache": true,
+   "vocab_size": 60956
+ }
generation_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "_from_model_config": true,
+   "bad_words_ids": [
+     [
+       60955
+     ]
+   ],
+   "bos_token_id": 0,
+   "decoder_start_token_id": 60955,
+   "eos_token_id": 502,
+   "forced_eos_token_id": 502,
+   "max_length": 512,
+   "num_beams": 4,
+   "pad_token_id": 60955,
+   "transformers_version": "4.45.1"
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c117d5020ee76ac9dc8a996fcc9ab44e4210691a6f5b83674f752dfa761324c9
+ size 955378720
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:767745f4e5c174d2c27fb50d75640a289794b2c053825a4ccf77d439133c58df
+ size 955429957
source.spm ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:467f88a72be71e523bca01c4ffd970d3466ff3d405b91c6e12f607570ed06dba
+ size 885588
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
target.spm ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac3b523a6d0e93e46954b126e38bc2fbb9dd285fb312c3fdc8b319fddc3d299a
+ size 803985
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"source_lang": "ira", "target_lang": "deu+eng+fra+por+spa", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "sp_model_kwargs": {}, "separate_vocabs": false, "special_tokens_map_file": null, "name_or_path": "marian-models/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30/ira-deu+eng+fra+por+spa", "tokenizer_class": "MarianTokenizer"}
vocab.json ADDED
The diff for this file is too large to render.