PyTorch
xlm-roberta
elmadany commited on
Commit
9fb8802
·
1 Parent(s): 317b9f2

Afrolid v1.5

Browse files
config.json ADDED
@@ -0,0 +1,1070 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "UBC-NLP/serengeti",
3
+ "architectures": [
4
+ "XLMRobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "finetuning_task": "text-classification",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "aar",
16
+ "1": "aba",
17
+ "2": "abn",
18
+ "3": "acd",
19
+ "4": "ach",
20
+ "5": "ada",
21
+ "6": "adh",
22
+ "7": "adj",
23
+ "8": "afr",
24
+ "9": "agq",
25
+ "10": "aha",
26
+ "11": "ajg",
27
+ "12": "akp",
28
+ "13": "alz",
29
+ "14": "amh",
30
+ "15": "ann",
31
+ "16": "anu",
32
+ "17": "anv",
33
+ "18": "asa",
34
+ "19": "asg",
35
+ "20": "atg",
36
+ "21": "ati",
37
+ "22": "avn",
38
+ "23": "avu",
39
+ "24": "azo",
40
+ "25": "bam",
41
+ "26": "bav",
42
+ "27": "bba",
43
+ "28": "bbj",
44
+ "29": "bbk",
45
+ "30": "bci",
46
+ "31": "bcn",
47
+ "32": "bcw",
48
+ "33": "bcy",
49
+ "34": "bdh",
50
+ "35": "bds",
51
+ "36": "bem",
52
+ "37": "beq",
53
+ "38": "ber",
54
+ "39": "bex",
55
+ "40": "bez",
56
+ "41": "bfa",
57
+ "42": "bfd",
58
+ "43": "bfo",
59
+ "44": "bib",
60
+ "45": "bim",
61
+ "46": "bin",
62
+ "47": "biv",
63
+ "48": "bjv",
64
+ "49": "bky",
65
+ "50": "bmo",
66
+ "51": "bmv",
67
+ "52": "bom",
68
+ "53": "bov",
69
+ "54": "box",
70
+ "55": "bqc",
71
+ "56": "bqj",
72
+ "57": "bsc",
73
+ "58": "bsp",
74
+ "59": "bss",
75
+ "60": "bst",
76
+ "61": "bud",
77
+ "62": "bum",
78
+ "63": "bun",
79
+ "64": "bus",
80
+ "65": "buy",
81
+ "66": "bwr",
82
+ "67": "bwu",
83
+ "68": "bxk",
84
+ "69": "byf",
85
+ "70": "byv",
86
+ "71": "bza",
87
+ "72": "bzw",
88
+ "73": "cce",
89
+ "74": "chw",
90
+ "75": "cjk",
91
+ "76": "cko",
92
+ "77": "cme",
93
+ "78": "cop",
94
+ "79": "cou",
95
+ "80": "crs",
96
+ "81": "csk",
97
+ "82": "cwe",
98
+ "83": "daa",
99
+ "84": "dag",
100
+ "85": "dav",
101
+ "86": "dga",
102
+ "87": "dgd",
103
+ "88": "dgi",
104
+ "89": "dhm",
105
+ "90": "dib",
106
+ "91": "did",
107
+ "92": "dig",
108
+ "93": "dik",
109
+ "94": "dip",
110
+ "95": "diu",
111
+ "96": "dks",
112
+ "97": "dnj",
113
+ "98": "dow",
114
+ "99": "dsh",
115
+ "100": "dua",
116
+ "101": "dug",
117
+ "102": "dwr",
118
+ "103": "dyi",
119
+ "104": "dyu",
120
+ "105": "ebr",
121
+ "106": "ebu",
122
+ "107": "efi",
123
+ "108": "ego",
124
+ "109": "eka",
125
+ "110": "eko",
126
+ "111": "eto",
127
+ "112": "etu",
128
+ "113": "etx",
129
+ "114": "ewe",
130
+ "115": "ewo",
131
+ "116": "fak",
132
+ "117": "fat",
133
+ "118": "ffm",
134
+ "119": "fia",
135
+ "120": "fip",
136
+ "121": "flr",
137
+ "122": "fon",
138
+ "123": "fub",
139
+ "124": "fue",
140
+ "125": "fuf",
141
+ "126": "fuh",
142
+ "127": "ful",
143
+ "128": "fuq",
144
+ "129": "fuv",
145
+ "130": "gaa",
146
+ "131": "gax",
147
+ "132": "gaz",
148
+ "133": "gbo",
149
+ "134": "gbr",
150
+ "135": "gde",
151
+ "136": "gid",
152
+ "137": "giz",
153
+ "138": "gjn",
154
+ "139": "gkn",
155
+ "140": "gkp",
156
+ "141": "gmv",
157
+ "142": "gna",
158
+ "143": "gnd",
159
+ "144": "gng",
160
+ "145": "gof",
161
+ "146": "gog",
162
+ "147": "gol",
163
+ "148": "gqr",
164
+ "149": "gso",
165
+ "150": "gud",
166
+ "151": "gur",
167
+ "152": "guw",
168
+ "153": "gux",
169
+ "154": "guz",
170
+ "155": "gvl",
171
+ "156": "gwr",
172
+ "157": "gya",
173
+ "158": "hag",
174
+ "159": "har",
175
+ "160": "hau",
176
+ "161": "hay",
177
+ "162": "hbb",
178
+ "163": "heh",
179
+ "164": "her",
180
+ "165": "hgm",
181
+ "166": "hna",
182
+ "167": "ibb",
183
+ "168": "ibo",
184
+ "169": "idu",
185
+ "170": "igb",
186
+ "171": "ige",
187
+ "172": "igl",
188
+ "173": "ijn",
189
+ "174": "ikk",
190
+ "175": "ikw",
191
+ "176": "iqw",
192
+ "177": "iri",
193
+ "178": "ish",
194
+ "179": "iso",
195
+ "180": "iyx",
196
+ "181": "izr",
197
+ "182": "izz",
198
+ "183": "jgo",
199
+ "184": "jib",
200
+ "185": "jit",
201
+ "186": "jmc",
202
+ "187": "kab",
203
+ "188": "kam",
204
+ "189": "kbn",
205
+ "190": "kbo",
206
+ "191": "kbp",
207
+ "192": "kby",
208
+ "193": "kcg",
209
+ "194": "kck",
210
+ "195": "kdc",
211
+ "196": "kde",
212
+ "197": "kdh",
213
+ "198": "kdi",
214
+ "199": "kdj",
215
+ "200": "kdl",
216
+ "201": "kdn",
217
+ "202": "kea",
218
+ "203": "ken",
219
+ "204": "khy",
220
+ "205": "kia",
221
+ "206": "kik",
222
+ "207": "kin",
223
+ "208": "kiz",
224
+ "209": "kki",
225
+ "210": "kkj",
226
+ "211": "kln",
227
+ "212": "klu",
228
+ "213": "kma",
229
+ "214": "kmb",
230
+ "215": "kmy",
231
+ "216": "knf",
232
+ "217": "kng",
233
+ "218": "knk",
234
+ "219": "kno",
235
+ "220": "koo",
236
+ "221": "koq",
237
+ "222": "kqn",
238
+ "223": "kqp",
239
+ "224": "kqs",
240
+ "225": "kqy",
241
+ "226": "kri",
242
+ "227": "krs",
243
+ "228": "krw",
244
+ "229": "krx",
245
+ "230": "ksb",
246
+ "231": "ksf",
247
+ "232": "ksp",
248
+ "233": "ktj",
249
+ "234": "ktu",
250
+ "235": "kua",
251
+ "236": "kub",
252
+ "237": "kuj",
253
+ "238": "kus",
254
+ "239": "kvj",
255
+ "240": "kwn",
256
+ "241": "kyf",
257
+ "242": "kyq",
258
+ "243": "kzr",
259
+ "244": "lai",
260
+ "245": "laj",
261
+ "246": "lam",
262
+ "247": "lap",
263
+ "248": "lee",
264
+ "249": "lef",
265
+ "250": "lem",
266
+ "251": "lgg",
267
+ "252": "lgm",
268
+ "253": "lia",
269
+ "254": "lik",
270
+ "255": "lin",
271
+ "256": "lip",
272
+ "257": "lmd",
273
+ "258": "lmp",
274
+ "259": "lnl",
275
+ "260": "log",
276
+ "261": "lom",
277
+ "262": "loq",
278
+ "263": "lot",
279
+ "264": "loz",
280
+ "265": "lro",
281
+ "266": "lsm",
282
+ "267": "lth",
283
+ "268": "lto",
284
+ "269": "lua",
285
+ "270": "luc",
286
+ "271": "lue",
287
+ "272": "lug",
288
+ "273": "lun",
289
+ "274": "luo",
290
+ "275": "lwg",
291
+ "276": "lwo",
292
+ "277": "maf",
293
+ "278": "mas",
294
+ "279": "maw",
295
+ "280": "mbu",
296
+ "281": "mck",
297
+ "282": "mcn",
298
+ "283": "mcp",
299
+ "284": "mcu",
300
+ "285": "mda",
301
+ "286": "mdm",
302
+ "287": "mdy",
303
+ "288": "men",
304
+ "289": "meq",
305
+ "290": "mer",
306
+ "291": "mev",
307
+ "292": "mfe",
308
+ "293": "mfg",
309
+ "294": "mfh",
310
+ "295": "mfi",
311
+ "296": "mfk",
312
+ "297": "mfq",
313
+ "298": "mfz",
314
+ "299": "mgc",
315
+ "300": "mgh",
316
+ "301": "mgo",
317
+ "302": "mgq",
318
+ "303": "mgr",
319
+ "304": "mgw",
320
+ "305": "mif",
321
+ "306": "mkl",
322
+ "307": "mlg",
323
+ "308": "mlr",
324
+ "309": "mmy",
325
+ "310": "mnf",
326
+ "311": "mnk",
327
+ "312": "moa",
328
+ "313": "mos",
329
+ "314": "moy",
330
+ "315": "moz",
331
+ "316": "mpe",
332
+ "317": "mpg",
333
+ "318": "mqb",
334
+ "319": "msc",
335
+ "320": "mur",
336
+ "321": "muy",
337
+ "322": "mwe",
338
+ "323": "mwm",
339
+ "324": "mwn",
340
+ "325": "mws",
341
+ "326": "myb",
342
+ "327": "myk",
343
+ "328": "myx",
344
+ "329": "mzm",
345
+ "330": "mzw",
346
+ "331": "nan_lang",
347
+ "332": "naq",
348
+ "333": "naw",
349
+ "334": "nba",
350
+ "335": "nbl",
351
+ "336": "ncu",
352
+ "337": "ndc",
353
+ "338": "nde",
354
+ "339": "ndh",
355
+ "340": "ndj",
356
+ "341": "ndo",
357
+ "342": "ndv",
358
+ "343": "ndz",
359
+ "344": "ngb",
360
+ "345": "ngc",
361
+ "346": "ngl",
362
+ "347": "ngn",
363
+ "348": "ngo",
364
+ "349": "ngp",
365
+ "350": "nhr",
366
+ "351": "nhu",
367
+ "352": "nih",
368
+ "353": "nim",
369
+ "354": "nin",
370
+ "355": "niy",
371
+ "356": "nka",
372
+ "357": "nko",
373
+ "358": "nla",
374
+ "359": "nnb",
375
+ "360": "nnh",
376
+ "361": "nnq",
377
+ "362": "nnw",
378
+ "363": "nse",
379
+ "364": "nso",
380
+ "365": "ntr",
381
+ "366": "nuj",
382
+ "367": "nus",
383
+ "368": "nwb",
384
+ "369": "nxd",
385
+ "370": "nya",
386
+ "371": "nyb",
387
+ "372": "nyd",
388
+ "373": "nyf",
389
+ "374": "nyk",
390
+ "375": "nym",
391
+ "376": "nyn",
392
+ "377": "nyo",
393
+ "378": "nyu",
394
+ "379": "nyy",
395
+ "380": "nza",
396
+ "381": "nzi",
397
+ "382": "odu",
398
+ "383": "ogo",
399
+ "384": "oke",
400
+ "385": "okr",
401
+ "386": "oku",
402
+ "387": "orm",
403
+ "388": "ozm",
404
+ "389": "pcm",
405
+ "390": "pem",
406
+ "391": "pkb",
407
+ "392": "pko",
408
+ "393": "pov",
409
+ "394": "poy",
410
+ "395": "rag",
411
+ "396": "rel",
412
+ "397": "rif",
413
+ "398": "rim",
414
+ "399": "rnd",
415
+ "400": "rng",
416
+ "401": "rub",
417
+ "402": "run",
418
+ "403": "rwk",
419
+ "404": "sag",
420
+ "405": "saq",
421
+ "406": "sba",
422
+ "407": "sbd",
423
+ "408": "sbp",
424
+ "409": "sbs",
425
+ "410": "sby",
426
+ "411": "sef",
427
+ "412": "ses",
428
+ "413": "sev",
429
+ "414": "sfw",
430
+ "415": "sgw",
431
+ "416": "shi",
432
+ "417": "shj",
433
+ "418": "shk",
434
+ "419": "sid",
435
+ "420": "sig",
436
+ "421": "sil",
437
+ "422": "sna",
438
+ "423": "snf",
439
+ "424": "sng",
440
+ "425": "snw",
441
+ "426": "som",
442
+ "427": "sop",
443
+ "428": "sor",
444
+ "429": "sot",
445
+ "430": "soy",
446
+ "431": "spp",
447
+ "432": "ssw",
448
+ "433": "suk",
449
+ "434": "sus",
450
+ "435": "swa",
451
+ "436": "swc",
452
+ "437": "swh",
453
+ "438": "swk",
454
+ "439": "sxb",
455
+ "440": "taq",
456
+ "441": "tcc",
457
+ "442": "tcd",
458
+ "443": "ted",
459
+ "444": "tem",
460
+ "445": "teo",
461
+ "446": "tex",
462
+ "447": "tgw",
463
+ "448": "thk",
464
+ "449": "thv",
465
+ "450": "tir",
466
+ "451": "tiv",
467
+ "452": "tke",
468
+ "453": "tlj",
469
+ "454": "tll",
470
+ "455": "tog",
471
+ "456": "toh",
472
+ "457": "toi",
473
+ "458": "tpm",
474
+ "459": "tsc",
475
+ "460": "tsn",
476
+ "461": "tso",
477
+ "462": "tsw",
478
+ "463": "ttj",
479
+ "464": "ttq",
480
+ "465": "ttr",
481
+ "466": "tui",
482
+ "467": "tul",
483
+ "468": "tum",
484
+ "469": "tuv",
485
+ "470": "tvu",
486
+ "471": "twi",
487
+ "472": "umb",
488
+ "473": "urh",
489
+ "474": "uth",
490
+ "475": "vag",
491
+ "476": "vai",
492
+ "477": "ven",
493
+ "478": "vid",
494
+ "479": "vif",
495
+ "480": "vmk",
496
+ "481": "vmw",
497
+ "482": "vun",
498
+ "483": "vut",
499
+ "484": "wal",
500
+ "485": "wbi",
501
+ "486": "wec",
502
+ "487": "wes",
503
+ "488": "wib",
504
+ "489": "wmw",
505
+ "490": "wol",
506
+ "491": "won",
507
+ "492": "xan",
508
+ "493": "xed",
509
+ "494": "xho",
510
+ "495": "xnz",
511
+ "496": "xog",
512
+ "497": "xon",
513
+ "498": "xpe",
514
+ "499": "xrb",
515
+ "500": "xsm",
516
+ "501": "xtc",
517
+ "502": "xuo",
518
+ "503": "yal",
519
+ "504": "yam",
520
+ "505": "yao",
521
+ "506": "yat",
522
+ "507": "yba",
523
+ "508": "ybb",
524
+ "509": "yom",
525
+ "510": "yor",
526
+ "511": "yre",
527
+ "512": "zaj",
528
+ "513": "zdj",
529
+ "514": "zga",
530
+ "515": "ziw",
531
+ "516": "zne",
532
+ "517": "zul"
533
+ },
534
+ "initializer_range": 0.02,
535
+ "intermediate_size": 3072,
536
+ "label2id": {
537
+ "aar": 0,
538
+ "aba": 1,
539
+ "abn": 2,
540
+ "acd": 3,
541
+ "ach": 4,
542
+ "ada": 5,
543
+ "adh": 6,
544
+ "adj": 7,
545
+ "afr": 8,
546
+ "agq": 9,
547
+ "aha": 10,
548
+ "ajg": 11,
549
+ "akp": 12,
550
+ "alz": 13,
551
+ "amh": 14,
552
+ "ann": 15,
553
+ "anu": 16,
554
+ "anv": 17,
555
+ "asa": 18,
556
+ "asg": 19,
557
+ "atg": 20,
558
+ "ati": 21,
559
+ "avn": 22,
560
+ "avu": 23,
561
+ "azo": 24,
562
+ "bam": 25,
563
+ "bav": 26,
564
+ "bba": 27,
565
+ "bbj": 28,
566
+ "bbk": 29,
567
+ "bci": 30,
568
+ "bcn": 31,
569
+ "bcw": 32,
570
+ "bcy": 33,
571
+ "bdh": 34,
572
+ "bds": 35,
573
+ "bem": 36,
574
+ "beq": 37,
575
+ "ber": 38,
576
+ "bex": 39,
577
+ "bez": 40,
578
+ "bfa": 41,
579
+ "bfd": 42,
580
+ "bfo": 43,
581
+ "bib": 44,
582
+ "bim": 45,
583
+ "bin": 46,
584
+ "biv": 47,
585
+ "bjv": 48,
586
+ "bky": 49,
587
+ "bmo": 50,
588
+ "bmv": 51,
589
+ "bom": 52,
590
+ "bov": 53,
591
+ "box": 54,
592
+ "bqc": 55,
593
+ "bqj": 56,
594
+ "bsc": 57,
595
+ "bsp": 58,
596
+ "bss": 59,
597
+ "bst": 60,
598
+ "bud": 61,
599
+ "bum": 62,
600
+ "bun": 63,
601
+ "bus": 64,
602
+ "buy": 65,
603
+ "bwr": 66,
604
+ "bwu": 67,
605
+ "bxk": 68,
606
+ "byf": 69,
607
+ "byv": 70,
608
+ "bza": 71,
609
+ "bzw": 72,
610
+ "cce": 73,
611
+ "chw": 74,
612
+ "cjk": 75,
613
+ "cko": 76,
614
+ "cme": 77,
615
+ "cop": 78,
616
+ "cou": 79,
617
+ "crs": 80,
618
+ "csk": 81,
619
+ "cwe": 82,
620
+ "daa": 83,
621
+ "dag": 84,
622
+ "dav": 85,
623
+ "dga": 86,
624
+ "dgd": 87,
625
+ "dgi": 88,
626
+ "dhm": 89,
627
+ "dib": 90,
628
+ "did": 91,
629
+ "dig": 92,
630
+ "dik": 93,
631
+ "dip": 94,
632
+ "diu": 95,
633
+ "dks": 96,
634
+ "dnj": 97,
635
+ "dow": 98,
636
+ "dsh": 99,
637
+ "dua": 100,
638
+ "dug": 101,
639
+ "dwr": 102,
640
+ "dyi": 103,
641
+ "dyu": 104,
642
+ "ebr": 105,
643
+ "ebu": 106,
644
+ "efi": 107,
645
+ "ego": 108,
646
+ "eka": 109,
647
+ "eko": 110,
648
+ "eto": 111,
649
+ "etu": 112,
650
+ "etx": 113,
651
+ "ewe": 114,
652
+ "ewo": 115,
653
+ "fak": 116,
654
+ "fat": 117,
655
+ "ffm": 118,
656
+ "fia": 119,
657
+ "fip": 120,
658
+ "flr": 121,
659
+ "fon": 122,
660
+ "fub": 123,
661
+ "fue": 124,
662
+ "fuf": 125,
663
+ "fuh": 126,
664
+ "ful": 127,
665
+ "fuq": 128,
666
+ "fuv": 129,
667
+ "gaa": 130,
668
+ "gax": 131,
669
+ "gaz": 132,
670
+ "gbo": 133,
671
+ "gbr": 134,
672
+ "gde": 135,
673
+ "gid": 136,
674
+ "giz": 137,
675
+ "gjn": 138,
676
+ "gkn": 139,
677
+ "gkp": 140,
678
+ "gmv": 141,
679
+ "gna": 142,
680
+ "gnd": 143,
681
+ "gng": 144,
682
+ "gof": 145,
683
+ "gog": 146,
684
+ "gol": 147,
685
+ "gqr": 148,
686
+ "gso": 149,
687
+ "gud": 150,
688
+ "gur": 151,
689
+ "guw": 152,
690
+ "gux": 153,
691
+ "guz": 154,
692
+ "gvl": 155,
693
+ "gwr": 156,
694
+ "gya": 157,
695
+ "hag": 158,
696
+ "har": 159,
697
+ "hau": 160,
698
+ "hay": 161,
699
+ "hbb": 162,
700
+ "heh": 163,
701
+ "her": 164,
702
+ "hgm": 165,
703
+ "hna": 166,
704
+ "ibb": 167,
705
+ "ibo": 168,
706
+ "idu": 169,
707
+ "igb": 170,
708
+ "ige": 171,
709
+ "igl": 172,
710
+ "ijn": 173,
711
+ "ikk": 174,
712
+ "ikw": 175,
713
+ "iqw": 176,
714
+ "iri": 177,
715
+ "ish": 178,
716
+ "iso": 179,
717
+ "iyx": 180,
718
+ "izr": 181,
719
+ "izz": 182,
720
+ "jgo": 183,
721
+ "jib": 184,
722
+ "jit": 185,
723
+ "jmc": 186,
724
+ "kab": 187,
725
+ "kam": 188,
726
+ "kbn": 189,
727
+ "kbo": 190,
728
+ "kbp": 191,
729
+ "kby": 192,
730
+ "kcg": 193,
731
+ "kck": 194,
732
+ "kdc": 195,
733
+ "kde": 196,
734
+ "kdh": 197,
735
+ "kdi": 198,
736
+ "kdj": 199,
737
+ "kdl": 200,
738
+ "kdn": 201,
739
+ "kea": 202,
740
+ "ken": 203,
741
+ "khy": 204,
742
+ "kia": 205,
743
+ "kik": 206,
744
+ "kin": 207,
745
+ "kiz": 208,
746
+ "kki": 209,
747
+ "kkj": 210,
748
+ "kln": 211,
749
+ "klu": 212,
750
+ "kma": 213,
751
+ "kmb": 214,
752
+ "kmy": 215,
753
+ "knf": 216,
754
+ "kng": 217,
755
+ "knk": 218,
756
+ "kno": 219,
757
+ "koo": 220,
758
+ "koq": 221,
759
+ "kqn": 222,
760
+ "kqp": 223,
761
+ "kqs": 224,
762
+ "kqy": 225,
763
+ "kri": 226,
764
+ "krs": 227,
765
+ "krw": 228,
766
+ "krx": 229,
767
+ "ksb": 230,
768
+ "ksf": 231,
769
+ "ksp": 232,
770
+ "ktj": 233,
771
+ "ktu": 234,
772
+ "kua": 235,
773
+ "kub": 236,
774
+ "kuj": 237,
775
+ "kus": 238,
776
+ "kvj": 239,
777
+ "kwn": 240,
778
+ "kyf": 241,
779
+ "kyq": 242,
780
+ "kzr": 243,
781
+ "lai": 244,
782
+ "laj": 245,
783
+ "lam": 246,
784
+ "lap": 247,
785
+ "lee": 248,
786
+ "lef": 249,
787
+ "lem": 250,
788
+ "lgg": 251,
789
+ "lgm": 252,
790
+ "lia": 253,
791
+ "lik": 254,
792
+ "lin": 255,
793
+ "lip": 256,
794
+ "lmd": 257,
795
+ "lmp": 258,
796
+ "lnl": 259,
797
+ "log": 260,
798
+ "lom": 261,
799
+ "loq": 262,
800
+ "lot": 263,
801
+ "loz": 264,
802
+ "lro": 265,
803
+ "lsm": 266,
804
+ "lth": 267,
805
+ "lto": 268,
806
+ "lua": 269,
807
+ "luc": 270,
808
+ "lue": 271,
809
+ "lug": 272,
810
+ "lun": 273,
811
+ "luo": 274,
812
+ "lwg": 275,
813
+ "lwo": 276,
814
+ "maf": 277,
815
+ "mas": 278,
816
+ "maw": 279,
817
+ "mbu": 280,
818
+ "mck": 281,
819
+ "mcn": 282,
820
+ "mcp": 283,
821
+ "mcu": 284,
822
+ "mda": 285,
823
+ "mdm": 286,
824
+ "mdy": 287,
825
+ "men": 288,
826
+ "meq": 289,
827
+ "mer": 290,
828
+ "mev": 291,
829
+ "mfe": 292,
830
+ "mfg": 293,
831
+ "mfh": 294,
832
+ "mfi": 295,
833
+ "mfk": 296,
834
+ "mfq": 297,
835
+ "mfz": 298,
836
+ "mgc": 299,
837
+ "mgh": 300,
838
+ "mgo": 301,
839
+ "mgq": 302,
840
+ "mgr": 303,
841
+ "mgw": 304,
842
+ "mif": 305,
843
+ "mkl": 306,
844
+ "mlg": 307,
845
+ "mlr": 308,
846
+ "mmy": 309,
847
+ "mnf": 310,
848
+ "mnk": 311,
849
+ "moa": 312,
850
+ "mos": 313,
851
+ "moy": 314,
852
+ "moz": 315,
853
+ "mpe": 316,
854
+ "mpg": 317,
855
+ "mqb": 318,
856
+ "msc": 319,
857
+ "mur": 320,
858
+ "muy": 321,
859
+ "mwe": 322,
860
+ "mwm": 323,
861
+ "mwn": 324,
862
+ "mws": 325,
863
+ "myb": 326,
864
+ "myk": 327,
865
+ "myx": 328,
866
+ "mzm": 329,
867
+ "mzw": 330,
868
+ "nan_lang": 331,
869
+ "naq": 332,
870
+ "naw": 333,
871
+ "nba": 334,
872
+ "nbl": 335,
873
+ "ncu": 336,
874
+ "ndc": 337,
875
+ "nde": 338,
876
+ "ndh": 339,
877
+ "ndj": 340,
878
+ "ndo": 341,
879
+ "ndv": 342,
880
+ "ndz": 343,
881
+ "ngb": 344,
882
+ "ngc": 345,
883
+ "ngl": 346,
884
+ "ngn": 347,
885
+ "ngo": 348,
886
+ "ngp": 349,
887
+ "nhr": 350,
888
+ "nhu": 351,
889
+ "nih": 352,
890
+ "nim": 353,
891
+ "nin": 354,
892
+ "niy": 355,
893
+ "nka": 356,
894
+ "nko": 357,
895
+ "nla": 358,
896
+ "nnb": 359,
897
+ "nnh": 360,
898
+ "nnq": 361,
899
+ "nnw": 362,
900
+ "nse": 363,
901
+ "nso": 364,
902
+ "ntr": 365,
903
+ "nuj": 366,
904
+ "nus": 367,
905
+ "nwb": 368,
906
+ "nxd": 369,
907
+ "nya": 370,
908
+ "nyb": 371,
909
+ "nyd": 372,
910
+ "nyf": 373,
911
+ "nyk": 374,
912
+ "nym": 375,
913
+ "nyn": 376,
914
+ "nyo": 377,
915
+ "nyu": 378,
916
+ "nyy": 379,
917
+ "nza": 380,
918
+ "nzi": 381,
919
+ "odu": 382,
920
+ "ogo": 383,
921
+ "oke": 384,
922
+ "okr": 385,
923
+ "oku": 386,
924
+ "orm": 387,
925
+ "ozm": 388,
926
+ "pcm": 389,
927
+ "pem": 390,
928
+ "pkb": 391,
929
+ "pko": 392,
930
+ "pov": 393,
931
+ "poy": 394,
932
+ "rag": 395,
933
+ "rel": 396,
934
+ "rif": 397,
935
+ "rim": 398,
936
+ "rnd": 399,
937
+ "rng": 400,
938
+ "rub": 401,
939
+ "run": 402,
940
+ "rwk": 403,
941
+ "sag": 404,
942
+ "saq": 405,
943
+ "sba": 406,
944
+ "sbd": 407,
945
+ "sbp": 408,
946
+ "sbs": 409,
947
+ "sby": 410,
948
+ "sef": 411,
949
+ "ses": 412,
950
+ "sev": 413,
951
+ "sfw": 414,
952
+ "sgw": 415,
953
+ "shi": 416,
954
+ "shj": 417,
955
+ "shk": 418,
956
+ "sid": 419,
957
+ "sig": 420,
958
+ "sil": 421,
959
+ "sna": 422,
960
+ "snf": 423,
961
+ "sng": 424,
962
+ "snw": 425,
963
+ "som": 426,
964
+ "sop": 427,
965
+ "sor": 428,
966
+ "sot": 429,
967
+ "soy": 430,
968
+ "spp": 431,
969
+ "ssw": 432,
970
+ "suk": 433,
971
+ "sus": 434,
972
+ "swa": 435,
973
+ "swc": 436,
974
+ "swh": 437,
975
+ "swk": 438,
976
+ "sxb": 439,
977
+ "taq": 440,
978
+ "tcc": 441,
979
+ "tcd": 442,
980
+ "ted": 443,
981
+ "tem": 444,
982
+ "teo": 445,
983
+ "tex": 446,
984
+ "tgw": 447,
985
+ "thk": 448,
986
+ "thv": 449,
987
+ "tir": 450,
988
+ "tiv": 451,
989
+ "tke": 452,
990
+ "tlj": 453,
991
+ "tll": 454,
992
+ "tog": 455,
993
+ "toh": 456,
994
+ "toi": 457,
995
+ "tpm": 458,
996
+ "tsc": 459,
997
+ "tsn": 460,
998
+ "tso": 461,
999
+ "tsw": 462,
1000
+ "ttj": 463,
1001
+ "ttq": 464,
1002
+ "ttr": 465,
1003
+ "tui": 466,
1004
+ "tul": 467,
1005
+ "tum": 468,
1006
+ "tuv": 469,
1007
+ "tvu": 470,
1008
+ "twi": 471,
1009
+ "umb": 472,
1010
+ "urh": 473,
1011
+ "uth": 474,
1012
+ "vag": 475,
1013
+ "vai": 476,
1014
+ "ven": 477,
1015
+ "vid": 478,
1016
+ "vif": 479,
1017
+ "vmk": 480,
1018
+ "vmw": 481,
1019
+ "vun": 482,
1020
+ "vut": 483,
1021
+ "wal": 484,
1022
+ "wbi": 485,
1023
+ "wec": 486,
1024
+ "wes": 487,
1025
+ "wib": 488,
1026
+ "wmw": 489,
1027
+ "wol": 490,
1028
+ "won": 491,
1029
+ "xan": 492,
1030
+ "xed": 493,
1031
+ "xho": 494,
1032
+ "xnz": 495,
1033
+ "xog": 496,
1034
+ "xon": 497,
1035
+ "xpe": 498,
1036
+ "xrb": 499,
1037
+ "xsm": 500,
1038
+ "xtc": 501,
1039
+ "xuo": 502,
1040
+ "yal": 503,
1041
+ "yam": 504,
1042
+ "yao": 505,
1043
+ "yat": 506,
1044
+ "yba": 507,
1045
+ "ybb": 508,
1046
+ "yom": 509,
1047
+ "yor": 510,
1048
+ "yre": 511,
1049
+ "zaj": 512,
1050
+ "zdj": 513,
1051
+ "zga": 514,
1052
+ "ziw": 515,
1053
+ "zne": 516,
1054
+ "zul": 517
1055
+ },
1056
+ "layer_norm_eps": 1e-05,
1057
+ "max_position_embeddings": 514,
1058
+ "model_type": "xlm-roberta",
1059
+ "num_attention_heads": 12,
1060
+ "num_hidden_layers": 12,
1061
+ "output_past": true,
1062
+ "pad_token_id": 1,
1063
+ "position_embedding_type": "absolute",
1064
+ "problem_type": "single_label_classification",
1065
+ "torch_dtype": "float32",
1066
+ "transformers_version": "4.31.0",
1067
+ "type_vocab_size": 1,
1068
+ "use_cache": true,
1069
+ "vocab_size": 250004
1070
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa54e75de45f97547502ff26a2f2ac578481aa6f888fef9cb60463ee3a94f48b
3
+ size 1113843057
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae9459d6b3ea13588a4e2137d473d03cfc84154c648388e07c7be8a1470fd4d1
3
+ size 4796746
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "clean_up_tokenization_spaces": true,
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "mask_token": {
7
+ "__type": "AddedToken",
8
+ "content": "<mask>",
9
+ "lstrip": true,
10
+ "normalized": true,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "model_max_length": 1000000000000000019884624838656,
15
+ "pad_token": "<pad>",
16
+ "sep_token": "</s>",
17
+ "sp_model_kwargs": {},
18
+ "tokenizer_class": "XLMRobertaTokenizer",
19
+ "unk_token": "<unk>",
20
+ "use_fast": true
21
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28956ec9bfbe2f8d4e124409c95cf8550cda4e5f85a5d64e14e906a341f25db7
3
+ size 4091