|
{ |
|
"add_prefix_space": true, |
|
"added_tokens_decoder": { |
|
"0": { |
|
"content": "<pad>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"1": { |
|
"content": "<unk>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"2": { |
|
"content": "<s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"3": { |
|
"content": "</s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256001": { |
|
"content": "__ace__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256002": { |
|
"content": "__ace_Latn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256003": { |
|
"content": "__acm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256004": { |
|
"content": "__acq__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256005": { |
|
"content": "__aeb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256006": { |
|
"content": "__afr__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256007": { |
|
"content": "__ajp__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256008": { |
|
"content": "__aka__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256009": { |
|
"content": "__amh__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256010": { |
|
"content": "__apc__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256011": { |
|
"content": "__arb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256012": { |
|
"content": "__ars__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256013": { |
|
"content": "__ary__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256014": { |
|
"content": "__arz__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256015": { |
|
"content": "__asm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256016": { |
|
"content": "__ast__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256017": { |
|
"content": "__awa__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256018": { |
|
"content": "__ayr__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256019": { |
|
"content": "__azb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256020": { |
|
"content": "__azj__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256021": { |
|
"content": "__bak__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256022": { |
|
"content": "__bam__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256023": { |
|
"content": "__ban__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256024": { |
|
"content": "__bel__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256025": { |
|
"content": "__bem__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256026": { |
|
"content": "__ben__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256027": { |
|
"content": "__bho__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256028": { |
|
"content": "__bjn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256029": { |
|
"content": "__bjn_Latn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256030": { |
|
"content": "__bod__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256031": { |
|
"content": "__bos__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256032": { |
|
"content": "__bug__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256033": { |
|
"content": "__bul__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256034": { |
|
"content": "__cat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256035": { |
|
"content": "__ceb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256036": { |
|
"content": "__ces__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256037": { |
|
"content": "__cjk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256038": { |
|
"content": "__ckb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256039": { |
|
"content": "__crh__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256040": { |
|
"content": "__cym__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256041": { |
|
"content": "__dan__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256042": { |
|
"content": "__deu__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256043": { |
|
"content": "__dik__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256044": { |
|
"content": "__dyu__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256045": { |
|
"content": "__dzo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256046": { |
|
"content": "__ell__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256047": { |
|
"content": "__eng__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256048": { |
|
"content": "__epo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256049": { |
|
"content": "__est__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256050": { |
|
"content": "__eus__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256051": { |
|
"content": "__ewe__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256052": { |
|
"content": "__fao__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256053": { |
|
"content": "__pes__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256054": { |
|
"content": "__fij__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256055": { |
|
"content": "__fin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256056": { |
|
"content": "__fon__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256057": { |
|
"content": "__fra__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256058": { |
|
"content": "__fur__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256059": { |
|
"content": "__fuv__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256060": { |
|
"content": "__gla__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256061": { |
|
"content": "__gle__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256062": { |
|
"content": "__glg__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256063": { |
|
"content": "__grn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256064": { |
|
"content": "__guj__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256065": { |
|
"content": "__hat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256066": { |
|
"content": "__hau__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256067": { |
|
"content": "__heb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256068": { |
|
"content": "__hin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256069": { |
|
"content": "__hne__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256070": { |
|
"content": "__hrv__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256071": { |
|
"content": "__hun__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256072": { |
|
"content": "__hye__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256073": { |
|
"content": "__ibo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256074": { |
|
"content": "__ilo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256075": { |
|
"content": "__ind__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256076": { |
|
"content": "__isl__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256077": { |
|
"content": "__ita__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256078": { |
|
"content": "__jav__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256079": { |
|
"content": "__jpn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256080": { |
|
"content": "__kab__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256081": { |
|
"content": "__kac__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256082": { |
|
"content": "__kam__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256083": { |
|
"content": "__kan__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256084": { |
|
"content": "__kas__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256085": { |
|
"content": "__kas_Deva__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256086": { |
|
"content": "__kat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256087": { |
|
"content": "__knc__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256088": { |
|
"content": "__knc_Latn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256089": { |
|
"content": "__kaz__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256090": { |
|
"content": "__kbp__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256091": { |
|
"content": "__kea__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256092": { |
|
"content": "__khm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256093": { |
|
"content": "__kik__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256094": { |
|
"content": "__kin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256095": { |
|
"content": "__kir__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256096": { |
|
"content": "__kmb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256097": { |
|
"content": "__kon__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256098": { |
|
"content": "__kor__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256099": { |
|
"content": "__kmr__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256100": { |
|
"content": "__lao__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256101": { |
|
"content": "__lvs__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256102": { |
|
"content": "__lij__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256103": { |
|
"content": "__lim__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256104": { |
|
"content": "__lin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256105": { |
|
"content": "__lit__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256106": { |
|
"content": "__lmo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256107": { |
|
"content": "__ltg__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256108": { |
|
"content": "__ltz__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256109": { |
|
"content": "__lua__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256110": { |
|
"content": "__lug__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256111": { |
|
"content": "__luo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256112": { |
|
"content": "__lus__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256113": { |
|
"content": "__mag__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256114": { |
|
"content": "__mai__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256115": { |
|
"content": "__mal__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256116": { |
|
"content": "__mar__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256117": { |
|
"content": "__min__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256118": { |
|
"content": "__mkd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256119": { |
|
"content": "__plt__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256120": { |
|
"content": "__mlt__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256121": { |
|
"content": "__mni__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256122": { |
|
"content": "__khk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256123": { |
|
"content": "__mos__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256124": { |
|
"content": "__mri__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256125": { |
|
"content": "__zsm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256126": { |
|
"content": "__mya__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256127": { |
|
"content": "__nld__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256128": { |
|
"content": "__nno__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256129": { |
|
"content": "__nob__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256130": { |
|
"content": "__npi__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256131": { |
|
"content": "__nso__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256132": { |
|
"content": "__nus__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256133": { |
|
"content": "__nya__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256134": { |
|
"content": "__oci__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256135": { |
|
"content": "__gaz__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256136": { |
|
"content": "__ory__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256137": { |
|
"content": "__pag__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256138": { |
|
"content": "__pan__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256139": { |
|
"content": "__pap__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256140": { |
|
"content": "__pol__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256141": { |
|
"content": "__por__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256142": { |
|
"content": "__prs__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256143": { |
|
"content": "__pbt__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256144": { |
|
"content": "__quy__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256145": { |
|
"content": "__ron__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256146": { |
|
"content": "__run__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256147": { |
|
"content": "__rus__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256148": { |
|
"content": "__sag__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256149": { |
|
"content": "__san__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256150": { |
|
"content": "__sat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256151": { |
|
"content": "__scn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256152": { |
|
"content": "__shn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256153": { |
|
"content": "__sin__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256154": { |
|
"content": "__slk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256155": { |
|
"content": "__slv__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256156": { |
|
"content": "__smo__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256157": { |
|
"content": "__sna__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256158": { |
|
"content": "__snd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256159": { |
|
"content": "__som__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256160": { |
|
"content": "__sot__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256161": { |
|
"content": "__spa__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256162": { |
|
"content": "__als__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256163": { |
|
"content": "__srd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256164": { |
|
"content": "__srp__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256165": { |
|
"content": "__ssw__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256166": { |
|
"content": "__sun__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256167": { |
|
"content": "__swe__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256168": { |
|
"content": "__swh__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256169": { |
|
"content": "__szl__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256170": { |
|
"content": "__tam__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256171": { |
|
"content": "__tat__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256172": { |
|
"content": "__tel__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256173": { |
|
"content": "__tgk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256174": { |
|
"content": "__tgl__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256175": { |
|
"content": "__tha__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256176": { |
|
"content": "__tir__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256177": { |
|
"content": "__taq__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256178": { |
|
"content": "__taq_Tfng__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256179": { |
|
"content": "__tpi__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256180": { |
|
"content": "__tsn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256181": { |
|
"content": "__tso__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256182": { |
|
"content": "__tuk__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256183": { |
|
"content": "__tum__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256184": { |
|
"content": "__tur__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256185": { |
|
"content": "__twi__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256186": { |
|
"content": "__tzm__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256187": { |
|
"content": "__uig__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256188": { |
|
"content": "__ukr__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256189": { |
|
"content": "__umb__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256190": { |
|
"content": "__urd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256191": { |
|
"content": "__uzn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256192": { |
|
"content": "__vec__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256193": { |
|
"content": "__vie__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256194": { |
|
"content": "__war__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256195": { |
|
"content": "__wol__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256196": { |
|
"content": "__xho__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256197": { |
|
"content": "__ydd__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256198": { |
|
"content": "__yor__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256199": { |
|
"content": "__yue__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256200": { |
|
"content": "__cmn__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256201": { |
|
"content": "__cmn_Hant__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"256202": { |
|
"content": "__zul__", |
|
"lstrip": true, |
|
"normalized": false, |
|
"rstrip": true, |
|
"single_word": false, |
|
"special": true |
|
} |
|
}, |
|
"additional_special_tokens": [ |
|
"<pad>", |
|
"<unk>", |
|
"<s>", |
|
"</s>", |
|
"__ace__", |
|
"__ace_Latn__", |
|
"__acm__", |
|
"__acq__", |
|
"__aeb__", |
|
"__afr__", |
|
"__ajp__", |
|
"__aka__", |
|
"__amh__", |
|
"__apc__", |
|
"__arb__", |
|
"__ars__", |
|
"__ary__", |
|
"__arz__", |
|
"__asm__", |
|
"__ast__", |
|
"__awa__", |
|
"__ayr__", |
|
"__azb__", |
|
"__azj__", |
|
"__bak__", |
|
"__bam__", |
|
"__ban__", |
|
"__bel__", |
|
"__bem__", |
|
"__ben__", |
|
"__bho__", |
|
"__bjn__", |
|
"__bjn_Latn__", |
|
"__bod__", |
|
"__bos__", |
|
"__bug__", |
|
"__bul__", |
|
"__cat__", |
|
"__ceb__", |
|
"__ces__", |
|
"__cjk__", |
|
"__ckb__", |
|
"__crh__", |
|
"__cym__", |
|
"__dan__", |
|
"__deu__", |
|
"__dik__", |
|
"__dyu__", |
|
"__dzo__", |
|
"__ell__", |
|
"__eng__", |
|
"__epo__", |
|
"__est__", |
|
"__eus__", |
|
"__ewe__", |
|
"__fao__", |
|
"__pes__", |
|
"__fij__", |
|
"__fin__", |
|
"__fon__", |
|
"__fra__", |
|
"__fur__", |
|
"__fuv__", |
|
"__gla__", |
|
"__gle__", |
|
"__glg__", |
|
"__grn__", |
|
"__guj__", |
|
"__hat__", |
|
"__hau__", |
|
"__heb__", |
|
"__hin__", |
|
"__hne__", |
|
"__hrv__", |
|
"__hun__", |
|
"__hye__", |
|
"__ibo__", |
|
"__ilo__", |
|
"__ind__", |
|
"__isl__", |
|
"__ita__", |
|
"__jav__", |
|
"__jpn__", |
|
"__kab__", |
|
"__kac__", |
|
"__kam__", |
|
"__kan__", |
|
"__kas__", |
|
"__kas_Deva__", |
|
"__kat__", |
|
"__knc__", |
|
"__knc_Latn__", |
|
"__kaz__", |
|
"__kbp__", |
|
"__kea__", |
|
"__khm__", |
|
"__kik__", |
|
"__kin__", |
|
"__kir__", |
|
"__kmb__", |
|
"__kon__", |
|
"__kor__", |
|
"__kmr__", |
|
"__lao__", |
|
"__lvs__", |
|
"__lij__", |
|
"__lim__", |
|
"__lin__", |
|
"__lit__", |
|
"__lmo__", |
|
"__ltg__", |
|
"__ltz__", |
|
"__lua__", |
|
"__lug__", |
|
"__luo__", |
|
"__lus__", |
|
"__mag__", |
|
"__mai__", |
|
"__mal__", |
|
"__mar__", |
|
"__min__", |
|
"__mkd__", |
|
"__plt__", |
|
"__mlt__", |
|
"__mni__", |
|
"__khk__", |
|
"__mos__", |
|
"__mri__", |
|
"__zsm__", |
|
"__mya__", |
|
"__nld__", |
|
"__nno__", |
|
"__nob__", |
|
"__npi__", |
|
"__nso__", |
|
"__nus__", |
|
"__nya__", |
|
"__oci__", |
|
"__gaz__", |
|
"__ory__", |
|
"__pag__", |
|
"__pan__", |
|
"__pap__", |
|
"__pol__", |
|
"__por__", |
|
"__prs__", |
|
"__pbt__", |
|
"__quy__", |
|
"__ron__", |
|
"__run__", |
|
"__rus__", |
|
"__sag__", |
|
"__san__", |
|
"__sat__", |
|
"__scn__", |
|
"__shn__", |
|
"__sin__", |
|
"__slk__", |
|
"__slv__", |
|
"__smo__", |
|
"__sna__", |
|
"__snd__", |
|
"__som__", |
|
"__sot__", |
|
"__spa__", |
|
"__als__", |
|
"__srd__", |
|
"__srp__", |
|
"__ssw__", |
|
"__sun__", |
|
"__swe__", |
|
"__swh__", |
|
"__szl__", |
|
"__tam__", |
|
"__tat__", |
|
"__tel__", |
|
"__tgk__", |
|
"__tgl__", |
|
"__tha__", |
|
"__tir__", |
|
"__taq__", |
|
"__taq_Tfng__", |
|
"__tpi__", |
|
"__tsn__", |
|
"__tso__", |
|
"__tuk__", |
|
"__tum__", |
|
"__tur__", |
|
"__twi__", |
|
"__tzm__", |
|
"__uig__", |
|
"__ukr__", |
|
"__umb__", |
|
"__urd__", |
|
"__uzn__", |
|
"__vec__", |
|
"__vie__", |
|
"__war__", |
|
"__wol__", |
|
"__xho__", |
|
"__ydd__", |
|
"__yor__", |
|
"__yue__", |
|
"__cmn__", |
|
"__cmn_Hant__", |
|
"__zul__" |
|
], |
|
"bos_token": "<s>", |
|
"clean_up_tokenization_spaces": true, |
|
"cls_token": "<s>", |
|
"eos_token": "</s>", |
|
"max_length": 128, |
|
"model_max_length": 1000000000000000019884624838656, |
|
"pad_to_multiple_of": 2, |
|
"pad_token": "<pad>", |
|
"pad_token_type_id": 0, |
|
"padding_side": "right", |
|
"processor_class": "SeamlessM4TProcessor", |
|
"sep_token": "</s>", |
|
"sp_model_kwargs": {}, |
|
"src_lang": "__eng__", |
|
"stride": 0, |
|
"tgt_lang": "__fra__", |
|
"tokenizer_class": "SeamlessM4TTokenizer", |
|
"truncation_side": "right", |
|
"truncation_strategy": "longest_first", |
|
"unk_token": "<unk>" |
|
} |
|
|