hf-seamless-m4t-medium-en-tw-3-ep / tokenizer_config.json
lukmanaj's picture
Upload tokenizer
deda80d verified
{
"add_prefix_space": true,
"added_tokens_decoder": {
"0": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"256001": {
"content": "__ace__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256002": {
"content": "__ace_Latn__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256003": {
"content": "__acm__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256004": {
"content": "__acq__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256005": {
"content": "__aeb__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256006": {
"content": "__afr__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256007": {
"content": "__ajp__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256008": {
"content": "__aka__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256009": {
"content": "__amh__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256010": {
"content": "__apc__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256011": {
"content": "__arb__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256012": {
"content": "__ars__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256013": {
"content": "__ary__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256014": {
"content": "__arz__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256015": {
"content": "__asm__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256016": {
"content": "__ast__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256017": {
"content": "__awa__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256018": {
"content": "__ayr__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256019": {
"content": "__azb__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256020": {
"content": "__azj__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256021": {
"content": "__bak__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256022": {
"content": "__bam__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256023": {
"content": "__ban__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256024": {
"content": "__bel__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256025": {
"content": "__bem__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256026": {
"content": "__ben__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256027": {
"content": "__bho__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256028": {
"content": "__bjn__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256029": {
"content": "__bjn_Latn__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256030": {
"content": "__bod__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256031": {
"content": "__bos__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256032": {
"content": "__bug__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256033": {
"content": "__bul__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256034": {
"content": "__cat__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256035": {
"content": "__ceb__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256036": {
"content": "__ces__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256037": {
"content": "__cjk__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256038": {
"content": "__ckb__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256039": {
"content": "__crh__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256040": {
"content": "__cym__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256041": {
"content": "__dan__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256042": {
"content": "__deu__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256043": {
"content": "__dik__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256044": {
"content": "__dyu__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256045": {
"content": "__dzo__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256046": {
"content": "__ell__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256047": {
"content": "__eng__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256048": {
"content": "__epo__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256049": {
"content": "__est__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256050": {
"content": "__eus__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256051": {
"content": "__ewe__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256052": {
"content": "__fao__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256053": {
"content": "__pes__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256054": {
"content": "__fij__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256055": {
"content": "__fin__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256056": {
"content": "__fon__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256057": {
"content": "__fra__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256058": {
"content": "__fur__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256059": {
"content": "__fuv__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256060": {
"content": "__gla__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256061": {
"content": "__gle__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256062": {
"content": "__glg__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256063": {
"content": "__grn__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256064": {
"content": "__guj__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256065": {
"content": "__hat__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256066": {
"content": "__hau__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256067": {
"content": "__heb__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256068": {
"content": "__hin__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256069": {
"content": "__hne__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256070": {
"content": "__hrv__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256071": {
"content": "__hun__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256072": {
"content": "__hye__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256073": {
"content": "__ibo__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256074": {
"content": "__ilo__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256075": {
"content": "__ind__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256076": {
"content": "__isl__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256077": {
"content": "__ita__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256078": {
"content": "__jav__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256079": {
"content": "__jpn__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256080": {
"content": "__kab__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256081": {
"content": "__kac__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256082": {
"content": "__kam__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256083": {
"content": "__kan__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256084": {
"content": "__kas__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256085": {
"content": "__kas_Deva__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256086": {
"content": "__kat__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256087": {
"content": "__knc__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256088": {
"content": "__knc_Latn__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256089": {
"content": "__kaz__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256090": {
"content": "__kbp__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256091": {
"content": "__kea__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256092": {
"content": "__khm__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256093": {
"content": "__kik__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256094": {
"content": "__kin__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256095": {
"content": "__kir__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256096": {
"content": "__kmb__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256097": {
"content": "__kon__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256098": {
"content": "__kor__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256099": {
"content": "__kmr__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256100": {
"content": "__lao__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256101": {
"content": "__lvs__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256102": {
"content": "__lij__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256103": {
"content": "__lim__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256104": {
"content": "__lin__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256105": {
"content": "__lit__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256106": {
"content": "__lmo__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256107": {
"content": "__ltg__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256108": {
"content": "__ltz__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256109": {
"content": "__lua__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256110": {
"content": "__lug__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256111": {
"content": "__luo__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256112": {
"content": "__lus__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256113": {
"content": "__mag__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256114": {
"content": "__mai__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256115": {
"content": "__mal__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256116": {
"content": "__mar__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256117": {
"content": "__min__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256118": {
"content": "__mkd__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256119": {
"content": "__plt__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256120": {
"content": "__mlt__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256121": {
"content": "__mni__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256122": {
"content": "__khk__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256123": {
"content": "__mos__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256124": {
"content": "__mri__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256125": {
"content": "__zsm__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256126": {
"content": "__mya__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256127": {
"content": "__nld__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256128": {
"content": "__nno__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256129": {
"content": "__nob__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256130": {
"content": "__npi__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256131": {
"content": "__nso__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256132": {
"content": "__nus__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256133": {
"content": "__nya__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256134": {
"content": "__oci__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256135": {
"content": "__gaz__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256136": {
"content": "__ory__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256137": {
"content": "__pag__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256138": {
"content": "__pan__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256139": {
"content": "__pap__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256140": {
"content": "__pol__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256141": {
"content": "__por__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256142": {
"content": "__prs__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256143": {
"content": "__pbt__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256144": {
"content": "__quy__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256145": {
"content": "__ron__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256146": {
"content": "__run__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256147": {
"content": "__rus__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256148": {
"content": "__sag__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256149": {
"content": "__san__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256150": {
"content": "__sat__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256151": {
"content": "__scn__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256152": {
"content": "__shn__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256153": {
"content": "__sin__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256154": {
"content": "__slk__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256155": {
"content": "__slv__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256156": {
"content": "__smo__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256157": {
"content": "__sna__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256158": {
"content": "__snd__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256159": {
"content": "__som__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256160": {
"content": "__sot__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256161": {
"content": "__spa__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256162": {
"content": "__als__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256163": {
"content": "__srd__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256164": {
"content": "__srp__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256165": {
"content": "__ssw__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256166": {
"content": "__sun__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256167": {
"content": "__swe__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256168": {
"content": "__swh__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256169": {
"content": "__szl__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256170": {
"content": "__tam__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256171": {
"content": "__tat__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256172": {
"content": "__tel__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256173": {
"content": "__tgk__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256174": {
"content": "__tgl__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256175": {
"content": "__tha__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256176": {
"content": "__tir__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256177": {
"content": "__taq__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256178": {
"content": "__taq_Tfng__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256179": {
"content": "__tpi__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256180": {
"content": "__tsn__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256181": {
"content": "__tso__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256182": {
"content": "__tuk__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256183": {
"content": "__tum__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256184": {
"content": "__tur__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256185": {
"content": "__twi__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256186": {
"content": "__tzm__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256187": {
"content": "__uig__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256188": {
"content": "__ukr__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256189": {
"content": "__umb__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256190": {
"content": "__urd__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256191": {
"content": "__uzn__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256192": {
"content": "__vec__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256193": {
"content": "__vie__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256194": {
"content": "__war__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256195": {
"content": "__wol__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256196": {
"content": "__xho__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256197": {
"content": "__ydd__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256198": {
"content": "__yor__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256199": {
"content": "__yue__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256200": {
"content": "__cmn__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256201": {
"content": "__cmn_Hant__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"256202": {
"content": "__zul__",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<pad>",
"<unk>",
"<s>",
"</s>",
"__ace__",
"__ace_Latn__",
"__acm__",
"__acq__",
"__aeb__",
"__afr__",
"__ajp__",
"__aka__",
"__amh__",
"__apc__",
"__arb__",
"__ars__",
"__ary__",
"__arz__",
"__asm__",
"__ast__",
"__awa__",
"__ayr__",
"__azb__",
"__azj__",
"__bak__",
"__bam__",
"__ban__",
"__bel__",
"__bem__",
"__ben__",
"__bho__",
"__bjn__",
"__bjn_Latn__",
"__bod__",
"__bos__",
"__bug__",
"__bul__",
"__cat__",
"__ceb__",
"__ces__",
"__cjk__",
"__ckb__",
"__crh__",
"__cym__",
"__dan__",
"__deu__",
"__dik__",
"__dyu__",
"__dzo__",
"__ell__",
"__eng__",
"__epo__",
"__est__",
"__eus__",
"__ewe__",
"__fao__",
"__pes__",
"__fij__",
"__fin__",
"__fon__",
"__fra__",
"__fur__",
"__fuv__",
"__gla__",
"__gle__",
"__glg__",
"__grn__",
"__guj__",
"__hat__",
"__hau__",
"__heb__",
"__hin__",
"__hne__",
"__hrv__",
"__hun__",
"__hye__",
"__ibo__",
"__ilo__",
"__ind__",
"__isl__",
"__ita__",
"__jav__",
"__jpn__",
"__kab__",
"__kac__",
"__kam__",
"__kan__",
"__kas__",
"__kas_Deva__",
"__kat__",
"__knc__",
"__knc_Latn__",
"__kaz__",
"__kbp__",
"__kea__",
"__khm__",
"__kik__",
"__kin__",
"__kir__",
"__kmb__",
"__kon__",
"__kor__",
"__kmr__",
"__lao__",
"__lvs__",
"__lij__",
"__lim__",
"__lin__",
"__lit__",
"__lmo__",
"__ltg__",
"__ltz__",
"__lua__",
"__lug__",
"__luo__",
"__lus__",
"__mag__",
"__mai__",
"__mal__",
"__mar__",
"__min__",
"__mkd__",
"__plt__",
"__mlt__",
"__mni__",
"__khk__",
"__mos__",
"__mri__",
"__zsm__",
"__mya__",
"__nld__",
"__nno__",
"__nob__",
"__npi__",
"__nso__",
"__nus__",
"__nya__",
"__oci__",
"__gaz__",
"__ory__",
"__pag__",
"__pan__",
"__pap__",
"__pol__",
"__por__",
"__prs__",
"__pbt__",
"__quy__",
"__ron__",
"__run__",
"__rus__",
"__sag__",
"__san__",
"__sat__",
"__scn__",
"__shn__",
"__sin__",
"__slk__",
"__slv__",
"__smo__",
"__sna__",
"__snd__",
"__som__",
"__sot__",
"__spa__",
"__als__",
"__srd__",
"__srp__",
"__ssw__",
"__sun__",
"__swe__",
"__swh__",
"__szl__",
"__tam__",
"__tat__",
"__tel__",
"__tgk__",
"__tgl__",
"__tha__",
"__tir__",
"__taq__",
"__taq_Tfng__",
"__tpi__",
"__tsn__",
"__tso__",
"__tuk__",
"__tum__",
"__tur__",
"__twi__",
"__tzm__",
"__uig__",
"__ukr__",
"__umb__",
"__urd__",
"__uzn__",
"__vec__",
"__vie__",
"__war__",
"__wol__",
"__xho__",
"__ydd__",
"__yor__",
"__yue__",
"__cmn__",
"__cmn_Hant__",
"__zul__"
],
"bos_token": "<s>",
"clean_up_tokenization_spaces": true,
"cls_token": "<s>",
"eos_token": "</s>",
"max_length": 128,
"model_max_length": 1000000000000000019884624838656,
"pad_to_multiple_of": 2,
"pad_token": "<pad>",
"pad_token_type_id": 0,
"padding_side": "right",
"processor_class": "SeamlessM4TProcessor",
"sep_token": "</s>",
"sp_model_kwargs": {},
"src_lang": "__eng__",
"stride": 0,
"tgt_lang": "__fra__",
"tokenizer_class": "SeamlessM4TTokenizer",
"truncation_side": "right",
"truncation_strategy": "longest_first",
"unk_token": "<unk>"
}