|
{ |
|
"additional_special_tokens": [ |
|
"ar_AR", |
|
"cs_CZ", |
|
"de_DE", |
|
"en_XX", |
|
"es_XX", |
|
"et_EE", |
|
"fi_FI", |
|
"fr_XX", |
|
"gu_IN", |
|
"hi_IN", |
|
"it_IT", |
|
"ja_XX", |
|
"kk_KZ", |
|
"ko_KR", |
|
"lt_LT", |
|
"lv_LV", |
|
"my_MM", |
|
"ne_NP", |
|
"nl_XX", |
|
"ro_RO", |
|
"ru_RU", |
|
"si_LK", |
|
"tr_TR", |
|
"vi_VN", |
|
"zh_CN", |
|
"af_ZA", |
|
"az_AZ", |
|
"bn_IN", |
|
"fa_IR", |
|
"he_IL", |
|
"hr_HR", |
|
"id_ID", |
|
"ka_GE", |
|
"km_KH", |
|
"mk_MK", |
|
"ml_IN", |
|
"mn_MN", |
|
"mr_IN", |
|
"pl_PL", |
|
"ps_AF", |
|
"pt_XX", |
|
"sv_SE", |
|
"sw_KE", |
|
"ta_IN", |
|
"te_IN", |
|
"th_TH", |
|
"tl_XX", |
|
"uk_UA", |
|
"ur_PK", |
|
"xh_ZA", |
|
"gl_ES", |
|
"sl_SI" |
|
], |
|
"bos_token": "<s>", |
|
"cls_token": "<s>", |
|
"eos_token": "</s>", |
|
"language_codes": "ML50", |
|
"mask_token": { |
|
"__type": "AddedToken", |
|
"content": "<mask>", |
|
"lstrip": true, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false |
|
}, |
|
"name_or_path": "nam194/augment_210k_then_sentence_length_zh_vi_epoch_1", |
|
"pad_token": "<pad>", |
|
"sep_token": "</s>", |
|
"sp_model_kwargs": {}, |
|
"special_tokens_map_file": "/home/suraj/projects/mbart-50/mbart-50/special_tokens_map.json", |
|
"src_lang": null, |
|
"tgt_lang": null, |
|
"tokenizer_class": "MBart50Tokenizer", |
|
"unk_token": "<unk>" |
|
} |
|
|