File size: 1,945 Bytes
9243af0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
{
  "additional_special_tokens": [
    "hin_Deva",
    "ben_Beng",
    "mar_Deva",
    "tel_Telu",
    "tam_Taml",
    "guj_Gujr",
    "kan_Knda",
    "mal_Mlym",
    "asm_Beng",
    "urd_Arab",
    "ory_Orya",
    "eng_Latn",
    "fra_Latn",
    "ita_Latn",
    "spa_Latn",
    "deu_Latn",
    "por_Latn",
    "kor_Hang",
    "tha_Thai",
    "arb_Arab",
    "vie_Latn",
    "rus_Cyrl",
    "jpn_Jpan",
    "tur_Latn",
    "ukr_Cyrl",
    "pes_Arab",
    "nld_Latn",
    "npi_Deva",
    "pol_Latn",
    "ind_Latn",
    "afr_Latn",
    "amh_Ethi",
    "hye_Armn",
    "bam_Latn",
    "bel_Cyrl",
    "bul_Cyrl",
    "zho_Hans",
    "ces_Latn",
    "dan_Latn",
    "fin_Latn",
    "ell_Grek",
    "heb_Hebr",
    "mri_Latn",
    "ron_Latn",
    "slv_Latn",
    "swe_Latn",
    "xho_Latn",
    "uig_Arab",
    "som_Latn",
    "pan_Guru",
    "hin_Latn",
    "ben_Latn",
    "tel_Latn",
    "tam_Latn",
    "guj_Latn",
    "kan_Latn",
    "arb_Latn",
    "rus_Latn",
    "jpn_Latn"
  ],
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}