axiado committed on
Commit
ba2098d
·
1 Parent(s): ffec329

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer.json +290 -0
  3. tokenizer_config.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "[end]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "mask_token": "[MASK]"}
tokenizer.json ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 1,
8
+ "content": "[UNK]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 234,
17
+ "content": "[PAD]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 235,
26
+ "content": "[MASK]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 236,
35
+ "content": "[SEP]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ }
42
+ ],
43
+ "normalizer": null,
44
+ "pre_tokenizer": null,
45
+ "post_processor": null,
46
+ "decoder": null,
47
+ "model": {
48
+ "type": "WordPiece",
49
+ "unk_token": "[UNK]",
50
+ "continuing_subword_prefix": "##",
51
+ "max_input_chars_per_word": 100,
52
+ "vocab": {
53
+ "": 0,
54
+ "[UNK]": 1,
55
+ "[start]": 2,
56
+ "[end]": 3,
57
+ "malicious": 4,
58
+ "benign": 5,
59
+ "alert": 6,
60
+ "server_name": 7,
61
+ "sequence": 8,
62
+ "TLS": 9,
63
+ "cipher": 10,
64
+ "change_cipher_spec": 11,
65
+ "client_key_length": 12,
66
+ "c0": 13,
67
+ "s0": 14,
68
+ "c1": 15,
69
+ "s1": 16,
70
+ "c2": 17,
71
+ "s2": 18,
72
+ "c3": 19,
73
+ "s3": 20,
74
+ "c4": 21,
75
+ "s4": 22,
76
+ "c5": 23,
77
+ "s5": 24,
78
+ "c6": 25,
79
+ "s6": 26,
80
+ "c7": 27,
81
+ "s7": 28,
82
+ "c8": 29,
83
+ "s8": 30,
84
+ "c9": 31,
85
+ "s9": 32,
86
+ "c10": 33,
87
+ "s10": 34,
88
+ "c11": 35,
89
+ "s11": 36,
90
+ "c12": 37,
91
+ "s12": 38,
92
+ "c13": 39,
93
+ "s13": 40,
94
+ "c14": 41,
95
+ "s14": 42,
96
+ "c15": 43,
97
+ "s15": 44,
98
+ "c16": 45,
99
+ "s16": 46,
100
+ "l<1": 47,
101
+ "l:1": 48,
102
+ "l:2": 49,
103
+ "l:3": 50,
104
+ "l:4": 51,
105
+ "l:5": 52,
106
+ "l:6": 53,
107
+ "l:7": 54,
108
+ "l:8": 55,
109
+ "l:9": 56,
110
+ "l:10": 57,
111
+ "l:11": 58,
112
+ "l:12": 59,
113
+ "l:13": 60,
114
+ "l:14": 61,
115
+ "l:15": 62,
116
+ "l:16": 63,
117
+ "l:17": 64,
118
+ "l:18": 65,
119
+ "l:19": 66,
120
+ "l:20": 67,
121
+ "l>20": 68,
122
+ "l>10": 69,
123
+ "a": 70,
124
+ "b": 71,
125
+ "c": 72,
126
+ "d": 73,
127
+ "e": 74,
128
+ "f": 75,
129
+ "g": 76,
130
+ "h": 77,
131
+ "i": 78,
132
+ "j": 79,
133
+ "k": 80,
134
+ "l": 81,
135
+ "m": 82,
136
+ "n": 83,
137
+ "o": 84,
138
+ "p": 85,
139
+ "q": 86,
140
+ "r": 87,
141
+ "s": 88,
142
+ "t": 89,
143
+ "u": 90,
144
+ "v": 91,
145
+ "w": 92,
146
+ "x": 93,
147
+ "y": 94,
148
+ "z": 95,
149
+ "0": 96,
150
+ "1": 97,
151
+ "2": 98,
152
+ "3": 99,
153
+ "4": 100,
154
+ "5": 101,
155
+ "6": 102,
156
+ "7": 103,
157
+ "8": 104,
158
+ "9": 105,
159
+ ".": 106,
160
+ "-": 107,
161
+ "SSLv2": 108,
162
+ "SSLv3": 109,
163
+ "TLS1.0": 110,
164
+ "TLS1.1": 111,
165
+ "TLS1.2": 112,
166
+ "TLS1.3": 113,
167
+ "TLS1.3-d18": 114,
168
+ "TLS1.3-d19": 115,
169
+ "TLS_RSA_WITH_RC4_128_MD5": 116,
170
+ "TLS_RSA_WITH_RC4_128_SHA": 117,
171
+ "TLS_RSA_WITH_3DES_EDE_CBC_SHA": 118,
172
+ "TLS_DHE_DSS_WITH_3DES_EDE_CBC_SHA": 119,
173
+ "TLS_DHE_RSA_WITH_DES_CBC_SHA": 120,
174
+ "TLS_RSA_WITH_AES_128_CBC_SHA": 121,
175
+ "TLS_DHE_DSS_WITH_AES_128_CBC_SHA": 122,
176
+ "TLS_DHE_RSA_WITH_AES_128_CBC_SHA": 123,
177
+ "TLS_RSA_WITH_AES_256_CBC_SHA": 124,
178
+ "TLS_DHE_DSS_WITH_AES_256_CBC_SHA": 125,
179
+ "TLS_DHE_RSA_WITH_AES_256_CBC_SHA": 126,
180
+ "TLS_RSA_WITH_AES_128_CBC_SHA256": 127,
181
+ "TLS_RSA_WITH_AES_256_CBC_SHA256": 128,
182
+ "TLS_DHE_DSS_WITH_AES_128_CBC_SHA256": 129,
183
+ "TLS_RSA_WITH_CAMELLIA_128_CBC_SHA": 130,
184
+ "TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA": 131,
185
+ "TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA": 132,
186
+ "SSL_RSA_EXPORT1024_WITH_RC4_56_SHA": 133,
187
+ "TLS_DHE_RSA_WITH_AES_128_CBC_SHA256": 134,
188
+ "TLS_DHE_DSS_WITH_AES_256_CBC_SHA256": 135,
189
+ "TLS_DHE_RSA_WITH_AES_256_CBC_SHA256": 136,
190
+ "TLS_RSA_WITH_CAMELLIA_256_CBC_SHA": 137,
191
+ "TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA": 138,
192
+ "TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA": 139,
193
+ "TLS_RSA_WITH_SEED_CBC_SHA": 140,
194
+ "TLS_RSA_WITH_AES_128_GCM_SHA256": 141,
195
+ "TLS_RSA_WITH_AES_256_GCM_SHA384": 142,
196
+ "TLS_DHE_RSA_WITH_AES_128_GCM_SHA256": 143,
197
+ "TLS_DHE_RSA_WITH_AES_256_GCM_SHA384": 144,
198
+ "TLS_DHE_DSS_WITH_AES_128_GCM_SHA256": 145,
199
+ "TLS_DHE_DSS_WITH_AES_256_GCM_SHA384": 146,
200
+ "TLS_DHE_PSK_WITH_AES_128_GCM_SHA256": 147,
201
+ "TLS_DHE_PSK_WITH_AES_256_GCM_SHA384": 148,
202
+ "TLS_AES_128_GCM_SHA256": 149,
203
+ "TLS_AES_256_GCM_SHA384": 150,
204
+ "TLS_CHACHA20_POLY1305_SHA256": 151,
205
+ "TLS_AES_128_CCM_SHA256": 152,
206
+ "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA": 153,
207
+ "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA": 154,
208
+ "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA": 155,
209
+ "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA": 156,
210
+ "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA": 157,
211
+ "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256": 158,
212
+ "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384": 159,
213
+ "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256": 160,
214
+ "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384": 161,
215
+ "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256": 162,
216
+ "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384": 163,
217
+ "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256": 164,
218
+ "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384": 165,
219
+ "TLS_DHE_RSA_WITH_AES_128_CCM": 166,
220
+ "TLS_DHE_RSA_WITH_AES_256_CCM": 167,
221
+ "TLS_DHE_RSA_WITH_AES_128_CCM_8": 168,
222
+ "TLS_DHE_RSA_WITH_AES_256_CCM_8": 169,
223
+ "TLS_DHE_PSK_WITH_AES_128_CCM": 170,
224
+ "TLS_DHE_PSK_WITH_AES_256_CCM": 171,
225
+ "TLS_ECDHE_ECDSA_WITH_AES_128_CCM": 172,
226
+ "TLS_ECDHE_ECDSA_WITH_AES_256_CCM": 173,
227
+ "TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8": 174,
228
+ "TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8": 175,
229
+ "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256": 176,
230
+ "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256": 177,
231
+ "TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256": 178,
232
+ "TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256": 179,
233
+ "TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256": 180,
234
+ "TLS_ECDHE_PSK_WITH_AES_128_GCM_SHA256": 181,
235
+ "TLS_ECDHE_PSK_WITH_AES_256_GCM_SHA384": 182,
236
+ "TLS_ECDHE_PSK_WITH_AES_128_CCM_SHA256": 183,
237
+ "FIN": 184,
238
+ "SYN": 185,
239
+ "RST": 186,
240
+ "PSH": 187,
241
+ "ACK": 188,
242
+ "URG": 189,
243
+ "ECE": 190,
244
+ "CWR": 191,
245
+ "ramnit": 192,
246
+ "crthrazy": 193,
247
+ "nymaim": 194,
248
+ "bunitu": 195,
249
+ "zeus": 196,
250
+ "azorult": 197,
251
+ "parite": 198,
252
+ "vawtrak": 199,
253
+ "reposfxg": 200,
254
+ "zeus-panda": 201,
255
+ "gandcrab": 202,
256
+ "bankerx": 203,
257
+ "gootkit": 204,
258
+ "dridex": 205,
259
+ "upatre": 206,
260
+ "qakbot": 207,
261
+ "chthonic": 208,
262
+ "emotet": 209,
263
+ "troldesh": 210,
264
+ "kovter": 211,
265
+ "boleto": 212,
266
+ "hancitor": 213,
267
+ "remcos": 214,
268
+ "trickbot": 215,
269
+ "rig": 216,
270
+ "tofsee": 217,
271
+ "neutrino": 218,
272
+ "icedid": 219,
273
+ "dreambot": 220,
274
+ "miuref": 221,
275
+ "crypt": 222,
276
+ "cerber": 223,
277
+ "unclassified": 224,
278
+ "sigma": 225,
279
+ "spora": 226,
280
+ "locky": 227,
281
+ "fallout": 228,
282
+ "banload": 229,
283
+ "globeimposter": 230,
284
+ "angler": 231,
285
+ "ursnif": 232,
286
+ "?": 233,
287
+ "[PAD]": 234
288
+ }
289
+ }
290
+ }
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"max_len": 256, "tokenizer_class": "PreTrainedTokenizerFast"}