SharpAI committed on
Commit
1879102
·
1 Parent(s): 181671e

add tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +1 -290
  2. tokenizer_config.json +1 -1
tokenizer.json CHANGED
@@ -1,290 +1 @@
1
- {
2
- "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
5
- "added_tokens": [
6
- {
7
- "id": 1,
8
- "content": "[UNK]",
9
- "single_word": false,
10
- "lstrip": false,
11
- "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
- },
15
- {
16
- "id": 234,
17
- "content": "[PAD]",
18
- "single_word": false,
19
- "lstrip": false,
20
- "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
- },
24
- {
25
- "id": 235,
26
- "content": "[MASK]",
27
- "single_word": false,
28
- "lstrip": false,
29
- "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
- },
33
- {
34
- "id": 236,
35
- "content": "[SEP]",
36
- "single_word": false,
37
- "lstrip": false,
38
- "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
- }
42
- ],
43
- "normalizer": null,
44
- "pre_tokenizer": null,
45
- "post_processor": null,
46
- "decoder": null,
47
- "model": {
48
- "type": "WordPiece",
49
- "unk_token": "[UNK]",
50
- "continuing_subword_prefix": "##",
51
- "max_input_chars_per_word": 100,
52
- "vocab": {
53
- "": 0,
54
- "[UNK]": 1,
55
- "[start]": 2,
56
- "[end]": 3,
57
- "malicious": 4,
58
- "benign": 5,
59
- "alert": 6,
60
- "server_name": 7,
61
- "sequence": 8,
62
- "TLS": 9,
63
- "cipher": 10,
64
- "change_cipher_spec": 11,
65
- "client_key_length": 12,
66
- "c0": 13,
67
- "s0": 14,
68
- "c1": 15,
69
- "s1": 16,
70
- "c2": 17,
71
- "s2": 18,
72
- "c3": 19,
73
- "s3": 20,
74
- "c4": 21,
75
- "s4": 22,
76
- "c5": 23,
77
- "s5": 24,
78
- "c6": 25,
79
- "s6": 26,
80
- "c7": 27,
81
- "s7": 28,
82
- "c8": 29,
83
- "s8": 30,
84
- "c9": 31,
85
- "s9": 32,
86
- "c10": 33,
87
- "s10": 34,
88
- "c11": 35,
89
- "s11": 36,
90
- "c12": 37,
91
- "s12": 38,
92
- "c13": 39,
93
- "s13": 40,
94
- "c14": 41,
95
- "s14": 42,
96
- "c15": 43,
97
- "s15": 44,
98
- "c16": 45,
99
- "s16": 46,
100
- "l<1": 47,
101
- "l:1": 48,
102
- "l:2": 49,
103
- "l:3": 50,
104
- "l:4": 51,
105
- "l:5": 52,
106
- "l:6": 53,
107
- "l:7": 54,
108
- "l:8": 55,
109
- "l:9": 56,
110
- "l:10": 57,
111
- "l:11": 58,
112
- "l:12": 59,
113
- "l:13": 60,
114
- "l:14": 61,
115
- "l:15": 62,
116
- "l:16": 63,
117
- "l:17": 64,
118
- "l:18": 65,
119
- "l:19": 66,
120
- "l:20": 67,
121
- "l>20": 68,
122
- "l>10": 69,
123
- "a": 70,
124
- "b": 71,
125
- "c": 72,
126
- "d": 73,
127
- "e": 74,
128
- "f": 75,
129
- "g": 76,
130
- "h": 77,
131
- "i": 78,
132
- "j": 79,
133
- "k": 80,
134
- "l": 81,
135
- "m": 82,
136
- "n": 83,
137
- "o": 84,
138
- "p": 85,
139
- "q": 86,
140
- "r": 87,
141
- "s": 88,
142
- "t": 89,
143
- "u": 90,
144
- "v": 91,
145
- "w": 92,
146
- "x": 93,
147
- "y": 94,
148
- "z": 95,
149
- "0": 96,
150
- "1": 97,
151
- "2": 98,
152
- "3": 99,
153
- "4": 100,
154
- "5": 101,
155
- "6": 102,
156
- "7": 103,
157
- "8": 104,
158
- "9": 105,
159
- ".": 106,
160
- "-": 107,
161
- "SSLv2": 108,
162
- "SSLv3": 109,
163
- "TLS1.0": 110,
164
- "TLS1.1": 111,
165
- "TLS1.2": 112,
166
- "TLS1.3": 113,
167
- "TLS1.3-d18": 114,
168
- "TLS1.3-d19": 115,
169
- "TLS_RSA_WITH_RC4_128_MD5": 116,
170
- "TLS_RSA_WITH_RC4_128_SHA": 117,
171
- "TLS_RSA_WITH_3DES_EDE_CBC_SHA": 118,
172
- "TLS_DHE_DSS_WITH_3DES_EDE_CBC_SHA": 119,
173
- "TLS_DHE_RSA_WITH_DES_CBC_SHA": 120,
174
- "TLS_RSA_WITH_AES_128_CBC_SHA": 121,
175
- "TLS_DHE_DSS_WITH_AES_128_CBC_SHA": 122,
176
- "TLS_DHE_RSA_WITH_AES_128_CBC_SHA": 123,
177
- "TLS_RSA_WITH_AES_256_CBC_SHA": 124,
178
- "TLS_DHE_DSS_WITH_AES_256_CBC_SHA": 125,
179
- "TLS_DHE_RSA_WITH_AES_256_CBC_SHA": 126,
180
- "TLS_RSA_WITH_AES_128_CBC_SHA256": 127,
181
- "TLS_RSA_WITH_AES_256_CBC_SHA256": 128,
182
- "TLS_DHE_DSS_WITH_AES_128_CBC_SHA256": 129,
183
- "TLS_RSA_WITH_CAMELLIA_128_CBC_SHA": 130,
184
- "TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA": 131,
185
- "TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA": 132,
186
- "SSL_RSA_EXPORT1024_WITH_RC4_56_SHA": 133,
187
- "TLS_DHE_RSA_WITH_AES_128_CBC_SHA256": 134,
188
- "TLS_DHE_DSS_WITH_AES_256_CBC_SHA256": 135,
189
- "TLS_DHE_RSA_WITH_AES_256_CBC_SHA256": 136,
190
- "TLS_RSA_WITH_CAMELLIA_256_CBC_SHA": 137,
191
- "TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA": 138,
192
- "TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA": 139,
193
- "TLS_RSA_WITH_SEED_CBC_SHA": 140,
194
- "TLS_RSA_WITH_AES_128_GCM_SHA256": 141,
195
- "TLS_RSA_WITH_AES_256_GCM_SHA384": 142,
196
- "TLS_DHE_RSA_WITH_AES_128_GCM_SHA256": 143,
197
- "TLS_DHE_RSA_WITH_AES_256_GCM_SHA384": 144,
198
- "TLS_DHE_DSS_WITH_AES_128_GCM_SHA256": 145,
199
- "TLS_DHE_DSS_WITH_AES_256_GCM_SHA384": 146,
200
- "TLS_DHE_PSK_WITH_AES_128_GCM_SHA256": 147,
201
- "TLS_DHE_PSK_WITH_AES_256_GCM_SHA384": 148,
202
- "TLS_AES_128_GCM_SHA256": 149,
203
- "TLS_AES_256_GCM_SHA384": 150,
204
- "TLS_CHACHA20_POLY1305_SHA256": 151,
205
- "TLS_AES_128_CCM_SHA256": 152,
206
- "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA": 153,
207
- "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA": 154,
208
- "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA": 155,
209
- "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA": 156,
210
- "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA": 157,
211
- "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256": 158,
212
- "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384": 159,
213
- "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256": 160,
214
- "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384": 161,
215
- "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256": 162,
216
- "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384": 163,
217
- "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256": 164,
218
- "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384": 165,
219
- "TLS_DHE_RSA_WITH_AES_128_CCM": 166,
220
- "TLS_DHE_RSA_WITH_AES_256_CCM": 167,
221
- "TLS_DHE_RSA_WITH_AES_128_CCM_8": 168,
222
- "TLS_DHE_RSA_WITH_AES_256_CCM_8": 169,
223
- "TLS_DHE_PSK_WITH_AES_128_CCM": 170,
224
- "TLS_DHE_PSK_WITH_AES_256_CCM": 171,
225
- "TLS_ECDHE_ECDSA_WITH_AES_128_CCM": 172,
226
- "TLS_ECDHE_ECDSA_WITH_AES_256_CCM": 173,
227
- "TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8": 174,
228
- "TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8": 175,
229
- "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256": 176,
230
- "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256": 177,
231
- "TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256": 178,
232
- "TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256": 179,
233
- "TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256": 180,
234
- "TLS_ECDHE_PSK_WITH_AES_128_GCM_SHA256": 181,
235
- "TLS_ECDHE_PSK_WITH_AES_256_GCM_SHA384": 182,
236
- "TLS_ECDHE_PSK_WITH_AES_128_CCM_SHA256": 183,
237
- "FIN": 184,
238
- "SYN": 185,
239
- "RST": 186,
240
- "PSH": 187,
241
- "ACK": 188,
242
- "URG": 189,
243
- "ECE": 190,
244
- "CWR": 191,
245
- "ramnit": 192,
246
- "crthrazy": 193,
247
- "nymaim": 194,
248
- "bunitu": 195,
249
- "zeus": 196,
250
- "azorult": 197,
251
- "parite": 198,
252
- "vawtrak": 199,
253
- "reposfxg": 200,
254
- "zeus-panda": 201,
255
- "gandcrab": 202,
256
- "bankerx": 203,
257
- "gootkit": 204,
258
- "dridex": 205,
259
- "upatre": 206,
260
- "qakbot": 207,
261
- "chthonic": 208,
262
- "emotet": 209,
263
- "troldesh": 210,
264
- "kovter": 211,
265
- "boleto": 212,
266
- "hancitor": 213,
267
- "remcos": 214,
268
- "trickbot": 215,
269
- "rig": 216,
270
- "tofsee": 217,
271
- "neutrino": 218,
272
- "icedid": 219,
273
- "dreambot": 220,
274
- "miuref": 221,
275
- "crypt": 222,
276
- "cerber": 223,
277
- "unclassified": 224,
278
- "sigma": 225,
279
- "spora": 226,
280
- "locky": 227,
281
- "fallout": 228,
282
- "banload": 229,
283
- "globeimposter": 230,
284
- "angler": 231,
285
- "ursnif": 232,
286
- "?": 233,
287
- "[PAD]": 234
288
- }
289
- }
290
- }
 
1
+ {"version":"1.0","truncation":{"max_length":160,"strategy":"LongestFirst","stride":0},"padding":{"strategy":{"Fixed":160},"direction":"Right","pad_to_multiple_of":null,"pad_id":234,"pad_type_id":0,"pad_token":"[PAD]"},"added_tokens":[{"id":1,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":3,"special":true,"content":"[end]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":234,"special":true,"content":"[PAD]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":235,"special":true,"content":"[MASK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":236,"special":true,"content":"[SEP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":null,"pre_tokenizer":null,"post_processor":null,"decoder":null,"model":{"type":"WordPiece","unk_token":"[UNK]","continuing_subword_prefix":"##","max_input_chars_per_word":100,"vocab":{"":0,"[UNK]":1,"[start]":2,"[end]":3,"malicious":4,"benign":5,"alert":6,"server_name":7,"sequence":8,"TLS":9,"cipher":10,"change_cipher_spec":11,"client_key_length":12,"c0":13,"s0":14,"c1":15,"s1":16,"c2":17,"s2":18,"c3":19,"s3":20,"c4":21,"s4":22,"c5":23,"s5":24,"c6":25,"s6":26,"c7":27,"s7":28,"c8":29,"s8":30,"c9":31,"s9":32,"c10":33,"s10":34,"c11":35,"s11":36,"c12":37,"s12":38,"c13":39,"s13":40,"c14":41,"s14":42,"c15":43,"s15":44,"c16":45,"s16":46,"l<1":47,"l:1":48,"l:2":49,"l:3":50,"l:4":51,"l:5":52,"l:6":53,"l:7":54,"l:8":55,"l:9":56,"l:10":57,"l:11":58,"l:12":59,"l:13":60,"l:14":61,"l:15":62,"l:16":63,"l:17":64,"l:18":65,"l:19":66,"l:20":67,"l>20":68,"l>10":69,"a":70,"b":71,"c":72,"d":73,"e":74,"f":75,"g":76,"h":77,"i":78,"j":79,"k":80,"l":81,"m":82,"n":83,"o":84,"p":85,"q":86,"r":87,"s":88,"t":89,"u":90,"v":91,"w":92,"x":93,"y":94,"z":95,"0":96,"1":97,"2":98,"3":99,"4":100,"5":101,"6":102,"7":103,"8":104,"9":105,".":106,"-":107,"SSLv2":108,"SSLv3":109,"TLS1.0":110,"TLS1.1":111,"TLS1.
2":112,"TLS1.3":113,"TLS1.3-d18":114,"TLS1.3-d19":115,"TLS_RSA_WITH_RC4_128_MD5":116,"TLS_RSA_WITH_RC4_128_SHA":117,"TLS_RSA_WITH_3DES_EDE_CBC_SHA":118,"TLS_DHE_DSS_WITH_3DES_EDE_CBC_SHA":119,"TLS_DHE_RSA_WITH_DES_CBC_SHA":120,"TLS_RSA_WITH_AES_128_CBC_SHA":121,"TLS_DHE_DSS_WITH_AES_128_CBC_SHA":122,"TLS_DHE_RSA_WITH_AES_128_CBC_SHA":123,"TLS_RSA_WITH_AES_256_CBC_SHA":124,"TLS_DHE_DSS_WITH_AES_256_CBC_SHA":125,"TLS_DHE_RSA_WITH_AES_256_CBC_SHA":126,"TLS_RSA_WITH_AES_128_CBC_SHA256":127,"TLS_RSA_WITH_AES_256_CBC_SHA256":128,"TLS_DHE_DSS_WITH_AES_128_CBC_SHA256":129,"TLS_RSA_WITH_CAMELLIA_128_CBC_SHA":130,"TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA":131,"TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA":132,"SSL_RSA_EXPORT1024_WITH_RC4_56_SHA":133,"TLS_DHE_RSA_WITH_AES_128_CBC_SHA256":134,"TLS_DHE_DSS_WITH_AES_256_CBC_SHA256":135,"TLS_DHE_RSA_WITH_AES_256_CBC_SHA256":136,"TLS_RSA_WITH_CAMELLIA_256_CBC_SHA":137,"TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA":138,"TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA":139,"TLS_RSA_WITH_SEED_CBC_SHA":140,"TLS_RSA_WITH_AES_128_GCM_SHA256":141,"TLS_RSA_WITH_AES_256_GCM_SHA384":142,"TLS_DHE_RSA_WITH_AES_128_GCM_SHA256":143,"TLS_DHE_RSA_WITH_AES_256_GCM_SHA384":144,"TLS_DHE_DSS_WITH_AES_128_GCM_SHA256":145,"TLS_DHE_DSS_WITH_AES_256_GCM_SHA384":146,"TLS_DHE_PSK_WITH_AES_128_GCM_SHA256":147,"TLS_DHE_PSK_WITH_AES_256_GCM_SHA384":148,"TLS_AES_128_GCM_SHA256":149,"TLS_AES_256_GCM_SHA384":150,"TLS_CHACHA20_POLY1305_SHA256":151,"TLS_AES_128_CCM_SHA256":152,"TLS_ECDHE_ECDSA_WITH_RC4_128_SHA":153,"TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA":154,"TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA":155,"TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA":156,"TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA":157,"TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256":158,"TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384":159,"TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256":160,"TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384":161,"TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256":162,"TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384":163,"TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256":164,"
TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384":165,"TLS_DHE_RSA_WITH_AES_128_CCM":166,"TLS_DHE_RSA_WITH_AES_256_CCM":167,"TLS_DHE_RSA_WITH_AES_128_CCM_8":168,"TLS_DHE_RSA_WITH_AES_256_CCM_8":169,"TLS_DHE_PSK_WITH_AES_128_CCM":170,"TLS_DHE_PSK_WITH_AES_256_CCM":171,"TLS_ECDHE_ECDSA_WITH_AES_128_CCM":172,"TLS_ECDHE_ECDSA_WITH_AES_256_CCM":173,"TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8":174,"TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8":175,"TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256":176,"TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256":177,"TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256":178,"TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256":179,"TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256":180,"TLS_ECDHE_PSK_WITH_AES_128_GCM_SHA256":181,"TLS_ECDHE_PSK_WITH_AES_256_GCM_SHA384":182,"TLS_ECDHE_PSK_WITH_AES_128_CCM_SHA256":183,"FIN":184,"SYN":185,"RST":186,"PSH":187,"ACK":188,"URG":189,"ECE":190,"CWR":191,"ramnit":192,"crthrazy":193,"nymaim":194,"bunitu":195,"zeus":196,"azorult":197,"parite":198,"vawtrak":199,"reposfxg":200,"zeus-panda":201,"gandcrab":202,"bankerx":203,"gootkit":204,"dridex":205,"upatre":206,"qakbot":207,"chthonic":208,"emotet":209,"troldesh":210,"kovter":211,"boleto":212,"hancitor":213,"remcos":214,"trickbot":215,"rig":216,"tofsee":217,"neutrino":218,"icedid":219,"dreambot":220,"miuref":221,"crypt":222,"cerber":223,"unclassified":224,"sigma":225,"spora":226,"locky":227,"fallout":228,"banload":229,"globeimposter":230,"angler":231,"ursnif":232,"?":233,"[PAD]":234}}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"max_len": 256, "tokenizer_class": "PreTrainedTokenizerFast"}
 
1
+ {"max_len": 256, "special_tokens_map_file": "/root/.cache/huggingface/transformers/85a76eea59fe40ae80bc50b05c4fe93e7547727086c4a19787726e35a451f9fd.45ed21ffc69cb3eceab51050529cfc4e1b82b5f17027779bf75c6eacc17a5079", "name_or_path": "./out/ZeroQuant/W8A8_quantization/best", "tokenizer_class": "PreTrainedTokenizerFast"}