chemfie-gpt-experiment-1 / gpt2_tokenizer.json
gbyuvd's picture
Upload 4 files
b4bdfa5 verified
raw
history blame
21.4 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<mask>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "<extra_id_0>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6,
"content": "<extra_id_1>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 7,
"content": "<extra_id_2>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 8,
"content": "<extra_id_3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 9,
"content": "<extra_id_4>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 10,
"content": "<extra_id_5>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 11,
"content": "<extra_id_6>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 12,
"content": "<extra_id_7>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 13,
"content": "<extra_id_8>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 14,
"content": "<extra_id_9>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 15,
"content": "<extra_id_10>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 16,
"content": "<extra_id_11>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 17,
"content": "<extra_id_12>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 18,
"content": "<extra_id_13>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 19,
"content": "<extra_id_14>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 20,
"content": "<extra_id_15>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 21,
"content": "<extra_id_16>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 22,
"content": "<extra_id_17>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 23,
"content": "<extra_id_18>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 24,
"content": "<extra_id_19>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 25,
"content": "<extra_id_20>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 26,
"content": "<extra_id_21>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 27,
"content": "<extra_id_22>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 28,
"content": "<extra_id_23>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 29,
"content": "<extra_id_24>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 30,
"content": "<extra_id_25>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 31,
"content": "<extra_id_26>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 32,
"content": "<extra_id_27>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 33,
"content": "<extra_id_28>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 34,
"content": "<extra_id_29>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 35,
"content": "<extra_id_30>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 36,
"content": "<extra_id_31>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 37,
"content": "<extra_id_32>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 38,
"content": "<extra_id_33>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 39,
"content": "<extra_id_34>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 40,
"content": "<extra_id_35>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 41,
"content": "<extra_id_36>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 42,
"content": "<extra_id_37>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 43,
"content": "<extra_id_38>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 44,
"content": "<extra_id_39>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 45,
"content": "<extra_id_40>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 46,
"content": "<extra_id_41>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 47,
"content": "<extra_id_42>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 48,
"content": "<extra_id_43>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 49,
"content": "<extra_id_44>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 50,
"content": "<extra_id_45>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 51,
"content": "<extra_id_46>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 52,
"content": "<extra_id_47>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 53,
"content": "<extra_id_48>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 54,
"content": "<extra_id_49>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "WhitespaceSplit"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"</s>": {
"id": "</s>",
"ids": [
2
],
"tokens": [
"</s>"
]
},
"<s>": {
"id": "<s>",
"ids": [
3
],
"tokens": [
"<s>"
]
}
}
},
"decoder": null,
"model": {
"type": "WordPiece",
"unk_token": "<unk>",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 100,
"vocab": {
"<pad>": 0,
"<unk>": 1,
"</s>": 2,
"<s>": 3,
"<mask>": 4,
"<extra_id_0>": 5,
"<extra_id_1>": 6,
"<extra_id_2>": 7,
"<extra_id_3>": 8,
"<extra_id_4>": 9,
"<extra_id_5>": 10,
"<extra_id_6>": 11,
"<extra_id_7>": 12,
"<extra_id_8>": 13,
"<extra_id_9>": 14,
"<extra_id_10>": 15,
"<extra_id_11>": 16,
"<extra_id_12>": 17,
"<extra_id_13>": 18,
"<extra_id_14>": 19,
"<extra_id_15>": 20,
"<extra_id_16>": 21,
"<extra_id_17>": 22,
"<extra_id_18>": 23,
"<extra_id_19>": 24,
"<extra_id_20>": 25,
"<extra_id_21>": 26,
"<extra_id_22>": 27,
"<extra_id_23>": 28,
"<extra_id_24>": 29,
"<extra_id_25>": 30,
"<extra_id_26>": 31,
"<extra_id_27>": 32,
"<extra_id_28>": 33,
"<extra_id_29>": 34,
"<extra_id_30>": 35,
"<extra_id_31>": 36,
"<extra_id_32>": 37,
"<extra_id_33>": 38,
"<extra_id_34>": 39,
"<extra_id_35>": 40,
"<extra_id_36>": 41,
"<extra_id_37>": 42,
"<extra_id_38>": 43,
"<extra_id_39>": 44,
"<extra_id_40>": 45,
"<extra_id_41>": 46,
"<extra_id_42>": 47,
"<extra_id_43>": 48,
"<extra_id_44>": 49,
"<extra_id_45>": 50,
"<extra_id_46>": 51,
"<extra_id_47>": 52,
"<extra_id_48>": 53,
"<extra_id_49>": 54,
"[C]": 55,
"[=C]": 56,
"[Ring1]": 57,
"[Branch1]": 58,
"[O]": 59,
"[=Branch1]": 60,
"[N]": 61,
"[=O]": 62,
"[Ring2]": 63,
"[Branch2]": 64,
"[=N]": 65,
"[#Branch1]": 66,
"[=Branch2]": 67,
"[S]": 68,
"[#Branch2]": 69,
"[C@H1]": 70,
"[C@@H1]": 71,
"[F]": 72,
"[#C]": 73,
"[P]": 74,
"[=Ring1]": 75,
"[Cl]": 76,
"[/C]": 77,
"[NH1]": 78,
"[C@]": 79,
"[O-1]": 80,
"[C@@]": 81,
"[Br]": 82,
"[N+1]": 83,
"[=Ring2]": 84,
"[\\C]": 85,
"[#N]": 86,
"[=S]": 87,
"[/N]": 88,
".[Cl]": 89,
"[CH1]": 90,
"[=N+1]": 91,
".[O]": 92,
"[CH0]": 93,
"[\\N]": 94,
"[I]": 95,
"[NH1+1]": 96,
".[Na+1]": 97,
"[/O]": 98,
".[C]": 99,
"[\\O]": 100,
".[N]": 101,
".[Br-1]": 102,
"[\\S]": 103,
"[S+1]": 104,
".[Cl-1]": 105,
"[Branch3]": 106,
".[I-1]": 107,
"[B]": 108,
"[Si]": 109,
"[/C@@H1]": 110,
"[=P]": 111,
"[/C@H1]": 112,
"[/S]": 113,
"[NH2+1]": 114,
"[OH1+1]": 115,
"[Se]": 116,
"[=N-1]": 117,
"[2H]": 118,
"[N-1]": 119,
"[CH1-1]": 120,
"[P+1]": 121,
"[C-1]": 122,
".[K+1]": 123,
"[\\C@H1]": 124,
"[\\C@@H1]": 125,
"[/N+1]": 126,
"[B-1]": 127,
"[C+1]": 128,
"[NH0]": 129,
"[#N+1]": 130,
".[F]": 131,
"[OH0]": 132,
".[O-1]": 133,
"[18F]": 134,
"[Cl+3]": 135,
"[\\NH1]": 136,
".[Li+1]": 137,
".[Br]": 138,
"[P@@]": 139,
"[P@]": 140,
"[3H]": 141,
"[-/Ring2]": 142,
"[/Br]": 143,
"[Ring3]": 144,
"[CH2-1]": 145,
"[Na]": 146,
"[\\Cl]": 147,
"[S-1]": 148,
"[\\O-1]": 149,
".[I]": 150,
"[/C@@]": 151,
"[11CH3]": 152,
"[125I]": 153,
"[S@@+1]": 154,
"[S@+1]": 155,
"[\\N+1]": 156,
"[N@+1]": 157,
"[Br-1]": 158,
"[\\F]": 159,
"[/Cl]": 160,
"[As]": 161,
"[PH1]": 162,
"[/C@]": 163,
"[CH1+1]": 164,
"[-\\Ring2]": 165,
"[=Se]": 166,
"[NH1-1]": 167,
"[=O+1]": 168,
"[O+1]": 169,
"[=NH1+1]": 170,
"[=S+1]": 171,
"[N@@+1]": 172,
"[/P]": 173,
"[/I]": 174,
"[Te]": 175,
".[OH1-1]": 176,
".[Ca+2]": 177,
"[Cl-1]": 178,
"[\\Br]": 179,
"[=NH2+1]": 180,
"[/F]": 181,
".[Zn+2]": 182,
".[NaH1]": 183,
".[Mg+2]": 184,
"[N@]": 185,
"[N@@]": 186,
".[S]": 187,
"[I+1]": 188,
"[\\C@@]": 189,
"[/O-1]": 190,
"[Al]": 191,
"[123I]": 192,
"[-\\Ring1]": 193,
"[=As]": 194,
"[/S+1]": 195,
"[BH2-1]": 196,
"[/NH1]": 197,
"[\\I]": 198,
"[-/Ring1]": 199,
"[NH3+1]": 200,
"[14C]": 201,
"[SeH1]": 202,
"[S@@]": 203,
"[131I]": 204,
"[\\C@]": 205,
"[11C]": 206,
"[I-1]": 207,
"[Na+1]": 208,
".[N-1]": 209,
"[S@]": 210,
"[\\-Ring1]": 211,
"[Fe]": 212,
"[SH1]": 213,
".[Al+3]": 214,
"[SiH2]": 215,
"[\\P]": 216,
"[=Branch3]": 217,
"[=Se+1]": 218,
".[Ag+1]": 219,
"[=11C]": 220,
".[Zn]": 221,
"[14CH2]": 222,
"[K+1]": 223,
"[=P@@]": 224,
".[2H]": 225,
"[=Te+1]": 226,
"[#C-1]": 227,
"[K]": 228,
"[=P@]": 229,
"[=PH1]": 230,
"[=14C]": 231,
".[S-1]": 232,
"[14CH3]": 233,
"[BH3-1]": 234,
"[/-Ring1]": 235,
"[\\S+1]": 236,
"[As+1]": 237,
"[=Si]": 238,
"[124I]": 239,
"[/C-1]": 240,
"[Zn]": 241,
"[/N-1]": 242,
".[Sr+2]": 243,
"[FeH2-2]": 244,
"[SH1+1]": 245,
"[/B]": 246,
"[P@+1]": 247,
"[19F]": 248,
".[H+1]": 249,
"[Se+1]": 250,
"[Cl+1]": 251,
"[=S@@]": 252,
"[/Se]": 253,
"[13C]": 254,
"[/125I]": 255,
"[SiH1]": 256,
".[Rb+1]": 257,
"[/Si]": 258,
".[Ba+2]": 259,
"[=SH1]": 260,
"[OH2+1]": 261,
"[BH1-1]": 262,
".[MgH2]": 263,
"[Ag+1]": 264,
"[=Mg]": 265,
"[/123I]": 266,
"[\\B]": 267,
"[Ca+2]": 268,
"[10B]": 269,
"[P@@+1]": 270,
"[=OH1+1]": 271,
"[Fe+2]": 272,
".[Cs+1]": 273,
".[3H]": 274,
"[15NH1]": 275,
"[=14CH1]": 276,
"[\\N-1]": 277,
"[=13CH1]": 278,
".[NH2+1]": 279,
"[=B]": 280,
"[Fe+3]": 281,
".[SiH4]": 282,
"[127I]": 283,
"[/131I]": 284,
"[35S]": 285,
"[#11C]": 286,
"[32P]": 287,
"[=S@]": 288,
"[13CH2]": 289,
".[C-1]": 290,
"[\\S-1]": 291,
"[\\125I]": 292,
"[14CH1]": 293,
"[17F]": 294,
"[\\-Ring2]": 295,
"[Fe-3]": 296,
"[Fe+1]": 297,
"[NH2+2]": 298,
"[Ag]": 299,
".[B]": 300,
"[11CH2]": 301,
"[13CH3]": 302,
"[B@-1]": 303,
".[Ag]": 304,
".[Bi+3]": 305,
"[/-Ring2]": 306,
"[#S]": 307,
".[Li]": 308,
".[S-2]": 309,
"[13CH1]": 310,
"[76Br]": 311,
"[/13CH1]": 312,
"[Li+1]": 313,
".[NH1-1]": 314,
"[=Al]": 315,
"[=13C]": 316,
"[/P+1]": 317,
"[Br+2]": 318,
"[211At]": 319,
"[At]": 320,
"[Cl+2]": 321,
"[Zn+2]": 322,
"[B@@-1]": 323,
"[AsH1]": 324,
"[/S-1]": 325,
"[/13C]": 326,
"[\\Si]": 327,
"[=PH2]": 328,
"[=SeH1]": 329,
".[TeH2]": 330,
"[Ra]": 331,
"[/SeH1]": 332,
"[\\PH1]": 333,
"[Al-3]": 334,
"[11CH1]": 335,
"[F-1]": 336,
"[\\SeH1]": 337,
".[CaH2]": 338,
"[\\123I]": 339,
"[Se-1]": 340,
"[=Te]": 341,
".[Zn+1]": 342,
"[TeH1]": 343,
".[F-1]": 344,
"[I+3]": 345,
"[\\Se]": 346,
"[Mg+2]": 347,
"[75Se]": 348,
"[Zn-2]": 349,
"[#O+1]": 350,
".[H-1]": 351,
".[O-2]": 352,
"[15N]": 353,
".[BH0]": 354,
"[=B-1]": 355,
"[223Ra]": 356,
"[#14C]": 357,
"[Ba+2]": 358,
".[KH1]": 359,
".[Be+2]": 360,
"[\\CH1-1]": 361,
".[K]": 362,
"[#Ring2]": 363,
"[/S@]": 364,
"[/S@@]": 365,
"[SH0]": 366,
"[Fe-4]": 367,
"[=Fe]": 368,
"[=SH0]": 369,
"[=CH1-1]": 370,
"[N+3]": 371,
"[SH2]": 372,
".[125IH1]": 373,
"[/PH1]": 374,
"[Cs]": 375,
"[14C@@]": 376,
"[Te-1]": 377,
".[Se-2]": 378,
"[135I]": 379,
"[=32PH1]": 380,
"[73Se]": 381,
"[11C@@H1]": 382,
"[14C@H1]": 383,
"[18OH1]": 384,
"[#P]": 385,
"[He]": 386,
".[68Ga+3]": 387,
".[OH3+1]": 388,
".[18F-1]": 389,
"[129Xe]": 390,
"[Se-2]": 391,
"[\\NH1-1]": 392,
"[Si-1]": 393,
"[=18O]": 394,
".[HH1]": 395,
"[I+2]": 396,
"[123I-1]": 397,
"[131I-1]": 398,
"[127Xe]": 399,
"[133Xe]": 400,
"[89Sr+2]": 401,
"[82Rb+1]": 402,
"[Rb]": 403,
"[=S-1]": 404,
"[81Kr]": 405,
"[18F-1]": 406,
"[13NH3]": 407,
"[SeH2]": 408,
"[AsH3]": 409,
"[Kr]": 410,
"[Xe]": 411,
"[N@H1+1]": 412,
"[/As]": 413,
".[NH4+1]": 414,
"[\\C-1]": 415,
"[Si@]": 416,
".[SH1-1]": 417,
"[OH1]": 418,
"[18FH1]": 419,
"[123IH1]": 420,
".[Bi]": 421,
"[BH0]": 422,
"[SiH1-1]": 423,
".[As]": 424,
"[=Ring3]": 425,
"[SH1-1]": 426,
"[\\11CH3]": 427,
"[\\3H]": 428,
"[MgH2]": 429,
"[LiH1]": 430,
"[42K+1]": 431,
"[123Te]": 432,
"[22Na+1]": 433,
"[125I-1]": 434,
"[85Sr+2]": 435,
"[SrH2]": 436,
"[=32P]": 437,
"[15OH2]": 438,
"[47Ca+2]": 439,
".[85Sr+2]": 440,
"[85SrH2]": 441,
"[45Ca+2]": 442,
"[PH2+1]": 443,
"[11C-1]": 444,
"[Mg+1]": 445,
"[/P@@]": 446,
".[SiH3-1]": 447,
".[LiH1]": 448,
"[Ca]": 449,
"[Mg]": 450,
".[NH3+1]": 451,
"[124I-1]": 452,
"[As-1]": 453,
"[O-2]": 454,
"[/14C]": 455,
"[Ba]": 456,
"[82Rb]": 457,
"[76BrH1]": 458,
"[131Cs]": 459,
"[125IH1]": 460,
"[H]": 461
}
}
}