Transformers
mohammad-khodadad commited on
Commit
55e12b0
·
verified ·
1 Parent(s): 787d61c

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +900 -900
  2. vocab.txt +326 -326
tokenizer.json CHANGED
@@ -146,910 +146,910 @@
146
  "max_input_chars_per_word": 100,
147
  "vocab": {
148
  "[PAD]": 0,
149
- "##ethyl": 1,
150
- "yl": 2,
151
- "##henyl": 3,
152
- "##phenyl": 4,
153
- "##oxy": 5,
154
- "##methyl": 6,
155
- "##enz": 7,
156
- "hydroxy": 8,
157
- "##azol": 9,
158
- "##arb": 10,
159
- "##ethoxy": 11,
160
- "##idin": 12,
161
- "##amide": 13,
162
- "carb": 14,
163
- "phenyl": 15,
164
- "eth": 16,
165
- "##idene": 17,
166
- "##hydro": 18,
167
- "dimethyl": 19,
168
- "acet": 20,
169
- "##etr": 21,
170
- "pyr": 22,
171
- "thi": 23,
172
- "ethyl": 24,
173
- "##amino": 25,
174
- "##hydroxy": 26,
175
- "##prop": 27,
176
- "##anyl": 28,
177
- "benzo": 29,
178
- "tetr": 30,
179
- "oxy": 31,
180
- "##luoro": 32,
181
- "##dec": 33,
182
- "##idine": 34,
183
- "##ahydro": 35,
184
- "##benz": 36,
185
- "dihydro": 37,
186
- "carboxyl": 38,
187
- "##pyr": 39,
188
- "##phen": 40,
189
- "##oyl": 41,
190
- "##pent": 42,
191
- "##onyl": 43,
192
- "##uran": 44,
193
- "##adec": 45,
194
- "##iper": 46,
195
- "##methoxy": 47,
196
- "hex": 48,
197
- "cyclo": 49,
198
- "##azin": 50,
199
- "##but": 51,
200
- "##hex": 52,
201
- "nit": 53,
202
- "##mid": 54,
203
- "nitro": 55,
204
- "dic": 56,
205
- "pent": 57,
206
- "##ophen": 58,
207
- "phen": 59,
208
- "##sulf": 60,
209
- "##imid": 61,
210
- "##aph": 62,
211
- "acetyl": 63,
212
- "##acet": 64,
213
- "##aphth": 65,
214
- "sulf": 66,
215
- "benzyl": 67,
216
- "hept": 68,
217
- "##aphthal": 69,
218
- "acetate": 70,
219
- "##amethyl": 71,
220
- "##azole": 72,
221
- "##osph": 73,
222
- "##carb": 74,
223
- "carbonyl": 75,
224
- "##anium": 76,
225
- "##atri": 77,
226
- "tetra": 78,
227
- "##orph": 79,
228
- "amine": 80,
229
- "##azine": 81,
230
- "##thr": 82,
231
- "meth": 83,
232
- "carboxylic": 84,
233
- "##onitrile": 85,
234
- "##othi": 86,
235
- "diethyl": 87,
236
- "benzene": 88,
237
- "phosph": 89,
238
- "pyridine": 90,
239
- "##acetyl": 91,
240
- "##benzene": 92,
241
- "##hydr": 93,
242
- "##azo": 94,
243
- "##urin": 95,
244
- "carboxy": 96,
245
- "##arbox": 97,
246
- "##phosph": 98,
247
- "indole": 99,
248
  "[UNK]": 100,
249
  "[CLS]": 101,
250
  "[SEP]": 102,
251
  "[MASK]": 103,
252
- "##anol": 104,
253
- "##oct": 105,
254
- "hydr": 106,
255
- "##tetr": 107,
256
- "urea": 108,
257
- "acetic": 109,
258
- "##urea": 110,
259
- "tric": 111,
260
- "##meth": 112,
261
- "##azep": 113,
262
- "yn": 114,
263
- "phenol": 115,
264
- "##butyl": 116,
265
- "##hyd": 117,
266
- "##orm": 118,
267
- "##aldehyde": 119,
268
- "##acetate": 120,
269
- "hydrochloride": 121,
270
- "thio": 122,
271
- "##oxane": 123,
272
- "phosphoryl": 124,
273
- "methanol": 125,
274
- "spiro": 126,
275
- "##indole": 127,
276
- "##urine": 128,
277
- "sil": 129,
278
- "##anil": 130,
279
- "##oxo": 131,
280
- "##odium": 132,
281
- "fluor": 133,
282
- "tet": 134,
283
- "tris": 135,
284
- "anthr": 136,
285
- "##izin": 137,
286
- "hydrox": 138,
287
- "##benzyl": 139,
288
- "##acetic": 140,
289
- "##carbox": 141,
290
- "thiol": 142,
291
- "##tert": 143,
292
- "iod": 144,
293
- "acetonitrile": 145,
294
- "octa": 146,
295
- "##acyl": 147,
296
- "chrys": 148,
297
- "##icos": 149,
298
- "##inum": 150,
299
- "##ethanol": 151,
300
- "##ylate": 152,
301
- "dodecyl": 153,
302
- "iodide": 154,
303
- "##erc": 155,
304
- "bromide": 156,
305
- "hexane": 157,
306
- "##inic": 158,
307
- "##anth": 159,
308
- "##fluor": 160,
309
- "fluoride": 161,
310
- "##alen": 162,
311
- "bor": 163,
312
- "##iden": 164,
313
- "##yrin": 165,
314
- "##iod": 166,
315
- "##onium": 167,
316
- "##uric": 168,
317
- "##then": 169,
318
- "oxal": 170,
319
- "##inden": 171,
320
- "hyd": 172,
321
- "##inc": 173,
322
- "##hexane": 174,
323
- "chromium": 175,
324
- "##doc": 176,
325
- "##ylic": 177,
326
- "##adium": 178,
327
- "sel": 179,
328
- "brom": 180,
329
- "amide": 181,
330
- "ide": 182,
331
- "##erm": 183,
332
- "chlor": 184,
333
- "alum": 185,
334
- "disulf": 186,
335
- "fg": 187,
336
- "manganese": 188,
337
- "epoxy": 189,
338
- "##echn": 190,
339
- "##bd": 191,
340
- "##nit": 192,
341
- "sti": 193,
342
- "##ithi": 194,
343
- "oxid": 195,
344
- "selen": 196,
345
- "hydroxide": 197,
346
- "nitric": 198,
347
- "boron": 199,
348
- "##amate": 200,
349
- "##yrene": 201,
350
- "formaldehyde": 202,
351
- "silic": 203,
352
- "##amid": 204,
353
- "nitr": 205,
354
- "perc": 206,
355
- "##oline": 207,
356
- "##amic": 208,
357
- "cadmium": 209,
358
- "ij": 210,
359
- "##ylene": 211,
360
- "##elen": 212,
361
- "##inine": 213,
362
- "formic": 214,
363
- "##brom": 215,
364
- "##ocin": 216,
365
- "yt": 217,
366
- "##ophenyl": 218,
367
- "##germ": 219,
368
- "##iron": 220,
369
- "##role": 221,
370
- "##aza": 222,
371
- "mol": 223,
372
- "cyan": 224,
373
- "##onine": 225,
374
- "nitrite": 226,
375
- "##inin": 227,
376
- "peroxide": 228,
377
- "##hec": 229,
378
- "anion": 230,
379
- "germ": 231,
380
- "methan": 232,
381
- "selenium": 233,
382
- "##lead": 234,
383
- "##enic": 235,
384
- "phthal": 236,
385
- "##lut": 237,
386
- "##roph": 238,
387
- "chrom": 239,
388
- "##onit": 240,
389
- "hypo": 241,
390
- "sulfide": 242,
391
- "iodine": 243,
392
- "trin": 244,
393
- "prot": 245,
394
- "xy": 246,
395
- "##esium": 247,
396
- "phospho": 248,
397
- "##ecan": 249,
398
- "##ophosph": 250,
399
- "americ": 251,
400
- "##onin": 252,
401
- "chloroform": 253,
402
- "##inite": 254,
403
- "tol": 255,
404
- "##trin": 256,
405
- "toluene": 257,
406
- "ber": 258,
407
- "ine": 259,
408
- "perm": 260,
409
- "##odi": 261,
410
- "ino": 262,
411
- "rad": 263,
412
- "##phe": 264,
413
- "radon": 265,
414
- "##itro": 266,
415
- "arb": 267,
416
- "chlorine": 268,
417
- "ferm": 269,
418
- "yr": 270,
419
- "##ryp": 271,
420
- "##tio": 272,
421
- "plat": 273,
422
- "zol": 274,
423
- "ent": 275,
424
- "phe": 276,
425
- "disulfide": 277,
426
- "cet": 278,
427
- "lf": 279,
428
- "xo": 280,
429
- "clo": 281,
430
- "ful": 282,
431
- "hyp": 283,
432
- "iu": 284,
433
- "##bene": 285,
434
- "##ameth": 286,
435
- "argon": 287,
436
- "chl": 288,
437
- "##acycl": 289,
438
- "dro": 290,
439
- "hy": 291,
440
- "lm": 292,
441
- "##opyran": 293,
442
- "##tit": 294,
443
- "dif": 295,
444
- "##amin": 296,
445
- "##anim": 297,
446
- "dich": 298,
447
- "##ydro": 299,
448
- "##ydroxy": 300,
449
- "##hlo": 301,
450
- "oxo": 302,
451
- "##hloro": 303,
452
- "##clo": 304,
453
- "##yclo": 305,
454
- "methoxy": 306,
455
- "2s": 307,
456
- "chloro": 308,
457
- "carbox": 309,
458
- "2r": 310,
459
- "##cyclo": 311,
460
- "##ulfanyl": 312,
461
- "##luo": 313,
462
- "1h": 314,
463
- "3r": 315,
464
- "4s": 316,
465
- "methylidene": 317,
466
- "5s": 318,
467
- "5r": 319,
468
- "6r": 320,
469
- "3s": 321,
470
- "methoxyphenyl": 322,
471
- "sulfanyl": 323,
472
- "hydroxymethyl": 324,
473
- "diox": 325,
474
- "4r": 326,
475
- "##yrim": 327,
476
- "dihydroxy": 328,
477
- "dione": 329,
478
- "thiazol": 330,
479
- "methylphenyl": 331,
480
- "chlorophenyl": 332,
481
- "fluoro": 333,
482
- "##romo": 334,
483
- "propan": 335,
484
- "carboxylate": 336,
485
- "acetamide": 337,
486
- "bromo": 338,
487
- "pyrim": 339,
488
- "##dol": 340,
489
- "carboxamide": 341,
490
- "##hrom": 342,
491
- "##uinol": 343,
492
- "##acyclo": 344,
493
- "dioxo": 345,
494
- "tetrahydro": 346,
495
- "trihydroxy": 347,
496
- "oxan": 348,
497
- "##anoyl": 349,
498
- "fluorophenyl": 350,
499
- "dimethoxy": 351,
500
- "pyrrol": 352,
501
- "ium": 353,
502
- "##hromen": 354,
503
- "1r": 355,
504
- "1s": 356,
505
- "##propyl": 357,
506
- "furan": 358,
507
- "##sulfanyl": 359,
508
- "##oxan": 360,
509
- "trimethyl": 361,
510
- "ethoxy": 362,
511
- "pyridin": 363,
512
- "6s": 364,
513
- "ylmethyl": 365,
514
- "##oate": 366,
515
- "##fluoro": 367,
516
- "triazol": 368,
517
- "dichloro": 369,
518
- "butyl": 370,
519
- "##adeca": 371,
520
- "enyl": 372,
521
- "##propan": 373,
522
- "##anoate": 374,
523
- "carbam": 375,
524
- "indol": 376,
525
- "propyl": 377,
526
- "oxa": 378,
527
- "pyrimidin": 379,
528
- "##uinolin": 380,
529
- "trifluoro": 381,
530
- "benzothi": 382,
531
- "##piper": 383,
532
- "##pyrrol": 384,
533
- "##anilino": 385,
534
- "dimethoxyphenyl": 386,
535
- "nitrophenyl": 387,
536
- "##furan": 388,
537
- "##anoic": 389,
538
- "ylidene": 390,
539
- "##itri": 391,
540
- "##itrile": 392,
541
- "cyclohex": 393,
542
- "sulfanylidene": 394,
543
- "##phenoxy": 395,
544
- "##olan": 396,
545
- "##yano": 397,
546
- "benzamide": 398,
547
- "thia": 399,
548
- "##amido": 400,
549
- "cyano": 401,
550
- "##anone": 402,
551
- "thiophen": 403,
552
- "thiazolidin": 404,
553
- "benzodi": 405,
554
- "##chromen": 406,
555
- "##aphthalen": 407,
556
- "##etracyclo": 408,
557
- "trifluoromethyl": 409,
558
- "hydroxyphenyl": 410,
559
- "2h": 411,
560
- "##atetracyclo": 412,
561
- "pyrimidine": 413,
562
- "cyclopent": 414,
563
- "dichlorophenyl": 415,
564
- "methylbut": 416,
565
- "##atricyclo": 417,
566
- "acetamido": 418,
567
- "##indol": 419,
568
- "##benzoyl": 420,
569
- "##orphol": 421,
570
- "##icyclo": 422,
571
- "##imidazol": 423,
572
- "methylideneamino": 424,
573
- "benzodiox": 425,
574
- "benzoate": 426,
575
- "tert": 427,
576
- "oxoethyl": 428,
577
- "bromophenyl": 429,
578
- "chromen": 430,
579
- "##ydr": 431,
580
- "##anthr": 432,
581
- "##sulfon": 433,
582
- "##mino": 434,
583
- "9s": 435,
584
- "pyrrolidine": 436,
585
- "methylsulfanyl": 437,
586
- "##uinoline": 438,
587
- "dimethylphenyl": 439,
588
- "##piro": 440,
589
- "##diazol": 441,
590
- "morphol": 442,
591
- "sulfonyl": 443,
592
- "benzofuran": 444,
593
- "dimethylamino": 445,
594
- "##bicyclo": 446,
595
- "dien": 447,
596
- "7s": 448,
597
- "7r": 449,
598
- "##benzamide": 450,
599
- "6a": 451,
600
- "enamide": 452,
601
- "##pyrim": 453,
602
- "cyclohexyl": 454,
603
- "quinolin": 455,
604
- "8a": 456,
605
- "##cyclohex": 457,
606
- "hexahydro": 458,
607
- "naphthalen": 459,
608
- "morpholin": 460,
609
- "##uinazol": 461,
610
- "4h": 462,
611
- "oxolan": 463,
612
- "phenanthr": 464,
613
- "imid": 465,
614
- "8r": 466,
615
- "9r": 467,
616
- "enoate": 468,
617
- "dodec": 469,
618
- "##oxyphenyl": 470,
619
- "##aen": 471,
620
- "##hept": 472,
621
- "5e": 473,
622
- "5z": 474,
623
- "tetramethyl": 475,
624
- "5h": 476,
625
- "##pyrazol": 477,
626
- "benzodioxol": 478,
627
- "butan": 479,
628
- "3h": 480,
629
- "benzox": 481,
630
- "carbonitrile": 482,
631
- "8s": 483,
632
- "ethoxyphenyl": 484,
633
- "piperidin": 485,
634
- "##sulfonyl": 486,
635
- "##acetamide": 487,
636
- "10s": 488,
637
- "thiazolo": 489,
638
- "##benzoate": 490,
639
- "methyloxan": 491,
640
- "phenylprop": 492,
641
- "##quinolin": 493,
642
- "##abicyclo": 494,
643
- "piperazin": 495,
644
- "10r": 496,
645
- "oxyoxan": 497,
646
- "##rolo": 498,
647
- "cyclopenta": 499,
648
- "phenylethyl": 500,
649
- "##uinazolin": 501,
650
- "phenoxy": 502,
651
- "diphenyl": 503,
652
- "##enyl": 504,
653
- "imino": 505,
654
- "phenanthren": 506,
655
- "oxobut": 507,
656
- "carbamoyl": 508,
657
- "chlo": 509,
658
- "##carbam": 510,
659
- "##benzo": 511,
660
- "##pentacyclo": 512,
661
- "##cyclopent": 513,
662
- "enoyl": 514,
663
- "##sulfonamide": 515,
664
- "ene": 516,
665
- "2e": 517,
666
- "triazolo": 518,
667
- "##rophen": 519,
668
- "##oryl": 520,
669
- "##azanium": 521,
670
- "benzothiazol": 522,
671
- "benzimidazol": 523,
672
- "##pyrimidin": 524,
673
- "##piperidin": 525,
674
- "thiadiazol": 526,
675
- "dioxopyrrol": 527,
676
- "13r": 528,
677
- "pyrazol": 529,
678
- "azanium": 530,
679
- "diazin": 531,
680
- "##piperazin": 532,
681
- "##carbonyl": 533,
682
- "oxochromen": 534,
683
- "14s": 535,
684
- "##aene": 536,
685
- "oxymethyl": 537,
686
- "##azatetracyclo": 538,
687
- "trimethoxy": 539,
688
- "propanoyl": 540,
689
- "13s": 541,
690
- "##ideneamino": 542,
691
- "oxazol": 543,
692
- "ethanone": 544,
693
- "acetyloxy": 545,
694
- "tetradec": 546,
695
- "quinoline": 547,
696
- "trien": 548,
697
- "propanamide": 549,
698
- "##rophenoxy": 550,
699
- "methylpropyl": 551,
700
- "hexadeca": 552,
701
- "##ophene": 553,
702
- "diol": 554,
703
- "##spiro": 555,
704
- "##othioyl": 556,
705
- "methylpent": 557,
706
- "##ridec": 558,
707
- "2z": 559,
708
- "propanoate": 560,
709
- "imidazol": 561,
710
- "benzoyl": 562,
711
- "ethenyl": 563,
712
- "octahydro": 564,
713
- "pyrrolidin": 565,
714
- "heptadeca": 566,
715
- "pyrrole": 567,
716
- "##azinyl": 568,
717
- "hydroxyethyl": 569,
718
- "011": 570,
719
- "trione": 571,
720
- "methylamino": 572,
721
- "4ar": 573,
722
- "methanone": 574,
723
- "##quinoline": 575,
724
- "12s": 576,
725
- "17r": 577,
726
- "##uino": 578,
727
- "##xal": 579,
728
- "##pyridin": 580,
729
- "##uinoxal": 581,
730
- "thiophene": 582,
731
- "methylprop": 583,
732
- "##cyclopenta": 584,
733
- "phenylmethoxy": 585,
734
- "diazinane": 586,
735
- "dihydropyr": 587,
736
- "ani": 588,
737
- "ylphenyl": 589,
738
- "anilino": 590,
739
- "##aphthalene": 591,
740
- "tridec": 592,
741
- "4as": 593,
742
- "ylethyl": 594,
743
- "11r": 595,
744
- "##anedi": 596,
745
- "dioxa": 597,
746
- "7a": 598,
747
- "##phosphoryl": 599,
748
- "14r": 600,
749
- "benzoic": 601,
750
- "quinazolin": 602,
751
- "oxopropan": 603,
752
- "oxane": 604,
753
- "propanoic": 605,
754
- "hydroxypropan": 606,
755
- "dicarbox": 607,
756
- "triox": 608,
757
- "6ar": 609,
758
- "012": 610,
759
- "11s": 611,
760
- "diethylamino": 612,
761
- "oxopent": 613,
762
- "12r": 614,
763
- "8ar": 615,
764
- "carbamate": 616,
765
- "##oxolan": 617,
766
- "3as": 618,
767
- "iodo": 619,
768
- "quinoxal": 620,
769
- "triol": 621,
770
- "diamino": 622,
771
- "carbamothioyl": 623,
772
- "pyrrolo": 624,
773
- "##isoindol": 625,
774
- "methylphenoxy": 626,
775
- "##butan": 627,
776
- "3ar": 628,
777
- "tetraen": 629,
778
- "cyclopentyl": 630,
779
- "tetradecahydro": 631,
780
- "17s": 632,
781
- "##imino": 633,
782
- "dihydroxyphenyl": 634,
783
- "##ndec": 635,
784
- "hydrazinyl": 636,
785
- "##hloride": 637,
786
- "##naphthalen": 638,
787
- "trimethoxyphenyl": 639,
788
- "##apentacyclo": 640,
789
- "##hydraz": 641,
790
- "ylmethylidene": 642,
791
- "oxadiazol": 643,
792
- "6as": 644,
793
- "trideca": 645,
794
- "##carbamoyl": 646,
795
- "diazenyl": 647,
796
- "yloxy": 648,
797
- "tetraene": 649,
798
- "ethylphenyl": 650,
799
- "##cyclohexyl": 651,
800
- "piperidine": 652,
801
- "tetrazol": 653,
802
- "##thieno": 654,
803
- "##hydrazide": 655,
804
- "octadec": 656,
805
- "hexadec": 657,
806
- "piperazine": 658,
807
- "9a": 659,
808
- "dodecahydro": 660,
809
- "difluoro": 661,
810
- "azabicyclo": 662,
811
- "##pyrrolidine": 663,
812
- "##propanoyl": 664,
813
- "trihydroxyoxan": 665,
814
- "methylanilino": 666,
815
- "benzothiophene": 667,
816
- "##chloride": 668,
817
- "sulfon": 669,
818
- "diamine": 670,
819
- "##amoyl": 671,
820
- "triazin": 672,
821
- "cyclopropyl": 673,
822
- "##hromene": 674,
823
- "purin": 675,
824
- "##enitrile": 676,
825
- "pentamethyl": 677,
826
- "pyrazole": 678,
827
- "8as": 679,
828
- "##anamine": 680,
829
- "pyran": 681,
830
- "benzenesulfonamide": 682,
831
- "penta": 683,
832
- "##furo": 684,
833
- "16s": 685,
834
- "dicarboxyl": 686,
835
- "12a": 687,
836
- "triaz": 688,
837
- "methylhept": 689,
838
- "oxido": 690,
839
- "##bromo": 691,
840
- "triazole": 692,
841
- "decahydro": 693,
842
- "undec": 694,
843
- "diazatricyclo": 695,
844
- "013": 696,
845
- "butylphenyl": 697,
846
- "15r": 698,
847
- "6h": 699,
848
- "9h": 700,
849
- "octadeca": 701,
850
- "methylpiperidin": 702,
851
- "##quinazolin": 703,
852
- "##anamide": 704,
853
- "[unused700]": 705,
854
- "[unused701]": 706,
855
- "[unused702]": 707,
856
- "[unused703]": 708,
857
- "[unused704]": 709,
858
- "[unused705]": 710,
859
- "[unused706]": 711,
860
- "[unused707]": 712,
861
- "[unused708]": 713,
862
- "[unused709]": 714,
863
- "[unused710]": 715,
864
- "[unused711]": 716,
865
- "[unused712]": 717,
866
- "[unused713]": 718,
867
- "[unused714]": 719,
868
- "[unused715]": 720,
869
- "[unused716]": 721,
870
- "[unused717]": 722,
871
- "[unused718]": 723,
872
- "[unused719]": 724,
873
- "[unused720]": 725,
874
- "[unused721]": 726,
875
- "[unused722]": 727,
876
- "[unused723]": 728,
877
- "[unused724]": 729,
878
- "[unused725]": 730,
879
- "[unused726]": 731,
880
- "[unused727]": 732,
881
- "[unused728]": 733,
882
- "[unused729]": 734,
883
- "[unused730]": 735,
884
- "[unused731]": 736,
885
- "[unused732]": 737,
886
- "[unused733]": 738,
887
- "[unused734]": 739,
888
- "[unused735]": 740,
889
- "[unused736]": 741,
890
- "[unused737]": 742,
891
- "[unused738]": 743,
892
- "[unused739]": 744,
893
- "[unused740]": 745,
894
- "[unused741]": 746,
895
- "[unused742]": 747,
896
- "[unused743]": 748,
897
- "[unused744]": 749,
898
- "[unused745]": 750,
899
- "[unused746]": 751,
900
- "[unused747]": 752,
901
- "[unused748]": 753,
902
- "[unused749]": 754,
903
- "[unused750]": 755,
904
- "[unused751]": 756,
905
- "[unused752]": 757,
906
- "[unused753]": 758,
907
- "[unused754]": 759,
908
- "[unused755]": 760,
909
- "[unused756]": 761,
910
- "[unused757]": 762,
911
- "[unused758]": 763,
912
- "[unused759]": 764,
913
- "[unused760]": 765,
914
- "[unused761]": 766,
915
- "[unused762]": 767,
916
- "[unused763]": 768,
917
- "[unused764]": 769,
918
- "[unused765]": 770,
919
- "[unused766]": 771,
920
- "[unused767]": 772,
921
- "[unused768]": 773,
922
- "[unused769]": 774,
923
- "[unused770]": 775,
924
- "[unused771]": 776,
925
- "[unused772]": 777,
926
- "[unused773]": 778,
927
- "[unused774]": 779,
928
- "[unused775]": 780,
929
- "[unused776]": 781,
930
- "[unused777]": 782,
931
- "[unused778]": 783,
932
- "[unused779]": 784,
933
- "[unused780]": 785,
934
- "[unused781]": 786,
935
- "[unused782]": 787,
936
- "[unused783]": 788,
937
- "[unused784]": 789,
938
- "[unused785]": 790,
939
- "[unused786]": 791,
940
- "[unused787]": 792,
941
- "[unused788]": 793,
942
- "[unused789]": 794,
943
- "[unused790]": 795,
944
- "[unused791]": 796,
945
- "[unused792]": 797,
946
- "[unused793]": 798,
947
- "[unused794]": 799,
948
- "[unused795]": 800,
949
- "[unused796]": 801,
950
- "[unused797]": 802,
951
- "[unused798]": 803,
952
- "[unused799]": 804,
953
- "[unused800]": 805,
954
- "[unused801]": 806,
955
- "[unused802]": 807,
956
- "[unused803]": 808,
957
- "[unused804]": 809,
958
- "[unused805]": 810,
959
- "[unused806]": 811,
960
- "[unused807]": 812,
961
- "[unused808]": 813,
962
- "[unused809]": 814,
963
- "[unused810]": 815,
964
- "[unused811]": 816,
965
- "[unused812]": 817,
966
- "[unused813]": 818,
967
- "[unused814]": 819,
968
- "[unused815]": 820,
969
- "[unused816]": 821,
970
- "[unused817]": 822,
971
- "[unused818]": 823,
972
- "[unused819]": 824,
973
- "[unused820]": 825,
974
- "[unused821]": 826,
975
- "[unused822]": 827,
976
- "[unused823]": 828,
977
- "[unused824]": 829,
978
- "[unused825]": 830,
979
- "[unused826]": 831,
980
- "[unused827]": 832,
981
- "[unused828]": 833,
982
- "[unused829]": 834,
983
- "[unused830]": 835,
984
- "[unused831]": 836,
985
- "[unused832]": 837,
986
- "[unused833]": 838,
987
- "[unused834]": 839,
988
- "[unused835]": 840,
989
- "[unused836]": 841,
990
- "[unused837]": 842,
991
- "[unused838]": 843,
992
- "[unused839]": 844,
993
- "[unused840]": 845,
994
- "[unused841]": 846,
995
- "[unused842]": 847,
996
- "[unused843]": 848,
997
- "[unused844]": 849,
998
- "[unused845]": 850,
999
- "[unused846]": 851,
1000
- "[unused847]": 852,
1001
- "[unused848]": 853,
1002
- "[unused849]": 854,
1003
- "[unused850]": 855,
1004
- "[unused851]": 856,
1005
- "[unused852]": 857,
1006
- "[unused853]": 858,
1007
- "[unused854]": 859,
1008
- "[unused855]": 860,
1009
- "[unused856]": 861,
1010
- "[unused857]": 862,
1011
- "[unused858]": 863,
1012
- "[unused859]": 864,
1013
- "[unused860]": 865,
1014
- "[unused861]": 866,
1015
- "[unused862]": 867,
1016
- "[unused863]": 868,
1017
- "[unused864]": 869,
1018
- "[unused865]": 870,
1019
- "[unused866]": 871,
1020
- "[unused867]": 872,
1021
- "[unused868]": 873,
1022
- "[unused869]": 874,
1023
- "[unused870]": 875,
1024
- "[unused871]": 876,
1025
- "[unused872]": 877,
1026
- "[unused873]": 878,
1027
- "[unused874]": 879,
1028
- "[unused875]": 880,
1029
- "[unused876]": 881,
1030
- "[unused877]": 882,
1031
- "[unused878]": 883,
1032
- "[unused879]": 884,
1033
- "[unused880]": 885,
1034
- "[unused881]": 886,
1035
- "[unused882]": 887,
1036
- "[unused883]": 888,
1037
- "[unused884]": 889,
1038
- "[unused885]": 890,
1039
- "[unused886]": 891,
1040
- "[unused887]": 892,
1041
- "[unused888]": 893,
1042
- "[unused889]": 894,
1043
- "[unused890]": 895,
1044
- "[unused891]": 896,
1045
- "[unused892]": 897,
1046
- "[unused893]": 898,
1047
- "[unused894]": 899,
1048
- "[unused895]": 900,
1049
- "[unused896]": 901,
1050
- "[unused897]": 902,
1051
- "[unused898]": 903,
1052
- "[unused899]": 904,
1053
  "[unused900]": 905,
1054
  "[unused901]": 906,
1055
  "[unused902]": 907,
 
146
  "max_input_chars_per_word": 100,
147
  "vocab": {
148
  "[PAD]": 0,
149
+ "phenyl": 1,
150
+ "oxy": 2,
151
+ "hydroxy": 3,
152
+ "meth": 4,
153
+ "oxo": 5,
154
+ "##oxo": 6,
155
+ "ethyl": 7,
156
+ "chloro": 8,
157
+ "cyclo": 9,
158
+ "benzo": 10,
159
+ "##amide": 11,
160
+ "acet": 12,
161
+ "tetra": 13,
162
+ "##methylidene": 14,
163
+ "fluoro": 15,
164
+ "thi": 16,
165
+ "eth": 17,
166
+ "sulfanyl": 18,
167
+ "##acid": 19,
168
+ "propyl": 20,
169
+ "deca": 21,
170
+ "##furan": 22,
171
+ "##pyridin": 23,
172
+ "##pyrimidin": 24,
173
+ "##carboxylate": 25,
174
+ "penta": 26,
175
+ "phen": 27,
176
+ "bromo": 28,
177
+ "hexa": 29,
178
+ "aza": 30,
179
+ "oxa": 31,
180
+ "nitro": 32,
181
+ "##carboxamide": 33,
182
+ "##idene": 34,
183
+ "acetyl": 35,
184
+ "##indol": 36,
185
+ "##chromen": 37,
186
+ "##quinolin": 38,
187
+ "##pyrrol": 39,
188
+ "##pyrrolidin": 40,
189
+ "##thiophen": 41,
190
+ "anilino": 42,
191
+ "carbonyl": 43,
192
+ "##carbonyl": 44,
193
+ "pent": 45,
194
+ "##oate": 46,
195
+ "benzyl": 47,
196
+ "##pyrazol": 48,
197
+ "##piperidin": 49,
198
+ "##piperazin": 50,
199
+ "sulfonyl": 51,
200
+ "##naphthalen": 52,
201
+ "##imidazol": 53,
202
+ "hex": 54,
203
+ "octa": 55,
204
+ "##idin": 56,
205
+ "thia": 57,
206
+ "hexyl": 58,
207
+ "sulfanylidene": 59,
208
+ "cyano": 60,
209
+ "hepta": 61,
210
+ "amido": 62,
211
+ "butyl": 63,
212
+ "##azanium": 64,
213
+ "##carboxylic": 65,
214
+ "tert-butyl": 66,
215
+ "##morpholin": 67,
216
+ "propanoyl": 68,
217
+ "tetr": 69,
218
+ "thio": 70,
219
+ "imino": 71,
220
+ "spiro": 72,
221
+ "carbamoyl": 73,
222
+ "hept": 74,
223
+ "##pyran": 75,
224
+ "##phenanthren": 76,
225
+ "##carbonitrile": 77,
226
+ "##quinazolin": 78,
227
+ "##purin": 79,
228
+ "pentyl": 80,
229
+ "##sulfonamide": 81,
230
+ "butanoyl": 82,
231
+ "phosphoryl": 83,
232
+ "##urea": 84,
233
+ "carboxy": 85,
234
+ "##chloride": 86,
235
+ "##nitrile": 87,
236
+ "nona": 88,
237
+ "##carbamate": 89,
238
+ "carbamo": 90,
239
+ "pyrido": 91,
240
+ "hydrazin": 92,
241
+ "##quinoxalin": 93,
242
+ "cosa": 94,
243
+ "iodo": 95,
244
+ "imidazo": 96,
245
+ "furo": 97,
246
+ "##imine": 98,
247
+ "pentanoyl": 99,
248
  "[UNK]": 100,
249
  "[CLS]": 101,
250
  "[SEP]": 102,
251
  "[MASK]": 103,
252
+ "##idine": 104,
253
+ "##inden": 105,
254
+ "##aniline": 106,
255
+ "##phosphate": 107,
256
+ "sulfamoyl": 108,
257
+ "oxido": 109,
258
+ "cos": 110,
259
+ "##thione": 111,
260
+ "##imidazolidin": 112,
261
+ "##thiol": 113,
262
+ "thiolo": 114,
263
+ "##azonia": 115,
264
+ "##sulfonate": 116,
265
+ "##picen": 117,
266
+ "##pyrazin": 118,
267
+ "##hydrazide": 119,
268
+ "decanoyl": 120,
269
+ "##sulfonic": 121,
270
+ "icosa": 122,
271
+ "sulfo": 123,
272
+ "##anthracen": 124,
273
+ "##thiourea": 125,
274
+ "silyl": 126,
275
+ "oc": 127,
276
+ "tria": 128,
277
+ "##pyridazin": 129,
278
+ "phosph": 130,
279
+ "conta": 131,
280
+ "pyrimido": 132,
281
+ "##carbaldehyde": 133,
282
+ "icos": 134,
283
+ "heptyl": 135,
284
+ "carbamimid": 136,
285
+ "formyl": 137,
286
+ "##silane": 138,
287
+ "phosphono": 139,
288
+ "aceto": 140,
289
+ "##carbazol": 141,
290
+ "##acridin": 142,
291
+ "##fluoren": 143,
292
+ "##azanide": 144,
293
+ "heni": 145,
294
+ "##xanthen": 146,
295
+ "octyl": 147,
296
+ "##azulen": 148,
297
+ "hexanoyl": 149,
298
+ "carbohydrazide": 150,
299
+ "silyloxy": 151,
300
+ "##sulfate": 152,
301
+ "tris": 153,
302
+ "nitroso": 154,
303
+ "carbo": 155,
304
+ "##phthalazin": 156,
305
+ "hydroxyl": 157,
306
+ "##phosphane": 158,
307
+ "##guanidin": 159,
308
+ "benzhydryl": 160,
309
+ "(2+)": 161,
310
+ "sulfinyl": 162,
311
+ "##bromide": 163,
312
+ "##benzaldehyde": 164,
313
+ "##naphthyridin": 165,
314
+ "##indazol": 166,
315
+ "cont": 167,
316
+ "##quinolizin": 168,
317
+ "carbonimidoyl": 169,
318
+ "##iodide": 170,
319
+ "##pteridin": 171,
320
+ "##chrysen": 172,
321
+ "##tetracen": 173,
322
+ "azido": 174,
323
+ "naphtho": 175,
324
+ "pheno": 176,
325
+ "cyanato": 177,
326
+ "##hydrate": 178,
327
+ "##carbamimidoyl": 179,
328
+ "sulfonato": 180,
329
+ "##hydrazine": 181,
330
+ "##pyrrolizin": 182,
331
+ "phenacyl": 183,
332
+ "imido": 184,
333
+ "##carboximidamide": 185,
334
+ "carboxylato": 186,
335
+ "imid": 187,
336
+ "formamide": 188,
337
+ "sulfonamido": 189,
338
+ "##indolizin": 190,
339
+ "buta": 191,
340
+ "carbothioyl": 192,
341
+ "peroxy": 193,
342
+ "phosphanyl": 194,
343
+ "##phenanthridin": 195,
344
+ "##oxamide": 196,
345
+ "kis": 197,
346
+ "##fluoride": 198,
347
+ "(3+)": 199,
348
+ "##carbonate": 200,
349
+ "##thian": 201,
350
+ "##carbamimidothioate": 202,
351
+ "oxino": 203,
352
+ "annulen": 204,
353
+ "##porphyrin": 205,
354
+ "##stannane": 206,
355
+ "##hydride": 207,
356
+ "acenaphthylen": 208,
357
+ "##ethyl": 209,
358
+ "##henyl": 210,
359
+ "##phenyl": 211,
360
+ "##oxy": 212,
361
+ "##methyl": 213,
362
+ "##enz": 214,
363
+ "##azol": 215,
364
+ "##arb": 216,
365
+ "##ethoxy": 217,
366
+ "carb": 218,
367
+ "##hydro": 219,
368
+ "dimethyl": 220,
369
+ "##etr": 221,
370
+ "pyr": 222,
371
+ "##amino": 223,
372
+ "##hydroxy": 224,
373
+ "##prop": 225,
374
+ "##anyl": 226,
375
+ "##luoro": 227,
376
+ "##dec": 228,
377
+ "##ahydro": 229,
378
+ "##benz": 230,
379
+ "dihydro": 231,
380
+ "carboxyl": 232,
381
+ "##pyr": 233,
382
+ "##phen": 234,
383
+ "##oyl": 235,
384
+ "##pent": 236,
385
+ "##onyl": 237,
386
+ "##uran": 238,
387
+ "##adec": 239,
388
+ "##iper": 240,
389
+ "##methoxy": 241,
390
+ "##azin": 242,
391
+ "##but": 243,
392
+ "##hex": 244,
393
+ "nit": 245,
394
+ "##mid": 246,
395
+ "dic": 247,
396
+ "##ophen": 248,
397
+ "##sulf": 249,
398
+ "##imid": 250,
399
+ "##aph": 251,
400
+ "##acet": 252,
401
+ "##aphth": 253,
402
+ "sulf": 254,
403
+ "##aphthal": 255,
404
+ "acetate": 256,
405
+ "##amethyl": 257,
406
+ "##azole": 258,
407
+ "##osph": 259,
408
+ "##carb": 260,
409
+ "##anium": 261,
410
+ "##atri": 262,
411
+ "##orph": 263,
412
+ "amine": 264,
413
+ "##azine": 265,
414
+ "##thr": 266,
415
+ "carboxylic": 267,
416
+ "##onitrile": 268,
417
+ "##othi": 269,
418
+ "diethyl": 270,
419
+ "benzene": 271,
420
+ "pyridine": 272,
421
+ "##acetyl": 273,
422
+ "##benzene": 274,
423
+ "##hydr": 275,
424
+ "##azo": 276,
425
+ "##urin": 277,
426
+ "##arbox": 278,
427
+ "##phosph": 279,
428
+ "indole": 280,
429
+ "##anol": 281,
430
+ "##oct": 282,
431
+ "hydr": 283,
432
+ "##tetr": 284,
433
+ "urea": 285,
434
+ "acetic": 286,
435
+ "tric": 287,
436
+ "##meth": 288,
437
+ "##azep": 289,
438
+ "phenol": 290,
439
+ "##butyl": 291,
440
+ "##hyd": 292,
441
+ "##orm": 293,
442
+ "##aldehyde": 294,
443
+ "##acetate": 295,
444
+ "hydrochloride": 296,
445
+ "##oxane": 297,
446
+ "methanol": 298,
447
+ "##indole": 299,
448
+ "##urine": 300,
449
+ "sil": 301,
450
+ "##anil": 302,
451
+ "##odium": 303,
452
+ "fluor": 304,
453
+ "tet": 305,
454
+ "anthr": 306,
455
+ "##izin": 307,
456
+ "hydrox": 308,
457
+ "##benzyl": 309,
458
+ "##acetic": 310,
459
+ "##carbox": 311,
460
+ "thiol": 312,
461
+ "##tert": 313,
462
+ "iod": 314,
463
+ "acetonitrile": 315,
464
+ "##acyl": 316,
465
+ "chrys": 317,
466
+ "##icos": 318,
467
+ "##inum": 319,
468
+ "##ethanol": 320,
469
+ "##ylate": 321,
470
+ "dodecyl": 322,
471
+ "iodide": 323,
472
+ "##erc": 324,
473
+ "bromide": 325,
474
+ "hexane": 326,
475
+ "##inic": 327,
476
+ "##anth": 328,
477
+ "##fluor": 329,
478
+ "fluoride": 330,
479
+ "##alen": 331,
480
+ "bor": 332,
481
+ "##iden": 333,
482
+ "##yrin": 334,
483
+ "##iod": 335,
484
+ "##onium": 336,
485
+ "##uric": 337,
486
+ "##then": 338,
487
+ "oxal": 339,
488
+ "hyd": 340,
489
+ "##inc": 341,
490
+ "##hexane": 342,
491
+ "chromium": 343,
492
+ "##doc": 344,
493
+ "##ylic": 345,
494
+ "##adium": 346,
495
+ "sel": 347,
496
+ "brom": 348,
497
+ "amide": 349,
498
+ "ide": 350,
499
+ "##erm": 351,
500
+ "chlor": 352,
501
+ "alum": 353,
502
+ "disulf": 354,
503
+ "manganese": 355,
504
+ "epoxy": 356,
505
+ "##echn": 357,
506
+ "##yb": 358,
507
+ "##nit": 359,
508
+ "sti": 360,
509
+ "##ithi": 361,
510
+ "oxid": 362,
511
+ "selen": 363,
512
+ "hydroxide": 364,
513
+ "nitric": 365,
514
+ "boron": 366,
515
+ "##amate": 367,
516
+ "##yrene": 368,
517
+ "formaldehyde": 369,
518
+ "silic": 370,
519
+ "##amid": 371,
520
+ "nitr": 372,
521
+ "perc": 373,
522
+ "##oline": 374,
523
+ "##amic": 375,
524
+ "cadmium": 376,
525
+ "##ylene": 377,
526
+ "##elen": 378,
527
+ "##inine": 379,
528
+ "formic": 380,
529
+ "##brom": 381,
530
+ "##ocin": 382,
531
+ "##ophenyl": 383,
532
+ "##germ": 384,
533
+ "##iron": 385,
534
+ "##role": 386,
535
+ "##aza": 387,
536
+ "mol": 388,
537
+ "cyan": 389,
538
+ "##onine": 390,
539
+ "nitrite": 391,
540
+ "##inin": 392,
541
+ "peroxide": 393,
542
+ "##hec": 394,
543
+ "anion": 395,
544
+ "germ": 396,
545
+ "methan": 397,
546
+ "selenium": 398,
547
+ "##lead": 399,
548
+ "##enic": 400,
549
+ "phthal": 401,
550
+ "##lut": 402,
551
+ "##roph": 403,
552
+ "chrom": 404,
553
+ "##onit": 405,
554
+ "hypo": 406,
555
+ "sulfide": 407,
556
+ "iodine": 408,
557
+ "trin": 409,
558
+ "prot": 410,
559
+ "##esium": 411,
560
+ "phospho": 412,
561
+ "##ecan": 413,
562
+ "##ophosph": 414,
563
+ "americ": 415,
564
+ "##onin": 416,
565
+ "chloroform": 417,
566
+ "##inite": 418,
567
+ "tol": 419,
568
+ "##trin": 420,
569
+ "toluene": 421,
570
+ "ber": 422,
571
+ "ine": 423,
572
+ "perm": 424,
573
+ "##odi": 425,
574
+ "ino": 426,
575
+ "rad": 427,
576
+ "##phe": 428,
577
+ "radon": 429,
578
+ "##itro": 430,
579
+ "arb": 431,
580
+ "chlorine": 432,
581
+ "ferm": 433,
582
+ "##tio": 434,
583
+ "##ryp": 435,
584
+ "plat": 436,
585
+ "zol": 437,
586
+ "ent": 438,
587
+ "phe": 439,
588
+ "disulfide": 440,
589
+ "cet": 441,
590
+ "clo": 442,
591
+ "ful": 443,
592
+ "hyp": 444,
593
+ "##bene": 445,
594
+ "##ameth": 446,
595
+ "argon": 447,
596
+ "chl": 448,
597
+ "##acycl": 449,
598
+ "dro": 450,
599
+ "##tit": 451,
600
+ "##opyran": 452,
601
+ "dif": 453,
602
+ "##amin": 454,
603
+ "##anim": 455,
604
+ "dich": 456,
605
+ "##ydro": 457,
606
+ "##ydroxy": 458,
607
+ "##hlo": 459,
608
+ "##hloro": 460,
609
+ "##clo": 461,
610
+ "##yclo": 462,
611
+ "methoxy": 463,
612
+ "carbox": 464,
613
+ "##cyclo": 465,
614
+ "##ulfanyl": 466,
615
+ "##luo": 467,
616
+ "methylidene": 468,
617
+ "methoxyphenyl": 469,
618
+ "hydroxymethyl": 470,
619
+ "diox": 471,
620
+ "##yrim": 472,
621
+ "dihydroxy": 473,
622
+ "dione": 474,
623
+ "thiazol": 475,
624
+ "methylphenyl": 476,
625
+ "chlorophenyl": 477,
626
+ "##romo": 478,
627
+ "propan": 479,
628
+ "carboxylate": 480,
629
+ "acetamide": 481,
630
+ "pyrim": 482,
631
+ "##dol": 483,
632
+ "carboxamide": 484,
633
+ "##hrom": 485,
634
+ "##uinol": 486,
635
+ "##acyclo": 487,
636
+ "dioxo": 488,
637
+ "tetrahydro": 489,
638
+ "trihydroxy": 490,
639
+ "oxan": 491,
640
+ "##anoyl": 492,
641
+ "fluorophenyl": 493,
642
+ "dimethoxy": 494,
643
+ "pyrrol": 495,
644
+ "ium": 496,
645
+ "##hromen": 497,
646
+ "##propyl": 498,
647
+ "furan": 499,
648
+ "##sulfanyl": 500,
649
+ "##oxan": 501,
650
+ "trimethyl": 502,
651
+ "ethoxy": 503,
652
+ "pyridin": 504,
653
+ "ylmethyl": 505,
654
+ "##fluoro": 506,
655
+ "triazol": 507,
656
+ "dichloro": 508,
657
+ "##adeca": 509,
658
+ "enyl": 510,
659
+ "##propan": 511,
660
+ "##anoate": 512,
661
+ "carbam": 513,
662
+ "indol": 514,
663
+ "pyrimidin": 515,
664
+ "##uinolin": 516,
665
+ "trifluoro": 517,
666
+ "benzothi": 518,
667
+ "##piper": 519,
668
+ "##anilino": 520,
669
+ "dimethoxyphenyl": 521,
670
+ "nitrophenyl": 522,
671
+ "##anoic": 523,
672
+ "ylidene": 524,
673
+ "##itri": 525,
674
+ "##itrile": 526,
675
+ "cyclohex": 527,
676
+ "##phenoxy": 528,
677
+ "##olan": 529,
678
+ "##yano": 530,
679
+ "benzamide": 531,
680
+ "##amido": 532,
681
+ "##anone": 533,
682
+ "thiophen": 534,
683
+ "thiazolidin": 535,
684
+ "benzodi": 536,
685
+ "##aphthalen": 537,
686
+ "##etracyclo": 538,
687
+ "trifluoromethyl": 539,
688
+ "hydroxyphenyl": 540,
689
+ "##atetracyclo": 541,
690
+ "pyrimidine": 542,
691
+ "cyclopent": 543,
692
+ "dichlorophenyl": 544,
693
+ "methylbut": 545,
694
+ "##atricyclo": 546,
695
+ "acetamido": 547,
696
+ "##benzoyl": 548,
697
+ "##orphol": 549,
698
+ "##icyclo": 550,
699
+ "methylideneamino": 551,
700
+ "benzodiox": 552,
701
+ "benzoate": 553,
702
+ "tert": 554,
703
+ "oxoethyl": 555,
704
+ "bromophenyl": 556,
705
+ "chromen": 557,
706
+ "##ydr": 558,
707
+ "##anthr": 559,
708
+ "##sulfon": 560,
709
+ "##mino": 561,
710
+ "pyrrolidine": 562,
711
+ "methylsulfanyl": 563,
712
+ "##uinoline": 564,
713
+ "dimethylphenyl": 565,
714
+ "##piro": 566,
715
+ "##diazol": 567,
716
+ "morphol": 568,
717
+ "benzofuran": 569,
718
+ "dimethylamino": 570,
719
+ "##bicyclo": 571,
720
+ "dien": 572,
721
+ "##benzamide": 573,
722
+ "enamide": 574,
723
+ "##pyrim": 575,
724
+ "cyclohexyl": 576,
725
+ "quinolin": 577,
726
+ "##cyclohex": 578,
727
+ "hexahydro": 579,
728
+ "naphthalen": 580,
729
+ "morpholin": 581,
730
+ "##uinazol": 582,
731
+ "oxolan": 583,
732
+ "phenanthr": 584,
733
+ "enoate": 585,
734
+ "dodec": 586,
735
+ "##oxyphenyl": 587,
736
+ "##aen": 588,
737
+ "##hept": 589,
738
+ "tetramethyl": 590,
739
+ "benzodioxol": 591,
740
+ "butan": 592,
741
+ "benzox": 593,
742
+ "carbonitrile": 594,
743
+ "ethoxyphenyl": 595,
744
+ "piperidin": 596,
745
+ "##sulfonyl": 597,
746
+ "##acetamide": 598,
747
+ "10s": 599,
748
+ "thiazolo": 600,
749
+ "##benzoate": 601,
750
+ "methyloxan": 602,
751
+ "phenylprop": 603,
752
+ "##abicyclo": 604,
753
+ "piperazin": 605,
754
+ "10r": 606,
755
+ "oxyoxan": 607,
756
+ "##rolo": 608,
757
+ "cyclopenta": 609,
758
+ "phenylethyl": 610,
759
+ "##uinazolin": 611,
760
+ "phenoxy": 612,
761
+ "diphenyl": 613,
762
+ "##enyl": 614,
763
+ "phenanthren": 615,
764
+ "oxobut": 616,
765
+ "chlo": 617,
766
+ "##carbam": 618,
767
+ "##benzo": 619,
768
+ "##pentacyclo": 620,
769
+ "##cyclopent": 621,
770
+ "enoyl": 622,
771
+ "ene": 623,
772
+ "triazolo": 624,
773
+ "##rophen": 625,
774
+ "##oryl": 626,
775
+ "benzothiazol": 627,
776
+ "benzimidazol": 628,
777
+ "thiadiazol": 629,
778
+ "dioxopyrrol": 630,
779
+ "13r": 631,
780
+ "pyrazol": 632,
781
+ "azanium": 633,
782
+ "diazin": 634,
783
+ "oxochromen": 635,
784
+ "14s": 636,
785
+ "##aene": 637,
786
+ "oxymethyl": 638,
787
+ "##azatetracyclo": 639,
788
+ "trimethoxy": 640,
789
+ "13s": 641,
790
+ "##ideneamino": 642,
791
+ "oxazol": 643,
792
+ "ethanone": 644,
793
+ "acetyloxy": 645,
794
+ "tetradec": 646,
795
+ "quinoline": 647,
796
+ "trien": 648,
797
+ "propanamide": 649,
798
+ "##rophenoxy": 650,
799
+ "methylpropyl": 651,
800
+ "hexadeca": 652,
801
+ "##ophene": 653,
802
+ "diol": 654,
803
+ "##spiro": 655,
804
+ "##othioyl": 656,
805
+ "methylpent": 657,
806
+ "##ridec": 658,
807
+ "propanoate": 659,
808
+ "imidazol": 660,
809
+ "benzoyl": 661,
810
+ "ethenyl": 662,
811
+ "octahydro": 663,
812
+ "pyrrolidin": 664,
813
+ "heptadeca": 665,
814
+ "pyrrole": 666,
815
+ "##azinyl": 667,
816
+ "hydroxyethyl": 668,
817
+ "011": 669,
818
+ "trione": 670,
819
+ "methylamino": 671,
820
+ "4ar": 672,
821
+ "methanone": 673,
822
+ "##quinoline": 674,
823
+ "12s": 675,
824
+ "17r": 676,
825
+ "##xal": 677,
826
+ "##uino": 678,
827
+ "##uinoxal": 679,
828
+ "thiophene": 680,
829
+ "methylprop": 681,
830
+ "##cyclopenta": 682,
831
+ "phenylmethoxy": 683,
832
+ "diazinane": 684,
833
+ "dihydropyr": 685,
834
+ "ani": 686,
835
+ "ylphenyl": 687,
836
+ "##aphthalene": 688,
837
+ "tridec": 689,
838
+ "4as": 690,
839
+ "ylethyl": 691,
840
+ "11r": 692,
841
+ "##anedi": 693,
842
+ "dioxa": 694,
843
+ "##phosphoryl": 695,
844
+ "14r": 696,
845
+ "benzoic": 697,
846
+ "quinazolin": 698,
847
+ "oxopropan": 699,
848
+ "oxane": 700,
849
+ "propanoic": 701,
850
+ "hydroxypropan": 702,
851
+ "dicarbox": 703,
852
+ "triox": 704,
853
+ "6ar": 705,
854
+ "012": 706,
855
+ "11s": 707,
856
+ "diethylamino": 708,
857
+ "oxopent": 709,
858
+ "12r": 710,
859
+ "8ar": 711,
860
+ "carbamate": 712,
861
+ "##oxolan": 713,
862
+ "3as": 714,
863
+ "quinoxal": 715,
864
+ "triol": 716,
865
+ "diamino": 717,
866
+ "carbamothioyl": 718,
867
+ "pyrrolo": 719,
868
+ "##isoindol": 720,
869
+ "methylphenoxy": 721,
870
+ "##butan": 722,
871
+ "3ar": 723,
872
+ "tetraen": 724,
873
+ "cyclopentyl": 725,
874
+ "tetradecahydro": 726,
875
+ "17s": 727,
876
+ "##imino": 728,
877
+ "dihydroxyphenyl": 729,
878
+ "##ndec": 730,
879
+ "hydrazinyl": 731,
880
+ "##hloride": 732,
881
+ "trimethoxyphenyl": 733,
882
+ "##apentacyclo": 734,
883
+ "##hydraz": 735,
884
+ "ylmethylidene": 736,
885
+ "oxadiazol": 737,
886
+ "6as": 738,
887
+ "trideca": 739,
888
+ "##carbamoyl": 740,
889
+ "diazenyl": 741,
890
+ "yloxy": 742,
891
+ "tetraene": 743,
892
+ "ethylphenyl": 744,
893
+ "##cyclohexyl": 745,
894
+ "piperidine": 746,
895
+ "tetrazol": 747,
896
+ "##thieno": 748,
897
+ "octadec": 749,
898
+ "hexadec": 750,
899
+ "piperazine": 751,
900
+ "dodecahydro": 752,
901
+ "difluoro": 753,
902
+ "azabicyclo": 754,
903
+ "##pyrrolidine": 755,
904
+ "##propanoyl": 756,
905
+ "trihydroxyoxan": 757,
906
+ "methylanilino": 758,
907
+ "benzothiophene": 759,
908
+ "sulfon": 760,
909
+ "diamine": 761,
910
+ "##amoyl": 762,
911
+ "triazin": 763,
912
+ "cyclopropyl": 764,
913
+ "##hromene": 765,
914
+ "purin": 766,
915
+ "##enitrile": 767,
916
+ "pentamethyl": 768,
917
+ "pyrazole": 769,
918
+ "8as": 770,
919
+ "##anamine": 771,
920
+ "pyran": 772,
921
+ "benzenesulfonamide": 773,
922
+ "##furo": 774,
923
+ "16s": 775,
924
+ "dicarboxyl": 776,
925
+ "12a": 777,
926
+ "triaz": 778,
927
+ "methylhept": 779,
928
+ "##bromo": 780,
929
+ "triazole": 781,
930
+ "decahydro": 782,
931
+ "undec": 783,
932
+ "diazatricyclo": 784,
933
+ "013": 785,
934
+ "butylphenyl": 786,
935
+ "15r": 787,
936
+ "octadeca": 788,
937
+ "methylpiperidin": 789,
938
+ "##anamide": 790,
939
+ "enoic": 791,
940
+ "phenylpyrazol": 792,
941
+ "benzoxazol": 793,
942
+ "15s": 794,
943
+ "16r": 795,
944
+ "triazatetracyclo": 796,
945
+ "##alde": 797,
946
+ "##aldehyd": 798,
947
+ "diene": 799,
948
+ "ylamino": 800,
949
+ "##acont": 801,
950
+ "##tetrazol": 802,
951
+ "trichloro": 803,
952
+ "##diazole": 804,
953
+ "hexaen": 805,
954
+ "isoquinolin": 806,
955
+ "pentaen": 807,
956
+ "naphthalene": 808,
957
+ "thione": 809,
958
+ "fluo": 810,
959
+ "##azepin": 811,
960
+ "##olane": 812,
961
+ "dioxoisoindol": 813,
962
+ "pyrano": 814,
963
+ "ylsulfanyl": 815,
964
+ "thiazole": 816,
965
+ "dibromo": 817,
966
+ "methylsulfonyl": 818,
967
+ "##benzenesulfonamide": 819,
968
+ "##pyrido": 820,
969
+ "benzothiolo": 821,
970
+ "##imidazole": 822,
971
+ "methoxyphenoxy": 823,
972
+ "azoni": 824,
973
+ "azatricyclo": 825,
974
+ "##propanamide": 826,
975
+ "enylidene": 827,
976
+ "##butanoyl": 828,
977
+ "dimethylphenoxy": 829,
978
+ "dicarboxylate": 830,
979
+ "##hexacyclo": 831,
980
+ "##imidothi": 832,
981
+ "chlorophenoxy": 833,
982
+ "##silyl": 834,
983
+ "hexaene": 835,
984
+ "##aspiro": 836,
985
+ "##anoyloxy": 837,
986
+ "##fluo": 838,
987
+ "butanamide": 839,
988
+ "dihydroxyoxan": 840,
989
+ "dioxopyrrolidin": 841,
990
+ "methoxyethyl": 842,
991
+ "dodeca": 843,
992
+ "##animine": 844,
993
+ "14b": 845,
994
+ "##pyrrolo": 846,
995
+ "##acetyloxy": 847,
996
+ "sulfonylamino": 848,
997
+ "##pyrimidine": 849,
998
+ "010": 850,
999
+ "benzenesulfonyl": 851,
1000
+ "methylfuran": 852,
1001
+ "oxazole": 853,
1002
+ "##ethenyl": 854,
1003
+ "##carbonylamino": 855,
1004
+ "methylpiperazin": 856,
1005
+ "##ooxy": 857,
1006
+ "tetrac": 858,
1007
+ "trimethylsilyl": 859,
1008
+ "18s": 860,
1009
+ "10a": 861,
1010
+ "tetrahydroxy": 862,
1011
+ "thiazolidine": 863,
1012
+ "oxapentacyclo": 864,
1013
+ "phenylmethoxyphenyl": 865,
1014
+ "methylbenzoyl": 866,
1015
+ "hydroxypropyl": 867,
1016
+ "hydroxyphosphoryl": 868,
1017
+ "propoxyphenyl": 869,
1018
+ "hexamethyl": 870,
1019
+ "bicyclo": 871,
1020
+ "##thiophene": 872,
1021
+ "##init": 873,
1022
+ "enenitrile": 874,
1023
+ "dienyl": 875,
1024
+ "quinoxalin": 876,
1025
+ "dimethylazanium": 877,
1026
+ "##pyrazolo": 878,
1027
+ "chloroethyl": 879,
1028
+ "oxobutan": 880,
1029
+ "##aconta": 881,
1030
+ "nonadeca": 882,
1031
+ "triene": 883,
1032
+ "##ohydrazide": 884,
1033
+ "##imidothioic": 885,
1034
+ "tetrazatetracyclo": 886,
1035
+ "##propane": 887,
1036
+ "thieno": 888,
1037
+ "quinoxaline": 889,
1038
+ "pyrazolo": 890,
1039
+ "diethoxy": 891,
1040
+ "methylpyridin": 892,
1041
+ "phosphon": 893,
1042
+ "##benzimidazol": 894,
1043
+ "pentaene": 895,
1044
+ "##piperazine": 896,
1045
+ "diaminomethyl": 897,
1046
+ "oxoprop": 898,
1047
+ "##naphthalene": 899,
1048
+ "##pyridine": 900,
1049
+ "diazatetracyclo": 901,
1050
+ "oxopropyl": 902,
1051
+ "heptaen": 903,
1052
+ "##imidazo": 904,
1053
  "[unused900]": 905,
1054
  "[unused901]": 906,
1055
  "[unused902]": 907,
vocab.txt CHANGED
@@ -1,38 +1,232 @@
1
  [PAD]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  ##ethyl
3
- yl
4
  ##henyl
5
  ##phenyl
6
  ##oxy
7
  ##methyl
8
  ##enz
9
- hydroxy
10
  ##azol
11
  ##arb
12
  ##ethoxy
13
- ##idin
14
- ##amide
15
  carb
16
- phenyl
17
- eth
18
- ##idene
19
  ##hydro
20
  dimethyl
21
- acet
22
  ##etr
23
  pyr
24
- thi
25
- ethyl
26
  ##amino
27
  ##hydroxy
28
  ##prop
29
  ##anyl
30
- benzo
31
- tetr
32
- oxy
33
  ##luoro
34
  ##dec
35
- ##idine
36
  ##ahydro
37
  ##benz
38
  dihydro
@@ -46,73 +240,54 @@ carboxyl
46
  ##adec
47
  ##iper
48
  ##methoxy
49
- hex
50
- cyclo
51
  ##azin
52
  ##but
53
  ##hex
54
  nit
55
  ##mid
56
- nitro
57
  dic
58
- pent
59
  ##ophen
60
- phen
61
  ##sulf
62
  ##imid
63
  ##aph
64
- acetyl
65
  ##acet
66
  ##aphth
67
  sulf
68
- benzyl
69
- hept
70
  ##aphthal
71
  acetate
72
  ##amethyl
73
  ##azole
74
  ##osph
75
  ##carb
76
- carbonyl
77
  ##anium
78
  ##atri
79
- tetra
80
  ##orph
81
  amine
82
  ##azine
83
  ##thr
84
- meth
85
  carboxylic
86
  ##onitrile
87
  ##othi
88
  diethyl
89
  benzene
90
- phosph
91
  pyridine
92
  ##acetyl
93
  ##benzene
94
  ##hydr
95
  ##azo
96
  ##urin
97
- carboxy
98
  ##arbox
99
  ##phosph
100
  indole
101
- [UNK]
102
- [CLS]
103
- [SEP]
104
- [MASK]
105
  ##anol
106
  ##oct
107
  hydr
108
  ##tetr
109
  urea
110
  acetic
111
- ##urea
112
  tric
113
  ##meth
114
  ##azep
115
- yn
116
  phenol
117
  ##butyl
118
  ##hyd
@@ -120,20 +295,15 @@ phenol
120
  ##aldehyde
121
  ##acetate
122
  hydrochloride
123
- thio
124
  ##oxane
125
- phosphoryl
126
  methanol
127
- spiro
128
  ##indole
129
  ##urine
130
  sil
131
  ##anil
132
- ##oxo
133
  ##odium
134
  fluor
135
  tet
136
- tris
137
  anthr
138
  ##izin
139
  hydrox
@@ -144,7 +314,6 @@ thiol
144
  ##tert
145
  iod
146
  acetonitrile
147
- octa
148
  ##acyl
149
  chrys
150
  ##icos
@@ -169,7 +338,6 @@ bor
169
  ##uric
170
  ##then
171
  oxal
172
- ##inden
173
  hyd
174
  ##inc
175
  ##hexane
@@ -185,11 +353,10 @@ ide
185
  chlor
186
  alum
187
  disulf
188
- fg
189
  manganese
190
  epoxy
191
  ##echn
192
- ##bd
193
  ##nit
194
  sti
195
  ##ithi
@@ -208,14 +375,12 @@ perc
208
  ##oline
209
  ##amic
210
  cadmium
211
- ij
212
  ##ylene
213
  ##elen
214
  ##inine
215
  formic
216
  ##brom
217
  ##ocin
218
- yt
219
  ##ophenyl
220
  ##germ
221
  ##iron
@@ -244,7 +409,6 @@ sulfide
244
  iodine
245
  trin
246
  prot
247
- xy
248
  ##esium
249
  phospho
250
  ##ecan
@@ -268,31 +432,25 @@ radon
268
  arb
269
  chlorine
270
  ferm
271
- yr
272
- ##ryp
273
  ##tio
 
274
  plat
275
  zol
276
  ent
277
  phe
278
  disulfide
279
  cet
280
- lf
281
- xo
282
  clo
283
  ful
284
  hyp
285
- iu
286
  ##bene
287
  ##ameth
288
  argon
289
  chl
290
  ##acycl
291
  dro
292
- hy
293
- lm
294
- ##opyran
295
  ##tit
 
296
  dif
297
  ##amin
298
  ##anim
@@ -300,43 +458,28 @@ dich
300
  ##ydro
301
  ##ydroxy
302
  ##hlo
303
- oxo
304
  ##hloro
305
  ##clo
306
  ##yclo
307
  methoxy
308
- 2s
309
- chloro
310
  carbox
311
- 2r
312
  ##cyclo
313
  ##ulfanyl
314
  ##luo
315
- 1h
316
- 3r
317
- 4s
318
  methylidene
319
- 5s
320
- 5r
321
- 6r
322
- 3s
323
  methoxyphenyl
324
- sulfanyl
325
  hydroxymethyl
326
  diox
327
- 4r
328
  ##yrim
329
  dihydroxy
330
  dione
331
  thiazol
332
  methylphenyl
333
  chlorophenyl
334
- fluoro
335
  ##romo
336
  propan
337
  carboxylate
338
  acetamide
339
- bromo
340
  pyrim
341
  ##dol
342
  carboxamide
@@ -353,8 +496,6 @@ dimethoxy
353
  pyrrol
354
  ium
355
  ##hromen
356
- 1r
357
- 1s
358
  ##propyl
359
  furan
360
  ##sulfanyl
@@ -362,54 +503,42 @@ furan
362
  trimethyl
363
  ethoxy
364
  pyridin
365
- 6s
366
  ylmethyl
367
- ##oate
368
  ##fluoro
369
  triazol
370
  dichloro
371
- butyl
372
  ##adeca
373
  enyl
374
  ##propan
375
  ##anoate
376
  carbam
377
  indol
378
- propyl
379
- oxa
380
  pyrimidin
381
  ##uinolin
382
  trifluoro
383
  benzothi
384
  ##piper
385
- ##pyrrol
386
  ##anilino
387
  dimethoxyphenyl
388
  nitrophenyl
389
- ##furan
390
  ##anoic
391
  ylidene
392
  ##itri
393
  ##itrile
394
  cyclohex
395
- sulfanylidene
396
  ##phenoxy
397
  ##olan
398
  ##yano
399
  benzamide
400
- thia
401
  ##amido
402
- cyano
403
  ##anone
404
  thiophen
405
  thiazolidin
406
  benzodi
407
- ##chromen
408
  ##aphthalen
409
  ##etracyclo
410
  trifluoromethyl
411
  hydroxyphenyl
412
- 2h
413
  ##atetracyclo
414
  pyrimidine
415
  cyclopent
@@ -417,11 +546,9 @@ dichlorophenyl
417
  methylbut
418
  ##atricyclo
419
  acetamido
420
- ##indol
421
  ##benzoyl
422
  ##orphol
423
  ##icyclo
424
- ##imidazol
425
  methylideneamino
426
  benzodiox
427
  benzoate
@@ -433,7 +560,6 @@ chromen
433
  ##anthr
434
  ##sulfon
435
  ##mino
436
- 9s
437
  pyrrolidine
438
  methylsulfanyl
439
  ##uinoline
@@ -441,47 +567,32 @@ dimethylphenyl
441
  ##piro
442
  ##diazol
443
  morphol
444
- sulfonyl
445
  benzofuran
446
  dimethylamino
447
  ##bicyclo
448
  dien
449
- 7s
450
- 7r
451
  ##benzamide
452
- 6a
453
  enamide
454
  ##pyrim
455
  cyclohexyl
456
  quinolin
457
- 8a
458
  ##cyclohex
459
  hexahydro
460
  naphthalen
461
  morpholin
462
  ##uinazol
463
- 4h
464
  oxolan
465
  phenanthr
466
- imid
467
- 8r
468
- 9r
469
  enoate
470
  dodec
471
  ##oxyphenyl
472
  ##aen
473
  ##hept
474
- 5e
475
- 5z
476
  tetramethyl
477
- 5h
478
- ##pyrazol
479
  benzodioxol
480
  butan
481
- 3h
482
  benzox
483
  carbonitrile
484
- 8s
485
  ethoxyphenyl
486
  piperidin
487
  ##sulfonyl
@@ -491,7 +602,6 @@ thiazolo
491
  ##benzoate
492
  methyloxan
493
  phenylprop
494
- ##quinolin
495
  ##abicyclo
496
  piperazin
497
  10r
@@ -503,42 +613,32 @@ phenylethyl
503
  phenoxy
504
  diphenyl
505
  ##enyl
506
- imino
507
  phenanthren
508
  oxobut
509
- carbamoyl
510
  chlo
511
  ##carbam
512
  ##benzo
513
  ##pentacyclo
514
  ##cyclopent
515
  enoyl
516
- ##sulfonamide
517
  ene
518
- 2e
519
  triazolo
520
  ##rophen
521
  ##oryl
522
- ##azanium
523
  benzothiazol
524
  benzimidazol
525
- ##pyrimidin
526
- ##piperidin
527
  thiadiazol
528
  dioxopyrrol
529
  13r
530
  pyrazol
531
  azanium
532
  diazin
533
- ##piperazin
534
- ##carbonyl
535
  oxochromen
536
  14s
537
  ##aene
538
  oxymethyl
539
  ##azatetracyclo
540
  trimethoxy
541
- propanoyl
542
  13s
543
  ##ideneamino
544
  oxazol
@@ -557,7 +657,6 @@ diol
557
  ##othioyl
558
  methylpent
559
  ##ridec
560
- 2z
561
  propanoate
562
  imidazol
563
  benzoyl
@@ -576,9 +675,8 @@ methanone
576
  ##quinoline
577
  12s
578
  17r
579
- ##uino
580
  ##xal
581
- ##pyridin
582
  ##uinoxal
583
  thiophene
584
  methylprop
@@ -588,7 +686,6 @@ diazinane
588
  dihydropyr
589
  ani
590
  ylphenyl
591
- anilino
592
  ##aphthalene
593
  tridec
594
  4as
@@ -596,7 +693,6 @@ ylethyl
596
  11r
597
  ##anedi
598
  dioxa
599
- 7a
600
  ##phosphoryl
601
  14r
602
  benzoic
@@ -617,7 +713,6 @@ oxopent
617
  carbamate
618
  ##oxolan
619
  3as
620
- iodo
621
  quinoxal
622
  triol
623
  diamino
@@ -636,7 +731,6 @@ dihydroxyphenyl
636
  ##ndec
637
  hydrazinyl
638
  ##hloride
639
- ##naphthalen
640
  trimethoxyphenyl
641
  ##apentacyclo
642
  ##hydraz
@@ -653,11 +747,9 @@ ethylphenyl
653
  piperidine
654
  tetrazol
655
  ##thieno
656
- ##hydrazide
657
  octadec
658
  hexadec
659
  piperazine
660
- 9a
661
  dodecahydro
662
  difluoro
663
  azabicyclo
@@ -666,7 +758,6 @@ azabicyclo
666
  trihydroxyoxan
667
  methylanilino
668
  benzothiophene
669
- ##chloride
670
  sulfon
671
  diamine
672
  ##amoyl
@@ -681,14 +772,12 @@ pyrazole
681
  ##anamine
682
  pyran
683
  benzenesulfonamide
684
- penta
685
  ##furo
686
  16s
687
  dicarboxyl
688
  12a
689
  triaz
690
  methylhept
691
- oxido
692
  ##bromo
693
  triazole
694
  decahydro
@@ -697,212 +786,123 @@ diazatricyclo
697
  013
698
  butylphenyl
699
  15r
700
- 6h
701
- 9h
702
  octadeca
703
  methylpiperidin
704
- ##quinazolin
705
  ##anamide
706
- [unused700]
707
- [unused701]
708
- [unused702]
709
- [unused703]
710
- [unused704]
711
- [unused705]
712
- [unused706]
713
- [unused707]
714
- [unused708]
715
- [unused709]
716
- [unused710]
717
- [unused711]
718
- [unused712]
719
- [unused713]
720
- [unused714]
721
- [unused715]
722
- [unused716]
723
- [unused717]
724
- [unused718]
725
- [unused719]
726
- [unused720]
727
- [unused721]
728
- [unused722]
729
- [unused723]
730
- [unused724]
731
- [unused725]
732
- [unused726]
733
- [unused727]
734
- [unused728]
735
- [unused729]
736
- [unused730]
737
- [unused731]
738
- [unused732]
739
- [unused733]
740
- [unused734]
741
- [unused735]
742
- [unused736]
743
- [unused737]
744
- [unused738]
745
- [unused739]
746
- [unused740]
747
- [unused741]
748
- [unused742]
749
- [unused743]
750
- [unused744]
751
- [unused745]
752
- [unused746]
753
- [unused747]
754
- [unused748]
755
- [unused749]
756
- [unused750]
757
- [unused751]
758
- [unused752]
759
- [unused753]
760
- [unused754]
761
- [unused755]
762
- [unused756]
763
- [unused757]
764
- [unused758]
765
- [unused759]
766
- [unused760]
767
- [unused761]
768
- [unused762]
769
- [unused763]
770
- [unused764]
771
- [unused765]
772
- [unused766]
773
- [unused767]
774
- [unused768]
775
- [unused769]
776
- [unused770]
777
- [unused771]
778
- [unused772]
779
- [unused773]
780
- [unused774]
781
- [unused775]
782
- [unused776]
783
- [unused777]
784
- [unused778]
785
- [unused779]
786
- [unused780]
787
- [unused781]
788
- [unused782]
789
- [unused783]
790
- [unused784]
791
- [unused785]
792
- [unused786]
793
- [unused787]
794
- [unused788]
795
- [unused789]
796
- [unused790]
797
- [unused791]
798
- [unused792]
799
- [unused793]
800
- [unused794]
801
- [unused795]
802
- [unused796]
803
- [unused797]
804
- [unused798]
805
- [unused799]
806
- [unused800]
807
- [unused801]
808
- [unused802]
809
- [unused803]
810
- [unused804]
811
- [unused805]
812
- [unused806]
813
- [unused807]
814
- [unused808]
815
- [unused809]
816
- [unused810]
817
- [unused811]
818
- [unused812]
819
- [unused813]
820
- [unused814]
821
- [unused815]
822
- [unused816]
823
- [unused817]
824
- [unused818]
825
- [unused819]
826
- [unused820]
827
- [unused821]
828
- [unused822]
829
- [unused823]
830
- [unused824]
831
- [unused825]
832
- [unused826]
833
- [unused827]
834
- [unused828]
835
- [unused829]
836
- [unused830]
837
- [unused831]
838
- [unused832]
839
- [unused833]
840
- [unused834]
841
- [unused835]
842
- [unused836]
843
- [unused837]
844
- [unused838]
845
- [unused839]
846
- [unused840]
847
- [unused841]
848
- [unused842]
849
- [unused843]
850
- [unused844]
851
- [unused845]
852
- [unused846]
853
- [unused847]
854
- [unused848]
855
- [unused849]
856
- [unused850]
857
- [unused851]
858
- [unused852]
859
- [unused853]
860
- [unused854]
861
- [unused855]
862
- [unused856]
863
- [unused857]
864
- [unused858]
865
- [unused859]
866
- [unused860]
867
- [unused861]
868
- [unused862]
869
- [unused863]
870
- [unused864]
871
- [unused865]
872
- [unused866]
873
- [unused867]
874
- [unused868]
875
- [unused869]
876
- [unused870]
877
- [unused871]
878
- [unused872]
879
- [unused873]
880
- [unused874]
881
- [unused875]
882
- [unused876]
883
- [unused877]
884
- [unused878]
885
- [unused879]
886
- [unused880]
887
- [unused881]
888
- [unused882]
889
- [unused883]
890
- [unused884]
891
- [unused885]
892
- [unused886]
893
- [unused887]
894
- [unused888]
895
- [unused889]
896
- [unused890]
897
- [unused891]
898
- [unused892]
899
- [unused893]
900
- [unused894]
901
- [unused895]
902
- [unused896]
903
- [unused897]
904
- [unused898]
905
- [unused899]
906
  [unused900]
907
  [unused901]
908
  [unused902]
 
1
  [PAD]
2
+ phenyl
3
+ oxy
4
+ hydroxy
5
+ meth
6
+ oxo
7
+ ##oxo
8
+ ethyl
9
+ chloro
10
+ cyclo
11
+ benzo
12
+ ##amide
13
+ acet
14
+ tetra
15
+ ##methylidene
16
+ fluoro
17
+ thi
18
+ eth
19
+ sulfanyl
20
+ ##acid
21
+ propyl
22
+ deca
23
+ ##furan
24
+ ##pyridin
25
+ ##pyrimidin
26
+ ##carboxylate
27
+ penta
28
+ phen
29
+ bromo
30
+ hexa
31
+ aza
32
+ oxa
33
+ nitro
34
+ ##carboxamide
35
+ ##idene
36
+ acetyl
37
+ ##indol
38
+ ##chromen
39
+ ##quinolin
40
+ ##pyrrol
41
+ ##pyrrolidin
42
+ ##thiophen
43
+ anilino
44
+ carbonyl
45
+ ##carbonyl
46
+ pent
47
+ ##oate
48
+ benzyl
49
+ ##pyrazol
50
+ ##piperidin
51
+ ##piperazin
52
+ sulfonyl
53
+ ##naphthalen
54
+ ##imidazol
55
+ hex
56
+ octa
57
+ ##idin
58
+ thia
59
+ hexyl
60
+ sulfanylidene
61
+ cyano
62
+ hepta
63
+ amido
64
+ butyl
65
+ ##azanium
66
+ ##carboxylic
67
+ tert-butyl
68
+ ##morpholin
69
+ propanoyl
70
+ tetr
71
+ thio
72
+ imino
73
+ spiro
74
+ carbamoyl
75
+ hept
76
+ ##pyran
77
+ ##phenanthren
78
+ ##carbonitrile
79
+ ##quinazolin
80
+ ##purin
81
+ pentyl
82
+ ##sulfonamide
83
+ butanoyl
84
+ phosphoryl
85
+ ##urea
86
+ carboxy
87
+ ##chloride
88
+ ##nitrile
89
+ nona
90
+ ##carbamate
91
+ carbamo
92
+ pyrido
93
+ hydrazin
94
+ ##quinoxalin
95
+ cosa
96
+ iodo
97
+ imidazo
98
+ furo
99
+ ##imine
100
+ pentanoyl
101
+ [UNK]
102
+ [CLS]
103
+ [SEP]
104
+ [MASK]
105
+ ##idine
106
+ ##inden
107
+ ##aniline
108
+ ##phosphate
109
+ sulfamoyl
110
+ oxido
111
+ cos
112
+ ##thione
113
+ ##imidazolidin
114
+ ##thiol
115
+ thiolo
116
+ ##azonia
117
+ ##sulfonate
118
+ ##picen
119
+ ##pyrazin
120
+ ##hydrazide
121
+ decanoyl
122
+ ##sulfonic
123
+ icosa
124
+ sulfo
125
+ ##anthracen
126
+ ##thiourea
127
+ silyl
128
+ oc
129
+ tria
130
+ ##pyridazin
131
+ phosph
132
+ conta
133
+ pyrimido
134
+ ##carbaldehyde
135
+ icos
136
+ heptyl
137
+ carbamimid
138
+ formyl
139
+ ##silane
140
+ phosphono
141
+ aceto
142
+ ##carbazol
143
+ ##acridin
144
+ ##fluoren
145
+ ##azanide
146
+ heni
147
+ ##xanthen
148
+ octyl
149
+ ##azulen
150
+ hexanoyl
151
+ carbohydrazide
152
+ silyloxy
153
+ ##sulfate
154
+ tris
155
+ nitroso
156
+ carbo
157
+ ##phthalazin
158
+ hydroxyl
159
+ ##phosphane
160
+ ##guanidin
161
+ benzhydryl
162
+ (2+)
163
+ sulfinyl
164
+ ##bromide
165
+ ##benzaldehyde
166
+ ##naphthyridin
167
+ ##indazol
168
+ cont
169
+ ##quinolizin
170
+ carbonimidoyl
171
+ ##iodide
172
+ ##pteridin
173
+ ##chrysen
174
+ ##tetracen
175
+ azido
176
+ naphtho
177
+ pheno
178
+ cyanato
179
+ ##hydrate
180
+ ##carbamimidoyl
181
+ sulfonato
182
+ ##hydrazine
183
+ ##pyrrolizin
184
+ phenacyl
185
+ imido
186
+ ##carboximidamide
187
+ carboxylato
188
+ imid
189
+ formamide
190
+ sulfonamido
191
+ ##indolizin
192
+ buta
193
+ carbothioyl
194
+ peroxy
195
+ phosphanyl
196
+ ##phenanthridin
197
+ ##oxamide
198
+ kis
199
+ ##fluoride
200
+ (3+)
201
+ ##carbonate
202
+ ##thian
203
+ ##carbamimidothioate
204
+ oxino
205
+ annulen
206
+ ##porphyrin
207
+ ##stannane
208
+ ##hydride
209
+ acenaphthylen
210
  ##ethyl
 
211
  ##henyl
212
  ##phenyl
213
  ##oxy
214
  ##methyl
215
  ##enz
 
216
  ##azol
217
  ##arb
218
  ##ethoxy
 
 
219
  carb
 
 
 
220
  ##hydro
221
  dimethyl
 
222
  ##etr
223
  pyr
 
 
224
  ##amino
225
  ##hydroxy
226
  ##prop
227
  ##anyl
 
 
 
228
  ##luoro
229
  ##dec
 
230
  ##ahydro
231
  ##benz
232
  dihydro
 
240
  ##adec
241
  ##iper
242
  ##methoxy
 
 
243
  ##azin
244
  ##but
245
  ##hex
246
  nit
247
  ##mid
 
248
  dic
 
249
  ##ophen
 
250
  ##sulf
251
  ##imid
252
  ##aph
 
253
  ##acet
254
  ##aphth
255
  sulf
 
 
256
  ##aphthal
257
  acetate
258
  ##amethyl
259
  ##azole
260
  ##osph
261
  ##carb
 
262
  ##anium
263
  ##atri
 
264
  ##orph
265
  amine
266
  ##azine
267
  ##thr
 
268
  carboxylic
269
  ##onitrile
270
  ##othi
271
  diethyl
272
  benzene
 
273
  pyridine
274
  ##acetyl
275
  ##benzene
276
  ##hydr
277
  ##azo
278
  ##urin
 
279
  ##arbox
280
  ##phosph
281
  indole
 
 
 
 
282
  ##anol
283
  ##oct
284
  hydr
285
  ##tetr
286
  urea
287
  acetic
 
288
  tric
289
  ##meth
290
  ##azep
 
291
  phenol
292
  ##butyl
293
  ##hyd
 
295
  ##aldehyde
296
  ##acetate
297
  hydrochloride
 
298
  ##oxane
 
299
  methanol
 
300
  ##indole
301
  ##urine
302
  sil
303
  ##anil
 
304
  ##odium
305
  fluor
306
  tet
 
307
  anthr
308
  ##izin
309
  hydrox
 
314
  ##tert
315
  iod
316
  acetonitrile
 
317
  ##acyl
318
  chrys
319
  ##icos
 
338
  ##uric
339
  ##then
340
  oxal
 
341
  hyd
342
  ##inc
343
  ##hexane
 
353
  chlor
354
  alum
355
  disulf
 
356
  manganese
357
  epoxy
358
  ##echn
359
+ ##yb
360
  ##nit
361
  sti
362
  ##ithi
 
375
  ##oline
376
  ##amic
377
  cadmium
 
378
  ##ylene
379
  ##elen
380
  ##inine
381
  formic
382
  ##brom
383
  ##ocin
 
384
  ##ophenyl
385
  ##germ
386
  ##iron
 
409
  iodine
410
  trin
411
  prot
 
412
  ##esium
413
  phospho
414
  ##ecan
 
432
  arb
433
  chlorine
434
  ferm
 
 
435
  ##tio
436
+ ##ryp
437
  plat
438
  zol
439
  ent
440
  phe
441
  disulfide
442
  cet
 
 
443
  clo
444
  ful
445
  hyp
 
446
  ##bene
447
  ##ameth
448
  argon
449
  chl
450
  ##acycl
451
  dro
 
 
 
452
  ##tit
453
+ ##opyran
454
  dif
455
  ##amin
456
  ##anim
 
458
  ##ydro
459
  ##ydroxy
460
  ##hlo
 
461
  ##hloro
462
  ##clo
463
  ##yclo
464
  methoxy
 
 
465
  carbox
 
466
  ##cyclo
467
  ##ulfanyl
468
  ##luo
 
 
 
469
  methylidene
 
 
 
 
470
  methoxyphenyl
 
471
  hydroxymethyl
472
  diox
 
473
  ##yrim
474
  dihydroxy
475
  dione
476
  thiazol
477
  methylphenyl
478
  chlorophenyl
 
479
  ##romo
480
  propan
481
  carboxylate
482
  acetamide
 
483
  pyrim
484
  ##dol
485
  carboxamide
 
496
  pyrrol
497
  ium
498
  ##hromen
 
 
499
  ##propyl
500
  furan
501
  ##sulfanyl
 
503
  trimethyl
504
  ethoxy
505
  pyridin
 
506
  ylmethyl
 
507
  ##fluoro
508
  triazol
509
  dichloro
 
510
  ##adeca
511
  enyl
512
  ##propan
513
  ##anoate
514
  carbam
515
  indol
 
 
516
  pyrimidin
517
  ##uinolin
518
  trifluoro
519
  benzothi
520
  ##piper
 
521
  ##anilino
522
  dimethoxyphenyl
523
  nitrophenyl
 
524
  ##anoic
525
  ylidene
526
  ##itri
527
  ##itrile
528
  cyclohex
 
529
  ##phenoxy
530
  ##olan
531
  ##yano
532
  benzamide
 
533
  ##amido
 
534
  ##anone
535
  thiophen
536
  thiazolidin
537
  benzodi
 
538
  ##aphthalen
539
  ##etracyclo
540
  trifluoromethyl
541
  hydroxyphenyl
 
542
  ##atetracyclo
543
  pyrimidine
544
  cyclopent
 
546
  methylbut
547
  ##atricyclo
548
  acetamido
 
549
  ##benzoyl
550
  ##orphol
551
  ##icyclo
 
552
  methylideneamino
553
  benzodiox
554
  benzoate
 
560
  ##anthr
561
  ##sulfon
562
  ##mino
 
563
  pyrrolidine
564
  methylsulfanyl
565
  ##uinoline
 
567
  ##piro
568
  ##diazol
569
  morphol
 
570
  benzofuran
571
  dimethylamino
572
  ##bicyclo
573
  dien
 
 
574
  ##benzamide
 
575
  enamide
576
  ##pyrim
577
  cyclohexyl
578
  quinolin
 
579
  ##cyclohex
580
  hexahydro
581
  naphthalen
582
  morpholin
583
  ##uinazol
 
584
  oxolan
585
  phenanthr
 
 
 
586
  enoate
587
  dodec
588
  ##oxyphenyl
589
  ##aen
590
  ##hept
 
 
591
  tetramethyl
 
 
592
  benzodioxol
593
  butan
 
594
  benzox
595
  carbonitrile
 
596
  ethoxyphenyl
597
  piperidin
598
  ##sulfonyl
 
602
  ##benzoate
603
  methyloxan
604
  phenylprop
 
605
  ##abicyclo
606
  piperazin
607
  10r
 
613
  phenoxy
614
  diphenyl
615
  ##enyl
 
616
  phenanthren
617
  oxobut
 
618
  chlo
619
  ##carbam
620
  ##benzo
621
  ##pentacyclo
622
  ##cyclopent
623
  enoyl
 
624
  ene
 
625
  triazolo
626
  ##rophen
627
  ##oryl
 
628
  benzothiazol
629
  benzimidazol
 
 
630
  thiadiazol
631
  dioxopyrrol
632
  13r
633
  pyrazol
634
  azanium
635
  diazin
 
 
636
  oxochromen
637
  14s
638
  ##aene
639
  oxymethyl
640
  ##azatetracyclo
641
  trimethoxy
 
642
  13s
643
  ##ideneamino
644
  oxazol
 
657
  ##othioyl
658
  methylpent
659
  ##ridec
 
660
  propanoate
661
  imidazol
662
  benzoyl
 
675
  ##quinoline
676
  12s
677
  17r
 
678
  ##xal
679
+ ##uino
680
  ##uinoxal
681
  thiophene
682
  methylprop
 
686
  dihydropyr
687
  ani
688
  ylphenyl
 
689
  ##aphthalene
690
  tridec
691
  4as
 
693
  11r
694
  ##anedi
695
  dioxa
 
696
  ##phosphoryl
697
  14r
698
  benzoic
 
713
  carbamate
714
  ##oxolan
715
  3as
 
716
  quinoxal
717
  triol
718
  diamino
 
731
  ##ndec
732
  hydrazinyl
733
  ##hloride
 
734
  trimethoxyphenyl
735
  ##apentacyclo
736
  ##hydraz
 
747
  piperidine
748
  tetrazol
749
  ##thieno
 
750
  octadec
751
  hexadec
752
  piperazine
 
753
  dodecahydro
754
  difluoro
755
  azabicyclo
 
758
  trihydroxyoxan
759
  methylanilino
760
  benzothiophene
 
761
  sulfon
762
  diamine
763
  ##amoyl
 
772
  ##anamine
773
  pyran
774
  benzenesulfonamide
 
775
  ##furo
776
  16s
777
  dicarboxyl
778
  12a
779
  triaz
780
  methylhept
 
781
  ##bromo
782
  triazole
783
  decahydro
 
786
  013
787
  butylphenyl
788
  15r
 
 
789
  octadeca
790
  methylpiperidin
 
791
  ##anamide
792
+ enoic
793
+ phenylpyrazol
794
+ benzoxazol
795
+ 15s
796
+ 16r
797
+ triazatetracyclo
798
+ ##alde
799
+ ##aldehyd
800
+ diene
801
+ ylamino
802
+ ##acont
803
+ ##tetrazol
804
+ trichloro
805
+ ##diazole
806
+ hexaen
807
+ isoquinolin
808
+ pentaen
809
+ naphthalene
810
+ thione
811
+ fluo
812
+ ##azepin
813
+ ##olane
814
+ dioxoisoindol
815
+ pyrano
816
+ ylsulfanyl
817
+ thiazole
818
+ dibromo
819
+ methylsulfonyl
820
+ ##benzenesulfonamide
821
+ ##pyrido
822
+ benzothiolo
823
+ ##imidazole
824
+ methoxyphenoxy
825
+ azoni
826
+ azatricyclo
827
+ ##propanamide
828
+ enylidene
829
+ ##butanoyl
830
+ dimethylphenoxy
831
+ dicarboxylate
832
+ ##hexacyclo
833
+ ##imidothi
834
+ chlorophenoxy
835
+ ##silyl
836
+ hexaene
837
+ ##aspiro
838
+ ##anoyloxy
839
+ ##fluo
840
+ butanamide
841
+ dihydroxyoxan
842
+ dioxopyrrolidin
843
+ methoxyethyl
844
+ dodeca
845
+ ##animine
846
+ 14b
847
+ ##pyrrolo
848
+ ##acetyloxy
849
+ sulfonylamino
850
+ ##pyrimidine
851
+ 010
852
+ benzenesulfonyl
853
+ methylfuran
854
+ oxazole
855
+ ##ethenyl
856
+ ##carbonylamino
857
+ methylpiperazin
858
+ ##ooxy
859
+ tetrac
860
+ trimethylsilyl
861
+ 18s
862
+ 10a
863
+ tetrahydroxy
864
+ thiazolidine
865
+ oxapentacyclo
866
+ phenylmethoxyphenyl
867
+ methylbenzoyl
868
+ hydroxypropyl
869
+ hydroxyphosphoryl
870
+ propoxyphenyl
871
+ hexamethyl
872
+ bicyclo
873
+ ##thiophene
874
+ ##init
875
+ enenitrile
876
+ dienyl
877
+ quinoxalin
878
+ dimethylazanium
879
+ ##pyrazolo
880
+ chloroethyl
881
+ oxobutan
882
+ ##aconta
883
+ nonadeca
884
+ triene
885
+ ##ohydrazide
886
+ ##imidothioic
887
+ tetrazatetracyclo
888
+ ##propane
889
+ thieno
890
+ quinoxaline
891
+ pyrazolo
892
+ diethoxy
893
+ methylpyridin
894
+ phosphon
895
+ ##benzimidazol
896
+ pentaene
897
+ ##piperazine
898
+ diaminomethyl
899
+ oxoprop
900
+ ##naphthalene
901
+ ##pyridine
902
+ diazatetracyclo
903
+ oxopropyl
904
+ heptaen
905
+ ##imidazo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
906
  [unused900]
907
  [unused901]
908
  [unused902]