#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Sun Aug 25 20:29:36 2019 @author: charleshen @Note: The code are copyed from PyBioMed, with a minor repair https://www.ncbi.nlm.nih.gov/pubmed/29556758 these are SMARTS patterns corresponding to the PubChem fingerprints https://astro.temple.edu/~tua87106/list_fingerprints.pdf ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt """ _type = 'SMARTS-based' import numpy as np from rdkit import Chem from rdkit import DataStructs import os import pandas as pd smartsPatts = { 1: ('[H]', 3), # 1-115 2: ('[H]', 7), 3: ('[H]', 15), 4: ('[H]', 31), 5: ('[Li]', 0), 6: ('[Li]', 1), 7: ('[B]', 0), 8: ('[B]', 1), 9: ('[B]', 3), 10: ('[C]', 1), 11: ('[C]', 3), 12: ('[C]', 7), 13: ('[C]', 15), 14: ('[C]', 31), 15: ('[N]', 0), 16: ('[N]', 1), 17: ('[N]', 3), 18: ('[N]', 7), 19: ('[O]', 0), 20: ('[O]', 1), 21: ('[O]', 3), 22: ('[O]', 7), 23: ('[O]', 15), 24: ('[F]', 0), 25: ('[F]', 1), 26: ('[F]', 3), 27: ('[Na]', 0), 28: ('[Na]', 1), 29: ('[Si]', 0), 30: ('[Si]', 1), 31: ('[P]', 0), 32: ('[P]', 1), 33: ('[P]', 3), 34: ('[S]', 0), 35: ('[S]', 1), 36: ('[S]', 3), 37: ('[S]', 7), 38: ('[Cl]', 0), 39: ('[Cl]', 1), 40: ('[Cl]', 3), 41: ('[Cl]', 7), 42: ('[K]', 0), 43: ('[K]', 1), 44: ('[Br]', 0), 45: ('[Br]', 1), 46: ('[Br]', 3), 47: ('[I]', 0), 48: ('[I]', 1), 49: ('[I]', 3), 50: ('[Be]', 0), 51: ('[Mg]', 0), 52: ('[Al]', 0), 53: ('[Ca]', 0), 54: ('[Sc]', 0), 55: ('[Ti]', 0), 56: ('[V]', 0), 57: ('[Cr]', 0), 58: ('[Mn]', 0), 59: ('[Fe]', 0), 60: ('[CO]', 0), 61: ('[Ni]', 0), 62: ('[Cu]', 0), 63: ('[Zn]', 0), 64: ('[Ga]', 0), 65: ('[Ge]', 0), 66: ('[As]', 0), 67: ('[Se]', 0), 68: ('[Kr]', 0), 69: ('[Rb]', 0), 70: ('[Sr]', 0), 71: ('[Y]', 0), 72: ('[Zr]', 0), 73: ('[Nb]', 0), 74: ('[Mo]', 0), 75: ('[Ru]', 0), 76: ('[Rh]', 0), 77: ('[Pd]', 0), 78: ('[Ag]', 0), 79: ('[Cd]', 0), 80: ('[In]', 0), 81: ('[Sn]', 0), 82: ('[Sb]', 0), 83: ('[Te]', 0), 84: ('[Xe]', 0), 85: ('[Cs]', 0), 86: ('[Ba]', 0), 87: ('[Lu]', 
0), 88: ('[Hf]', 0), 89: ('[Ta]', 0), 90: ('[W]', 0), 91: ('[Re]', 0), 92: ('[Os]', 0), 93: ('[Ir]', 0), 94: ('[Pt]', 0), 95: ('[Au]', 0), 96: ('[Hg]', 0), 97: ('[Tl]', 0), 98: ('[Pb]', 0), 99: ('[Bi]', 0), 100: ('[La]', 0), 101: ('[Ce]', 0), 102: ('[Pr]', 0), 103: ('[Nd]', 0), 104: ('[Pm]', 0), 105: ('[Sm]', 0), 106: ('[Eu]', 0), 107: ('[Gd]', 0), 108: ('[Tb]', 0), 109: ('[Dy]', 0), 110: ('[Ho]', 0), 111: ('[Er]', 0), 112: ('[Tm]', 0), 113: ('[Yb]', 0), 114: ('[Tc]', 0), 115: ('[U]', 0), 116: ('[Li&!H0]', 0), # 264-881 117: ('[Li]~[Li]', 0), 118: ('[Li]~[#5]', 0), 119: ('[Li]~[#6]', 0), 120: ('[Li]~[#8]', 0), 121: ('[Li]~[F]', 0), 122: ('[Li]~[#15]', 0), 123: ('[Li]~[#16]', 0), 124: ('[Li]~[Cl]', 0), 125: ('[#5&!H0]', 0), 126: ('[#5]~[#5]', 0), 127: ('[#5]~[#6]', 0), 128: ('[#5]~[#7]', 0), 129: ('[#5]~[#8]', 0), 130: ('[#5]~[F]', 0), 131: ('[#5]~[#14]', 0), 132: ('[#5]~[#15]', 0), 133: ('[#5]~[#16]', 0), 134: ('[#5]~[Cl]', 0), 135: ('[#5]~[Br]', 0), 136: ('[#6&!H0]', 0), 137: ('[#6]~[#6]', 0), 138: ('[#6]~[#7]', 0), 139: ('[#6]~[#8]', 0), 140: ('[#6]~[F]', 0), 141: ('[#6]~[Na]', 0), 142: ('[#6]~[Mg]', 0), 143: ('[#6]~[Al]', 0), 144: ('[#6]~[#14]', 0), 145: ('[#6]~[#15]', 0), 146: ('[#6]~[#16]', 0), 147: ('[#6]~[Cl]', 0), 148: ('[#6]~[#33]', 0), 149: ('[#6]~[#34]', 0), 150: ('[#6]~[Br]', 0), 151: ('[#6]~[I]', 0), 152: ('[#7&!H0]', 0), 153: ('[#7]~[#7]', 0), 154: ('[#7]~[#8]', 0), 155: ('[#7]~[F]', 0), 156: ('[#7]~[#14]', 0), 157: ('[#7]~[#15]', 0), 158: ('[#7]~[#16]', 0), 159: ('[#7]~[Cl]', 0), 160: ('[#7]~[Br]', 0), 161: ('[#8&!H0]', 0), 162: ('[#8]~[#8]', 0), 163: ('[#8]~[Mg]', 0), 164: ('[#8]~[Na]', 0), 165: ('[#8]~[Al]', 0), 166: ('[#8]~[#14]', 0), 167: ('[#8]~[#15]', 0), 168: ('[#8]~[K]', 0), 169: ('[F]~[#15]', 0), 170: ('[F]~[#16]', 0), 171: ('[Al&!H0]', 0), 172: ('[Al]~[Cl]', 0), 173: ('[#14&!H0]', 0), 174: ('[#14]~[#14]', 0), 175: ('[#14]~[Cl]', 0), 176: ('[#15&!H0]', 0), 177: ('[#15]~[#15]', 0), 178: ('[#33&!H0]', 0), 179: ('[#33]~[#33]', 0), 180: 
('[#6](~Br)(~[#6])', 0), 181: ('[#6](~Br)(~[#6])(~[#6])', 0), 182: ('[#6&!H0]~[Br]', 0), 183: ('[#6](~[Br])(:[c])', 0), 184: ('[#6](~[Br])(:[n])', 0), 185: ('[#6](~[#6])(~[#6])', 0), 186: ('[#6](~[#6])(~[#6])(~[#6])', 0), 187: ('[#6](~[#6])(~[#6])(~[#6])(~[#6])', 0), 188: ('[#6H1](~[#6])(~[#6])(~[#6])', 0), 189: ('[#6](~[#6])(~[#6])(~[#6])(~[#7])', 0), 190: ('[#6](~[#6])(~[#6])(~[#6])(~[#8])', 0), 191: ('[#6H1](~[#6])(~[#6])(~[#7])', 0), 192: ('[#6H1](~[#6])(~[#6])(~[#8])', 0), 193: ('[#6](~[#6])(~[#6])(~[#7])', 0), 194: ('[#6](~[#6])(~[#6])(~[#8])', 0), 195: ('[#6](~[#6])(~[Cl])', 0), 196: ('[#6&!H0](~[#6])(~[Cl])', 0), 197: ('[#6H,#6H2,#6H3,#6H4]~[#6]', 0), 198: ('[#6&!H0](~[#6])(~[#7])', 0), 199: ('[#6&!H0](~[#6])(~[#8])', 0), 200: ('[#6H1](~[#6])(~[#8])(~[#8])', 0), 201: ('[#6&!H0](~[#6])(~[#15])', 0), 202: ('[#6&!H0](~[#6])(~[#16])', 0), 203: ('[#6](~[#6])(~[I])', 0), 204: ('[#6](~[#6])(~[#7])', 0), 205: ('[#6](~[#6])(~[#8])', 0), 206: ('[#6](~[#6])(~[#16])', 0), 207: ('[#6](~[#6])(~[#14])', 0), 208: ('[#6](~[#6])(:c)', 0), 209: ('[#6](~[#6])(:c)(:c)', 0), 210: ('[#6](~[#6])(:c)(:n)', 0), 211: ('[#6](~[#6])(:n)', 0), 212: ('[#6](~[#6])(:n)(:n)', 0), 213: ('[#6](~[Cl])(~[Cl])', 0), 214: ('[#6&!H0](~[Cl])', 0), 215: ('[#6](~[Cl])(:c)', 0), 216: ('[#6](~[F])(~[F])', 0), 217: ('[#6](~[F])(:c)', 0), 218: ('[#6&!H0](~[#7])', 0), 219: ('[#6&!H0](~[#8])', 0), 220: ('[#6&!H0](~[#8])(~[#8])', 0), 221: ('[#6&!H0](~[#16])', 0), 222: ('[#6&!H0](~[#14])', 0), 223: ('[#6&!H0]:c', 0), 224: ('[#6&!H0](:c)(:c)', 0), 225: ('[#6&!H0](:c)(:n)', 0), 226: ('[#6&!H0](:n)', 0), 227: ('[#6H3]', 0), 228: ('[#6](~[#7])(~[#7])', 0), 229: ('[#6](~[#7])(:c)', 0), 230: ('[#6](~[#7])(:c)(:c)', 0), 231: ('[#6](~[#7])(:c)(:n)', 0), 232: ('[#6](~[#7])(:n)', 0), 233: ('[#6](~[#8])(~[#8])', 0), 234: ('[#6](~[#8])(:c)', 0), 235: ('[#6](~[#8])(:c)(:c)', 0), 236: ('[#6](~[#16])(:c)', 0), 237: ('[#6](:c)(:c)', 0), 238: ('[#6](:c)(:c)(:c)', 0), 239: ('[#6](:c)(:c)(:n)', 0), 240: ('[#6](:c)(:n)', 0), 
241: ('[#6](:c)(:n)(:n)', 0), 242: ('[#6](:n)(:n)', 0), 243: ('[#7](~[#6])(~[#6])', 0), 244: ('[#7](~[#6])(~[#6])(~[#6])', 0), 245: ('[#7&!H0](~[#6])(~[#6])', 0), 246: ('[#7&!H0](~[#6])', 0), 247: ('[#7&!H0](~[#6])(~[#7])', 0), 248: ('[#7](~[#6])(~[#8])', 0), 249: ('[#7](~[#6])(:c)', 0), 250: ('[#7](~[#6])(:c)(:c)', 0), 251: ('[#7&!H0](~[#7])', 0), 252: ('[#7&!H0](:c)', 0), 253: ('[#7&!H0](:c)(:c)', 0), 254: ('[#7](~[#8])(~[#8])', 0), 255: ('[#7](~[#8])(:o)', 0), 256: ('[#7](:c)(:c)', 0), 257: ('[#7](:c)(:c)(:c)', 0), 258: ('[#8](~[#6])(~[#6])', 0), 259: ('[#8&!H0](~[#6])', 0), 260: ('[#8](~[#6])(~[#15])', 0), 261: ('[#8&!H0](~[#16])', 0), 262: ('[#8](:c)(:c)', 0), 263: ('[#15](~[#6])(~[#6])', 0), 264: ('[#15](~[#8])(~[#8])', 0), 265: ('[#16](~[#6])(~[#6])', 0), 266: ('[#16&!H0](~[#6])', 0), 267: ('[#16](~[#6])(~[#8])', 0), 268: ('[#14](~[#6])(~[#6])', 0), 269: ('[#6]=,:[#6]', 0), 270: ('[#6]#[#6]', 0), 271: ('[#6]=,:[#7]', 0), 272: ('[#6]#[#7]', 0), 273: ('[#6]=,:[#8]', 0), 274: ('[#6]=,:[#16]', 0), 275: ('[#7]=,:[#7]', 0), 276: ('[#7]=,:[#8]', 0), 277: ('[#7]=,:[#15]', 0), 278: ('[#15]=,:[#8]', 0), 279: ('[#15]=,:[#15]', 0), 280: ('[#6](#[#6])(-,:[#6])', 0), 281: ('[#6&!H0](#[#6])', 0), 282: ('[#6](#[#7])(-,:[#6])', 0), 283: ('[#6](-,:[#6])(-,:[#6])(=,:[#6])', 0), 284: ('[#6](-,:[#6])(-,:[#6])(=,:[#7])', 0), 285: ('[#6](-,:[#6])(-,:[#6])(=,:[#8])', 0), 286: ('[#6](-,:[#6])([Cl])(=,:[#8])', 0), 287: ('[#6&!H0](-,:[#6])(=,:[#6])', 0), 288: ('[#6&!H0](-,:[#6])(=,:[#7])', 0), 289: ('[#6&!H0](-,:[#6])(=,:[#8])', 0), 290: ('[#6](-,:[#6])(-,:[#7])(=,:[#6])', 0), 291: ('[#6](-,:[#6])(-,:[#7])(=,:[#7])', 0), 292: ('[#6](-,:[#6])(-,:[#7])(=,:[#8])', 0), 293: ('[#6](-,:[#6])(-,:[#8])(=,:[#8])', 0), 294: ('[#6](-,:[#6])(=,:[#6])', 0), 295: ('[#6](-,:[#6])(=,:[#7])', 0), 296: ('[#6](-,:[#6])(=,:[#8])', 0), 297: ('[#6]([Cl])(=,:[#8])', 0), 298: ('[#6&!H0](-,:[#7])(=,:[#6])', 0), 299: ('[#6&!H0](=,:[#6])', 0), 300: ('[#6&!H0](=,:[#7])', 0), 301: ('[#6&!H0](=,:[#8])', 0), 302: 
('[#6](-,:[#7])(=,:[#6])', 0), 303: ('[#6](-,:[#7])(=,:[#7])', 0), 304: ('[#6](-,:[#7])(=,:[#8])', 0), 305: ('[#6](-,:[#8])(=,:[#8])', 0), 306: ('[#7](-,:[#6])(=,:[#6])', 0), 307: ('[#7](-,:[#6])(=,:[#8])', 0), 308: ('[#7](-,:[#8])(=,:[#8])', 0), 309: ('[#15](-,:[#8])(=,:[#8])', 0), 310: ('[#16](-,:[#6])(=,:[#8])', 0), 311: ('[#16](-,:[#8])(=,:[#8])', 0), 312: ('[#16](=,:[#8])(=,:[#8])', 0), 313: ('[#6]-,:[#6]-,:[#6]#[#6]', 0), 314: ('[#8]-,:[#6]-,:[#6]=,:[#7]', 0), 315: ('[#8]-,:[#6]-,:[#6]=,:[#8]', 0), 316: ('[#7]:[#6]-,:[#16&!H0]', 0), 317: ('[#7]-,:[#6]-,:[#6]=,:[#6]', 0), 318: ('[#8]=,:[#16]-,:[#6]-,:[#6]', 0), 319: ('[#7]#[#6]-,:[#6]=,:[#6]', 0), 320: ('[#6]=,:[#7]-,:[#7]-,:[#6]', 0), 321: ('[#8]=,:[#16]-,:[#6]-,:[#7]', 0), 322: ('[#16]-,:[#16]-,:[#6]:[#6]', 0), 323: ('[#6]:[#6]-,:[#6]=,:[#6]', 0), 324: ('[#16]:[#6]:[#6]:[#6]', 0), 325: ('[#6]:[#7]:[#6]-,:[#6]', 0), 326: ('[#16]-,:[#6]:[#7]:[#6]', 0), 327: ('[#16]:[#6]:[#6]:[#7]', 0), 328: ('[#16]-,:[#6]=,:[#7]-,:[#6]', 0), 329: ('[#6]-,:[#8]-,:[#6]=,:[#6]', 0), 330: ('[#7]-,:[#7]-,:[#6]:[#6]', 0), 331: ('[#16]-,:[#6]=,:[#7&!H0]', 0), 332: ('[#16]-,:[#6]-,:[#16]-,:[#6]', 0), 333: ('[#6]:[#16]:[#6]-,:[#6]', 0), 334: ('[#8]-,:[#16]-,:[#6]:[#6]', 0), 335: ('[#6]:[#7]-,:[#6]:[#6]', 0), 336: ('[#7]-,:[#16]-,:[#6]:[#6]', 0), 337: ('[#7]-,:[#6]:[#7]:[#6]', 0), 338: ('[#7]:[#6]:[#6]:[#7]', 0), 339: ('[#7]-,:[#6]:[#7]:[#7]', 0), 340: ('[#7]-,:[#6]=,:[#7]-,:[#6]', 0), 341: ('[#7]-,:[#6]=,:[#7&!H0]', 0), 342: ('[#7]-,:[#6]-,:[#16]-,:[#6]', 0), 343: ('[#6]-,:[#6]-,:[#6]=,:[#6]', 0), 344: ('[#6]-,:[#7]:[#6&!H0]', 0), 345: ('[#7]-,:[#6]:[#8]:[#6]', 0), 346: ('[#8]=,:[#6]-,:[#6]:[#6]', 0), 347: ('[#8]=,:[#6]-,:[#6]:[#7]', 0), 348: ('[#6]-,:[#7]-,:[#6]:[#6]', 0), 349: ('[#7]:[#7]-,:[#6&!H0]', 0), 350: ('[#8]-,:[#6]:[#6]:[#7]', 0), 351: ('[#8]-,:[#6]=,:[#6]-,:[#6]', 0), 352: ('[#7]-,:[#6]:[#6]:[#7]', 0), 353: ('[#6]-,:[#16]-,:[#6]:[#6]', 0), 354: ('[Cl]-,:[#6]:[#6]-,:[#6]', 0), 355: ('[#7]-,:[#6]=,:[#6&!H0]', 0), 356: 
('[Cl]-,:[#6]:[#6&!H0]', 0), 357: ('[#7]:[#6]:[#7]-,:[#6]', 0), 358: ('[Cl]-,:[#6]:[#6]-,:[#8]', 0), 359: ('[#6]-,:[#6]:[#7]:[#6]', 0), 360: ('[#6]-,:[#6]-,:[#16]-,:[#6]', 0), 361: ('[#16]=,:[#6]-,:[#7]-,:[#6]', 0), 362: ('[Br]-,:[#6]:[#6]-,:[#6]', 0), 363: ('[#7&!H0]-,:[#7&!H0]', 0), 364: ('[#16]=,:[#6]-,:[#7&!H0]', 0), 365: ('[#6]-,:[#33]-[#8&!H0]', 0), 366: ('[#16]:[#6]:[#6&!H0]', 0), 367: ('[#8]-,:[#7]-,:[#6]-,:[#6]', 0), 368: ('[#7]-,:[#7]-,:[#6]-,:[#6]', 0), 369: ('[#6H,#6H2,#6H3]=,:[#6H,#6H2,#6H3]', 0), 370: ('[#7]-,:[#7]-,:[#6]-,:[#7]', 0), 371: ('[#8]=,:[#6]-,:[#7]-,:[#7]', 0), 372: ('[#7]=,:[#6]-,:[#7]-,:[#6]', 0), 373: ('[#6]=,:[#6]-,:[#6]:[#6]', 0), 374: ('[#6]:[#7]-,:[#6&!H0]', 0), 375: ('[#6]-,:[#7]-,:[#7&!H0]', 0), 376: ('[#7]:[#6]:[#6]-,:[#6]', 0), 377: ('[#6]-,:[#6]=,:[#6]-,:[#6]', 0), 378: ('[#33]-,:[#6]:[#6&!H0]', 0), 379: ('[Cl]-,:[#6]:[#6]-,:[Cl]', 0), 380: ('[#6]:[#6]:[#7&!H0]', 0), 381: ('[#7&!H0]-,:[#6&!H0]', 0), 382: ('[Cl]-,:[#6]-,:[#6]-,:[Cl]', 0), 383: ('[#7]:[#6]-,:[#6]:[#6]', 0), 384: ('[#16]-,:[#6]:[#6]-,:[#6]', 0), 385: ('[#16]-,:[#6]:[#6&!H0]', 0), 386: ('[#16]-,:[#6]:[#6]-,:[#7]', 0), 387: ('[#16]-,:[#6]:[#6]-,:[#8]', 0), 388: ('[#8]=,:[#6]-,:[#6]-,:[#6]', 0), 389: ('[#8]=,:[#6]-,:[#6]-,:[#7]', 0), 390: ('[#8]=,:[#6]-,:[#6]-,:[#8]', 0), 391: ('[#7]=,:[#6]-,:[#6]-,:[#6]', 0), 392: ('[#7]=,:[#6]-,:[#6&!H0]', 0), 393: ('[#6]-,:[#7]-,:[#6&!H0]', 0), 394: ('[#8]-,:[#6]:[#6]-,:[#6]', 0), 395: ('[#8]-,:[#6]:[#6&!H0]', 0), 396: ('[#8]-,:[#6]:[#6]-,:[#7]', 0), 397: ('[#8]-,:[#6]:[#6]-,:[#8]', 0), 398: ('[#7]-,:[#6]:[#6]-,:[#6]', 0), 399: ('[#7]-,:[#6]:[#6&!H0]', 0), 400: ('[#7]-,:[#6]:[#6]-,:[#7]', 0), 401: ('[#8]-,:[#6]-,:[#6]:[#6]', 0), 402: ('[#7]-,:[#6]-,:[#6]:[#6]', 0), 403: ('[Cl]-,:[#6]-,:[#6]-,:[#6]', 0), 404: ('[Cl]-,:[#6]-,:[#6]-,:[#8]', 0), 405: ('[#6]:[#6]-,:[#6]:[#6]', 0), 406: ('[#8]=,:[#6]-,:[#6]=,:[#6]', 0), 407: ('[Br]-,:[#6]-,:[#6]-,:[#6]', 0), 408: ('[#7]=,:[#6]-,:[#6]=,:[#6]', 0), 409: ('[#6]=,:[#6]-,:[#6]-,:[#6]', 0), 
410: ('[#7]:[#6]-,:[#8&!H0]', 0), 411: ('[#8]=,:[#7]-,:c:c', 0), 412: ('[#8]-,:[#6]-,:[#7&!H0]', 0), 413: ('[#7]-,:[#6]-,:[#7]-,:[#6]', 0), 414: ('[Cl]-,:[#6]-,:[#6]=,:[#8]', 0), 415: ('[Br]-,:[#6]-,:[#6]=,:[#8]', 0), 416: ('[#8]-,:[#6]-,:[#8]-,:[#6]', 0), 417: ('[#6]=,:[#6]-,:[#6]=,:[#6]', 0), 418: ('[#6]:[#6]-,:[#8]-,:[#6]', 0), 419: ('[#8]-,:[#6]-,:[#6]-,:[#7]', 0), 420: ('[#8]-,:[#6]-,:[#6]-,:[#8]', 0), 421: ('N#[#6]-,:[#6]-,:[#6]', 0), 422: ('[#7]-,:[#6]-,:[#6]-,:[#7]', 0), 423: ('[#6]:[#6]-,:[#6]-,:[#6]', 0), 424: ('[#6&!H0]-,:[#8&!H0]', 0), 425: ('n:c:n:c', 0), 426: ('[#8]-,:[#6]-,:[#6]=,:[#6]', 0), 427: ('[#8]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0), 428: ('[#8]-,:[#6]-,:[#6]:[#6]-,:[#8]', 0), 429: ('[#7]=,:[#6]-,:[#6]:[#6&!H0]', 0), 430: ('c:c-,:[#7]-,:c:c', 0), 431: ('[#6]-,:[#6]:[#6]-,:c:c', 0), 432: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0), 433: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0), 434: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0), 435: ('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0), 436: ('[Cl]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0), 437: ('c:c-,:[#6]=,:[#6]-,:[#6]', 0), 438: ('[#6]-,:[#6]:[#6]-,:[#7]-,:[#6]', 0), 439: ('[#6]-,:[#16]-,:[#6]-,:[#6]-,:[#6]', 0), 440: ('[#7]-,:[#6]:[#6]-,:[#8&!H0]', 0), 441: ('[#8]=,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0), 442: ('[#6]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0), 443: ('[#6]-,:[#6]:[#6]-,:[#8&!H0]', 0), 444: ('[Cl]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0), 445: ('[#7]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0), 446: ('[#7]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0), 447: ('[#6]-,:[#8]-,:[#6]-,:[#6]=,:[#6]', 0), 448: ('c:c-,:[#6]-,:[#6]-,:[#6]', 0), 449: ('[#7]=,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0), 450: ('[#8]=,:[#6]-,:[#6]-,:c:c', 0), 451: ('[Cl]-,:[#6]:[#6]:[#6]-,:[#6]', 0), 452: ('[#6H,#6H2,#6H3]-,:[#6]=,:[#6H,#6H2,#6H3]', 0), 453: ('[#7]-,:[#6]:[#6]:[#6]-,:[#6]', 0), 454: ('[#7]-,:[#6]:[#6]:[#6]-,:[#7]', 0), 455: ('[#8]=,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0), 456: ('[#6]-,:c:c:[#6]-,:[#6]', 0), 457: ('[#6]-,:[#8]-,:[#6]-,:[#6]:c', 0), 458: ('[#8]=,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0), 459: 
('[#8]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0), 460: ('[#7]-,:[#6]-,:[#6]-,:[#6]:c', 0), 461: ('[#6]-,:[#6]-,:[#6]-,:[#6]:c', 0), 462: ('[Cl]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0), 463: ('[#6]-,:[#8]-,:[#6]-,:[#8]-,:[#6]', 0), 464: ('[#7]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0), 465: ('[#7]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0), 466: ('[#6]-,:[#7]-,:[#6]-,:[#6]-,:[#6]', 0), 467: ('[#6]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0), 468: ('[#7]-,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0), 469: ('c:c:n:n:c', 0), 470: ('[#6]-,:[#6]-,:[#6]-,:[#8&!H0]', 0), 471: ('c:[#6]-,:[#6]-,:[#6]:c', 0), 472: ('[#8]-,:[#6]-,:[#6]=,:[#6]-,:[#6]', 0), 473: ('c:c-,:[#8]-,:[#6]-,:[#6]', 0), 474: ('[#7]-,:[#6]:c:c:n', 0), 475: ('[#8]=,:[#6]-,:[#8]-,:[#6]:c', 0), 476: ('[#8]=,:[#6]-,:[#6]:[#6]-,:[#6]', 0), 477: ('[#8]=,:[#6]-,:[#6]:[#6]-,:[#7]', 0), 478: ('[#8]=,:[#6]-,:[#6]:[#6]-,:[#8]', 0), 479: ('[#6]-,:[#8]-,:[#6]:[#6]-,:[#6]', 0), 480: ('[#8]=,:[#33]-,:[#6]:c:c', 0), 481: ('[#6]-,:[#7]-,:[#6]-,:[#6]:c', 0), 482: ('[#16]-,:[#6]:c:c-,:[#7]', 0), 483: ('[#8]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0), 484: ('[#8]-,:[#6]:[#6]-,:[#8&!H0]', 0), 485: ('[#6]-,:[#6]-,:[#8]-,:[#6]:c', 0), 486: ('[#7]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0), 487: ('[#6]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0), 488: ('[#7]-,:[#7]-,:[#6]-,:[#7&!H0]', 0), 489: ('[#6]-,:[#7]-,:[#6]-,:[#7]-,:[#6]', 0), 490: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0), 491: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0), 492: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0), 493: ('[#6]=,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0), 494: ('[#8]-,:[#6]-,:[#6]-,:[#6]=,:[#6]', 0), 495: ('[#8]-,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0), 496: ('[#6&!H0]-,:[#6]-,:[#7&!H0]', 0), 497: ('[#6]-,:[#6]=,:[#7]-,:[#7]-,:[#6]', 0), 498: ('[#8]=,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0), 499: ('[#8]=,:[#6]-,:[#7]-,:[#6&!H0]', 0), 500: ('[#8]=,:[#6]-,:[#7]-,:[#6]-,:[#7]', 0), 501: ('[#8]=,:[#7]-,:[#6]:[#6]-,:[#7]', 0), 502: ('[#8]=,:[#7]-,:c:c-,:[#8]', 0), 503: ('[#8]=,:[#6]-,:[#7]-,:[#6]=,:[#8]', 0), 504: ('[#8]-,:[#6]:[#6]:[#6]-,:[#6]', 0), 505: ('[#8]-,:[#6]:[#6]:[#6]-,:[#7]', 
0), 506: ('[#8]-,:[#6]:[#6]:[#6]-,:[#8]', 0), 507: ('[#7]-,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0), 508: ('[#8]-,:[#6]-,:[#6]-,:[#6]:c', 0), 509: ('[#6]-,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0), 510: ('[#6]-,:[#7]-,:[#6]:[#6]-,:[#6]', 0), 511: ('[#6]-,:[#6]-,:[#16]-,:[#6]-,:[#6]', 0), 512: ('[#8]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0), 513: ('[#6]-,:[#6]=,:[#6]-,:[#6]-,:[#6]', 0), 514: ('[#8]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0), 515: ('[#8]-,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0), 516: ('[#8]-,:[#6]-,:[#6]-,:[#8&!H0]', 0), 517: ('[#6]-,:[#6]=,:[#6]-,:[#6]=,:[#6]', 0), 518: ('[#7]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0), 519: ('[#6]=,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0), 520: ('[#6]=,:[#6]-,:[#6]-,:[#8&!H0]', 0), 521: ('[#6]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0), 522: ('[Cl]-,:[#6]:[#6]-,:[#6]=,:[#8]', 0), 523: ('[Br]-,:[#6]:c:c-,:[#6]', 0), 524: ('[#8]=,:[#6]-,:[#6]=,:[#6]-,:[#6]', 0), 525: ('[#8]=,:[#6]-,:[#6]=,:[#6&!H0]', 0), 526: ('[#8]=,:[#6]-,:[#6]=,:[#6]-,:[#7]', 0), 527: ('[#7]-,:[#6]-,:[#7]-,:[#6]:c', 0), 528: ('[Br]-,:[#6]-,:[#6]-,:[#6]:c', 0), 529: ('[#7]#[#6]-,:[#6]-,:[#6]-,:[#6]', 0), 530: ('[#6]-,:[#6]=,:[#6]-,:[#6]:cc1ccc([#6])cc1', 0), 567: ('[#6]c1ccc([#8])cc1', 0), 568: ('[#6]c1ccc([#16])cc1', 0), 569: ('[#6]c1ccc([#7])cc1', 0), 570: ('[#6]c1ccc(Cl)cc1', 0), 571: ('[#6]c1ccc(Br)cc1', 0), 572: ('[#8]c1ccc([#8])cc1', 0), 573: ('[#8]c1ccc([#16])cc1', 0), 574: ('[#8]c1ccc([#7])cc1', 0), 575: ('[#8]c1ccc(Cl)cc1', 0), 576: ('[#8]c1ccc(Br)cc1', 0), 577: ('[#16]c1ccc([#16])cc1', 0), 578: ('[#16]c1ccc([#7])cc1', 0), 579: ('[#16]c1ccc(Cl)cc1', 0), 580: ('[#16]c1ccc(Br)cc1', 0), 581: ('[#7]c1ccc([#7])cc1', 0), 582: ('[#7]c1ccc(Cl)cc1', 0), 583: ('[#7]c1ccc(Br)cc1', 0), 584: ('Clc1ccc(Cl)cc1', 0), 585: ('Clc1ccc(Br)cc1', 0), 586: ('Brc1ccc(Br)cc1', 0), 587: ('[#6]c1cc([#6])ccc1', 0), 588: ('[#6]c1cc([#8])ccc1', 0), 589: ('[#6]c1cc([#16])ccc1', 0), 590: ('[#6]c1cc([#7])ccc1', 0), 591: ('[#6]c1cc(Cl)ccc1', 0), 592: ('[#6]c1cc(Br)ccc1', 0), 593: ('[#8]c1cc([#8])ccc1', 0), 594: ('[#8]c1cc([#16])ccc1', 0), 595: 
('[#8]c1cc([#7])ccc1', 0), 596: ('[#8]c1cc(Cl)ccc1', 0), 597: ('[#8]c1cc(Br)ccc1', 0), 598: ('[#16]c1cc([#16])ccc1', 0), 599: ('[#16]c1cc([#7])ccc1', 0), 600: ('[#16]c1cc(Cl)ccc1', 0), 601: ('[#16]c1cc(Br)ccc1', 0), 602: ('[#7]c1cc([#7])ccc1', 0), 603: ('[#7]c1cc(Cl)ccc1', 0), 604: ('[#7]c1cc(Br)ccc1', 0), 605: ('Clc1cc(Cl)ccc1', 0), 606: ('Clc1cc(Br)ccc1', 0), 607: ('Brc1cc(Br)ccc1', 0), 608: ('[#6]c1c([#6])cccc1', 0), 609: ('[#6]c1c([#8])cccc1', 0), 610: ('[#6]c1c([#16])cccc1', 0), 611: ('[#6]c1c([#7])cccc1', 0), 612: ('[#6]c1c(Cl)cccc1', 0), 613: ('[#6]c1c(Br)cccc1', 0), 614: ('[#8]c1c([#8])cccc1', 0), 615: ('[#8]c1c([#16])cccc1', 0), 616: ('[#8]c1c([#7])cccc1', 0), 617: ('[#8]c1c(Cl)cccc1', 0), 618: ('[#8]c1c(Br)cccc1', 0), 619: ('[#16]c1c([#16])cccc1', 0), 620: ('[#16]c1c([#7])cccc1', 0), 621: ('[#16]c1c(Cl)cccc1', 0), 622: ('[#16]c1c(Br)cccc1', 0), 623: ('[#7]c1c([#7])cccc1', 0), 624: ('[#7]c1c(Cl)cccc1', 0), 625: ('[#7]c1c(Br)cccc1', 0), 626: ('Clc1c(Cl)cccc1', 0), 627: ('Clc1c(Br)cccc1', 0), 628: ('Brc1c(Br)cccc1', 0), 629: ('[#6][#6]1[#6][#6][#6]([#6])[#6][#6]1', 0), 630: ('[#6][#6]1[#6][#6][#6]([#8])[#6][#6]1', 0), 631: ('[#6][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0), 632: ('[#6][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0), 633: ('[#6][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0), 634: ('[#6][#6]1[#6][#6][#6](Br)[#6][#6]1', 0), 635: ('[#8][#6]1[#6][#6][#6]([#8])[#6][#6]1', 0), 636: ('[#8][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0), 637: ('[#8][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0), 638: ('[#8][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0), 639: ('[#8][#6]1[#6][#6][#6](Br)[#6][#6]1', 0), 640: ('[#16][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0), 641: ('[#16][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0), 642: ('[#16][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0), 643: ('[#16][#6]1[#6][#6][#6](Br)[#6][#6]1', 0), 644: ('[#7][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0), 645: ('[#7][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0), 646: ('[#7][#6]1[#6][#6][#6](Br)[#6][#6]1', 0), 647: ('Cl[#6]1[#6][#6][#6](Cl)[#6][#6]1', 0), 648: 
('Cl[#6]1[#6][#6][#6](Br)[#6][#6]1', 0), 649: ('Br[#6]1[#6][#6][#6](Br)[#6][#6]1', 0), 650: ('[#6][#6]1[#6][#6]([#6])[#6][#6][#6]1', 0), 651: ('[#6][#6]1[#6][#6]([#8])[#6][#6][#6]1', 0), 652: ('[#6][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0), 653: ('[#6][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0), 654: ('[#6][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0), 655: ('[#6][#6]1[#6][#6](Br)[#6][#6][#6]1', 0), 656: ('[#8][#6]1[#6][#6]([#8])[#6][#6][#6]1', 0), 657: ('[#8][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0), 658: ('[#8][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0), 659: ('[#8][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0), 660: ('[#8][#6]1[#6][#6](Br)[#6][#6][#6]1', 0), 661: ('[#16][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0), 662: ('[#16][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0), 663: ('[#16][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0), 664: ('[#16][#6]1[#6][#6](Br)[#6][#6][#6]1', 0), 665: ('[#7][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0), 666: ('[#7][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0), 667: ('[#7][#6]1[#6][#6](Br)[#6][#6][#6]1', 0), 668: ('Cl[#6]1[#6][#6](Cl)[#6][#6][#6]1', 0), 669: ('Cl[#6]1[#6][#6](Br)[#6][#6][#6]1', 0), 670: ('Br[#6]1[#6][#6](Br)[#6][#6][#6]1', 0), 671: ('[#6][#6]1[#6]([#6])[#6][#6][#6][#6]1', 0), 672: ('[#6][#6]1[#6]([#8])[#6][#6][#6][#6]1', 0), 673: ('[#6][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0), 674: ('[#6][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0), 675: ('[#6][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0), 676: ('[#6][#6]1[#6](Br)[#6][#6][#6][#6]1', 0), 677: ('[#8][#6]1[#6]([#8])[#6][#6][#6][#6]1', 0), 678: ('[#8][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0), 679: ('[#8][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0), 680: ('[#8][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0), 681: ('[#8][#6]1[#6](Br)[#6][#6][#6][#6]1', 0), 682: ('[#16][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0), 683: ('[#16][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0), 684: ('[#16][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0), 685: ('[#16][#6]1[#6](Br)[#6][#6][#6][#6]1', 0), 686: ('[#7][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0), 687: ('[#7][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0), 688: 
('[#7][#6]1[#6](Br)[#6][#6][#6][#6]1', 0), 689: ('Cl[#6]1[#6](Cl)[#6][#6][#6][#6]1', 0), 690: ('Cl[#6]1[#6](Br)[#6][#6][#6][#6]1', 0), 691: ('Br[#6]1[#6](Br)[#6][#6][#6][#6]1', 0), 692: ('[#6][#6]1[#6][#6]([#6])[#6][#6]1', 0), 693: ('[#6][#6]1[#6][#6]([#8])[#6][#6]1', 0), 694: ('[#6][#6]1[#6][#6]([#16])[#6][#6]1', 0), 695: ('[#6][#6]1[#6][#6]([#7])[#6][#6]1', 0), 696: ('[#6][#6]1[#6][#6](Cl)[#6][#6]1', 0), 697: ('[#6][#6]1[#6][#6](Br)[#6][#6]1', 0), 698: ('[#8][#6]1[#6][#6]([#8])[#6][#6]1', 0), 699: ('[#8][#6]1[#6][#6]([#16])[#6][#6]1', 0), 700: ('[#8][#6]1[#6][#6]([#7])[#6][#6]1', 0), 701: ('[#8][#6]1[#6][#6](Cl)[#6][#6]1', 0), 702: ('[#8][#6]1[#6][#6](Br)[#6][#6]1', 0), 703: ('[#16][#6]1[#6][#6]([#16])[#6][#6]1', 0), 704: ('[#16][#6]1[#6][#6]([#7])[#6][#6]1', 0), 705: ('[#16][#6]1[#6][#6](Cl)[#6][#6]1', 0), 706: ('[#16][#6]1[#6][#6](Br)[#6][#6]1', 0), 707: ('[#7][#6]1[#6][#6]([#7])[#6][#6]1', 0), 708: ('[#7][#6]1[#6][#6](Cl)[#6][#6]1', 0), 709: ('[#7][#6]1[#6][#6](Br)[#6][#6]1', 0), 710: ('Cl[#6]1[#6][#6](Cl)[#6][#6]1', 0), 711: ('Cl[#6]1[#6][#6](Br)[#6][#6]1', 0), 712: ('Br[#6]1[#6][#6](Br)[#6][#6]1', 0), 713: ('[#6][#6]1[#6]([#6])[#6][#6][#6]1', 0), 714: ('[#6][#6]1[#6]([#8])[#6][#6][#6]1', 0), 715: ('[#6][#6]1[#6]([#16])[#6][#6][#6]1', 0), 716: ('[#6][#6]1[#6]([#7])[#6][#6][#6]1', 0), 717: ('[#6][#6]1[#6](Cl)[#6][#6][#6]1', 0), 718: ('[#6][#6]1[#6](Br)[#6][#6][#6]1', 0), 719: ('[#8][#6]1[#6]([#8])[#6][#6][#6]1', 0), 720: ('[#8][#6]1[#6]([#16])[#6][#6][#6]1', 0), 721: ('[#8][#6]1[#6]([#7])[#6][#6][#6]1', 0), 722: ('[#8][#6]1[#6](Cl)[#6][#6][#6]1', 0), 723: ('[#8][#6]1[#6](Br)[#6][#6][#6]1', 0), 724: ('[#16][#6]1[#6]([#16])[#6][#6][#6]1', 0), 725: ('[#16][#6]1[#6]([#7])[#6][#6][#6]1', 0), 726: ('[#16][#6]1[#6](Cl)[#6][#6][#6]1', 0), 727: ('[#16][#6]1[#6](Br)[#6][#6][#6]1', 0), 728: ('[#7][#6]1[#6]([#7])[#6][#6][#6]1', 0), 729: ('[#7][#6]1[#6](Cl)[#6][#6]1', 0), 730: ('[#7][#6]1[#6](Br)[#6][#6][#6]1', 0), 731: ('Cl[#6]1[#6](Cl)[#6][#6][#6]1', 0), 732: 
PubchemKeys = None


def InitKeys(keyList, keyDict):
    """
    *Internal Use Only*

    Compile the SMARTS patterns of ``keyDict`` into ``keyList``; run once.

    **Arguments**

      - keyList: pre-sized list that is filled in place; the entry for key
        ``k`` is written to ``keyList[k - 1]`` as a ``(pattern, count)``
        tuple

      - keyDict: mapping ``key -> (SMARTS string, count)`` with 1-based
        integer keys

    Entries whose SMARTS string is ``'?'`` are skipped.  A pattern for
    which ``Chem.MolFromSmarts`` returns a falsy result is reported with
    ``print`` and its slot in ``keyList`` is left untouched.
    """
    # Slots are addressed by key, so the list has to reach the largest key.
    # Comparing against len(keyDict) is only equivalent when the keys are
    # exactly 1..N; a table with gaps would pass that check and then raise
    # IndexError part-way through the loop below.
    assert len(keyList) >= max(keyDict, default=0), 'length mismatch'
    for key, (patt, count) in keyDict.items():
        if patt == '?':
            continue
        sma = Chem.MolFromSmarts(patt)
        if not sma:
            print('SMARTS parser error for key #%d: %s' % (key, patt))
        else:
            keyList[key - 1] = sma, count


def calcPubChemFingerPart1(mol, **kwargs):
    """
    Calculate the SMARTS-defined PubChem fingerprint bits (the part that
    is driven by the ``smartsPatts`` table).

    **Arguments**

      - mol: the molecule to be fingerprinted

      - ctor (keyword, optional): callable that builds the result from a
        length; defaults to ``DataStructs.SparseBitVect``

      - any other keyword arguments are ignored

    **Returns**

      the object built by ``ctor``, one bit longer than the key table.
      Bit ``k`` is set when key ``k`` matches: for a key whose count is 0
      the pattern only has to be present, otherwise the molecule must
      contain more than ``count`` matches.  Bit 0 is never written here.

    >>> m = Chem.MolFromSmiles('CNO')
    >>> bv = calcPubChemFingerPart1(m)
    >>> on_bits = tuple(bv.GetOnBits())
    """
    global PubchemKeys
    if PubchemKeys is None:
        # Build the table in a local and publish it only once InitKeys has
        # finished: if initialisation raises, the next call retries instead
        # of silently working with a half-filled table.
        keys = [(None, 0)] * max(smartsPatts, default=0)
        InitKeys(keys, smartsPatts)
        PubchemKeys = keys
    ctor = kwargs.get('ctor', DataStructs.SparseBitVect)
    res = ctor(len(PubchemKeys) + 1)
    for bit, (patt, count) in enumerate(PubchemKeys, start=1):
        if patt is None:
            # key without a compiled pattern ('?', parse failure or gap)
            continue
        if count == 0:
            res[bit] = mol.HasSubstructMatch(patt)
        elif len(mol.GetSubstructMatches(patt)) > count:
            res[bit] = 1
    return res


def func_1(mol, bits):
    """
    *Internal Use Only*

    Set the ring-count bits for rings of any kind (ring section of the
    fingerprint, labelled 116-263 by the original author).

    **Arguments**

      - mol: molecule; only ``mol.GetRingInfo().AtomRings()`` is read

      - bits: object supporting integer item assignment; modified in place

    **Returns**

      ``(ringSize, bits)``: the size of every ring in the order reported
      by ``AtomRings()``, and the same ``bits`` object that was passed in.
    """
    # (ring size, bit meaning ">= 1 ring of that size", number of count
    # thresholds).  The bit for ">= n rings" sits 7 * (n - 1) positions
    # after the first one.  Rings outside sizes 3..10 set no bit.
    layout = (
        (3, 0, 2),
        (4, 14, 2),
        (5, 28, 5),
        (6, 63, 5),
        (7, 98, 2),
        (8, 112, 2),
        (9, 126, 1),
        (10, 133, 1),
    )
    ringSize = [len(ring) for ring in mol.GetRingInfo().AtomRings()]
    for size, first_bit, levels in layout:
        for level in range(min(ringSize.count(size), levels)):
            bits[first_bit + 7 * level] = 1
    return ringSize, bits
def func_2(mol, bits):
    """
    *Internal Use Only*

    Set the ring-count bits for saturated or aromatic carbon-only rings.

    A ring is counted when every ring bond joins two carbon atoms and the
    ring is either saturated (all ring bonds ``SINGLE``) or aromatic (all
    ring bonds ``AROMATIC``).

    **Arguments**

      - mol: molecule; read through ``GetRingInfo().BondRings()`` and
        ``GetBondWithIdx()``

      - bits: object supporting integer item assignment; modified in place

    **Returns**

      ``(ringSize, bits)``: the sizes (number of ring bonds) of the counted
      rings in the order reported by ``BondRings()``, and the same ``bits``
      object that was passed in.
    """
    # (ring size, bit meaning ">= 1 ring of that size", number of count
    # thresholds).  The bit for ">= n rings" sits 7 * (n - 1) positions
    # after the first one.  Rings outside sizes 3..10 set no bit.
    layout = (
        (3, 1, 2),
        (4, 15, 2),
        (5, 29, 5),
        (6, 64, 5),
        (7, 99, 2),
        (8, 113, 2),
        (9, 127, 1),
        (10, 134, 1),
    )
    ring_sizes = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(bond_idx) for bond_idx in ring]
        bond_types = {bond.GetBondType().name for bond in bonds}
        saturated = bond_types <= {'SINGLE'}
        aromatic = bond_types <= {'AROMATIC'}
        carbon_only = all(
            bond.GetBeginAtom().GetAtomicNum() == 6
            and bond.GetEndAtom().GetAtomicNum() == 6
            for bond in bonds
        )
        # "carbon-only" qualifies both ring kinds.  Saturated rings used to
        # be counted whatever atoms they contained, so e.g. a saturated
        # nitrogen heterocycle wrongly set the carbon-only bits.
        if (saturated or aromatic) and carbon_only:
            ring_sizes.append(len(ring))
    for size, first_bit, levels in layout:
        for level in range(min(ring_sizes.count(size), levels)):
            bits[first_bit + 7 * level] = 1
    return ring_sizes, bits
def func_3(mol, bits):
    """
    *Internal Use Only*

    Set the ring-count bits for saturated or aromatic nitrogen-containing
    rings.

    A ring is counted when at least one ring bond has a nitrogen atom at
    either end and the ring is either saturated (all ring bonds ``SINGLE``)
    or aromatic (all ring bonds ``AROMATIC``).

    **Arguments**

      - mol: molecule; read through ``GetRingInfo().BondRings()`` and
        ``GetBondWithIdx()``

      - bits: object supporting integer item assignment; modified in place

    **Returns**

      ``(ringSize, bits)``: the sizes (number of ring bonds) of the counted
      rings in the order reported by ``BondRings()``, and the same ``bits``
      object that was passed in.
    """
    # (ring size, bit meaning ">= 1 ring of that size", number of count
    # thresholds).  The bit for ">= n rings" sits 7 * (n - 1) positions
    # after the first one.  Rings outside sizes 3..10 set no bit.
    layout = (
        (3, 2, 2),
        (4, 16, 2),
        (5, 30, 5),
        (6, 65, 5),
        (7, 100, 2),
        (8, 114, 2),
        (9, 128, 1),
        (10, 135, 1),
    )
    ring_sizes = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(bond_idx) for bond_idx in ring]
        bond_types = {bond.GetBondType().name for bond in bonds}
        saturated = bond_types <= {'SINGLE'}
        aromatic = bond_types <= {'AROMATIC'}
        has_nitrogen = any(
            bond.GetBeginAtom().GetAtomicNum() == 7
            or bond.GetEndAtom().GetAtomicNum() == 7
            for bond in bonds
        )
        # "nitrogen-containing" qualifies both ring kinds.  Saturated rings
        # used to be counted whatever atoms they contained, so e.g. a
        # saturated carbocycle wrongly set the nitrogen-containing bits.
        if (saturated or aromatic) and has_nitrogen:
            ring_sizes.append(len(ring))
    for size, first_bit, levels in layout:
        for level in range(min(ring_sizes.count(size), levels)):
            bits[first_bit + 7 * level] = 1
    return ring_sizes, bits
def func_4(mol, bits):
    """
    *Internal Use Only*

    Set the ring-count bits for saturated or aromatic heteroatom-containing
    rings.

    A ring is counted when at least one ring bond has, at either end, an
    atom whose atomic number is neither 1 nor 6, and the ring is either
    saturated (all ring bonds ``SINGLE``) or aromatic (all ring bonds
    ``AROMATIC``).

    **Arguments**

      - mol: molecule; read through ``GetRingInfo().BondRings()`` and
        ``GetBondWithIdx()``

      - bits: object supporting integer item assignment; modified in place

    **Returns**

      ``(ringSize, bits)``: the sizes (number of ring bonds) of the counted
      rings in the order reported by ``BondRings()``, and the same ``bits``
      object that was passed in.
    """
    # (ring size, bit meaning ">= 1 ring of that size", number of count
    # thresholds).  The bit for ">= n rings" sits 7 * (n - 1) positions
    # after the first one.  Rings outside sizes 3..10 set no bit.
    layout = (
        (3, 3, 2),
        (4, 17, 2),
        (5, 31, 5),
        (6, 66, 5),
        (7, 101, 2),
        (8, 115, 2),
        (9, 129, 1),
        (10, 136, 1),
    )
    non_hetero = (1, 6)  # hydrogen and carbon do not count as heteroatoms
    ring_sizes = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(bond_idx) for bond_idx in ring]
        bond_types = {bond.GetBondType().name for bond in bonds}
        saturated = bond_types <= {'SINGLE'}
        aromatic = bond_types <= {'AROMATIC'}
        has_heteroatom = any(
            bond.GetBeginAtom().GetAtomicNum() not in non_hetero
            or bond.GetEndAtom().GetAtomicNum() not in non_hetero
            for bond in bonds
        )
        # "heteroatom-containing" qualifies both ring kinds.  Saturated
        # rings used to be counted whatever atoms they contained, so e.g. a
        # saturated carbocycle wrongly set the heteroatom-containing bits.
        if (saturated or aromatic) and has_heteroatom:
            ring_sizes.append(len(ring))
    for size, first_bit, levels in layout:
        for level in range(min(ring_sizes.count(size), levels)):
            bits[first_bit + 7 * level] = 1
    return ring_sizes, bits
def func_5(mol, bits):
    """
    *Internal Use Only*

    Set the ring-count bits for unsaturated non-aromatic carbon-only rings.

    A ring is counted when at least one ring bond is not ``SINGLE``, no
    ring bond is ``AROMATIC``, and every ring bond joins two carbon atoms.

    **Arguments**

      - mol: molecule; read through ``GetRingInfo().BondRings()`` and
        ``GetBondWithIdx()``

      - bits: object supporting integer item assignment; modified in place

    **Returns**

      ``(ringSize, bits)``: the sizes (number of ring bonds) of the counted
      rings in the order reported by ``BondRings()``, and the same ``bits``
      object that was passed in.
    """
    # (ring size, bit meaning ">= 1 ring of that size", number of count
    # thresholds).  The bit for ">= n rings" sits 7 * (n - 1) positions
    # after the first one.  Rings outside sizes 3..10 set no bit.
    layout = (
        (3, 4, 2),
        (4, 18, 2),
        (5, 32, 5),
        (6, 67, 5),
        (7, 102, 2),
        (8, 116, 2),
        (9, 130, 1),
        (10, 137, 1),
    )
    ring_sizes = []
    for ring in mol.GetRingInfo().BondRings():
        # Fetch each ring bond once and derive all three criteria from it.
        bonds = [mol.GetBondWithIdx(bond_idx) for bond_idx in ring]
        bond_types = [bond.GetBondType().name for bond in bonds]
        unsaturated = any(kind != 'SINGLE' for kind in bond_types)
        non_aromatic = 'AROMATIC' not in bond_types
        carbon_only = all(
            bond.GetBeginAtom().GetAtomicNum() == 6
            and bond.GetEndAtom().GetAtomicNum() == 6
            for bond in bonds
        )
        if unsaturated and non_aromatic and carbon_only:
            ring_sizes.append(len(ring))
    for size, first_bit, levels in layout:
        for level in range(min(ring_sizes.count(size), levels)):
            bits[first_bit + 7 * level] = 1
    return ring_sizes, bits
bits[11] = 1 elif temp[3] == 1: bits[4] = 1 else: pass if temp[4] >= 2: bits[18] = 1 bits[25] = 1 elif temp[4] == 1: bits[18] = 1 else: pass if temp[5] >= 5: bits[32] = 1 bits[39] = 1 bits[46] = 1 bits[53] = 1 bits[60] = 1 elif temp[5] == 4: bits[32] = 1 bits[39] = 1 bits[46] = 1 bits[53] = 1 elif temp[5] == 3: bits[32] = 1 bits[39] = 1 bits[46] = 1 elif temp[5] == 2: bits[32] = 1 bits[39] = 1 elif temp[5] == 1: bits[32] = 1 else: pass if temp[6] >= 5: bits[67] = 1 bits[74] = 1 bits[81] = 1 bits[88] = 1 bits[95] = 1 elif temp[6] == 4: bits[67] = 1 bits[74] = 1 bits[81] = 1 bits[88] = 1 elif temp[6] == 3: bits[67] = 1 bits[74] = 1 bits[81] = 1 elif temp[6] == 2: bits[67] = 1 bits[74] = 1 elif temp[6] == 1: bits[67] = 1 else: pass if temp[7] >= 2: bits[102] = 1 bits[109] = 1 elif temp[7] == 1: bits[102] = 1 else: pass if temp[8] >= 2: bits[116] = 1 bits[123] = 1 elif temp[8] == 1: bits[116] = 1 else: pass if temp[9] >= 1: bits[130] = 1 else: pass if temp[10] >= 1: bits[137] = 1 else: pass return ringSize, bits def func_6(mol, bits): """ *Internal Use Only* unsaturated non-aromatic nitrogen-containing """ ringSize = [] AllRingsBond = mol.GetRingInfo().BondRings() temp = {3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0} for ring in AllRingsBond: unsaturated = False nonaromatic = True ContainNitro = False ######### unsaturated for bondIdx in ring: if mol.GetBondWithIdx(bondIdx).GetBondType().name != 'SINGLE': unsaturated = True break ######## non-aromatic for bondIdx in ring: if mol.GetBondWithIdx(bondIdx).GetBondType().name == 'AROMATIC': nonaromatic = False break ######## nitrogen-containing for bondIdx in ring: BeginAtom = mol.GetBondWithIdx(bondIdx).GetBeginAtom() EndAtom = mol.GetBondWithIdx(bondIdx).GetEndAtom() if BeginAtom.GetAtomicNum() == 7 or EndAtom.GetAtomicNum() == 7: ContainNitro = True break if unsaturated == True and nonaromatic == True and ContainNitro == True: ringSize.append(len(ring)) for k, v in temp.items(): if len(ring) == k: temp[k] += 1 if 
temp[3] >= 2: bits[5] = 1 bits[12] = 1 elif temp[3] == 1: bits[5] = 1 else: pass if temp[4] >= 2: bits[19] = 1 bits[26] = 1 elif temp[4] == 1: bits[19] = 1 else: pass if temp[5] >= 5: bits[33] = 1 bits[40] = 1 bits[47] = 1 bits[54] = 1 bits[61] = 1 elif temp[5] == 4: bits[33] = 1 bits[40] = 1 bits[47] = 1 bits[54] = 1 elif temp[5] == 3: bits[33] = 1 bits[40] = 1 bits[47] = 1 elif temp[5] == 2: bits[33] = 1 bits[40] = 1 elif temp[5] == 1: bits[33] = 1 else: pass if temp[6] >= 5: bits[68] = 1 bits[75] = 1 bits[82] = 1 bits[89] = 1 bits[96] = 1 elif temp[6] == 4: bits[68] = 1 bits[75] = 1 bits[82] = 1 bits[89] = 1 elif temp[6] == 3: bits[68] = 1 bits[75] = 1 bits[82] = 1 elif temp[6] == 2: bits[68] = 1 bits[75] = 1 elif temp[6] == 1: bits[68] = 1 else: pass if temp[7] >= 2: bits[103] = 1 bits[110] = 1 elif temp[7] == 1: bits[103] = 1 else: pass if temp[8] >= 2: bits[117] = 1 bits[124] = 1 elif temp[8] == 1: bits[117] = 1 else: pass if temp[9] >= 1: bits[131] = 1 else: pass if temp[10] >= 1: bits[138] = 1 else: pass return ringSize, bits def func_7(mol, bits): """ *Internal Use Only* unsaturated non-aromatic heteroatom-containing """ ringSize = [] AllRingsBond = mol.GetRingInfo().BondRings() temp = {3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0, 10: 0} for ring in AllRingsBond: unsaturated = False nonaromatic = True heteroatom = False ######### unsaturated for bondIdx in ring: if mol.GetBondWithIdx(bondIdx).GetBondType().name != 'SINGLE': unsaturated = True break ######## non-aromatic for bondIdx in ring: if mol.GetBondWithIdx(bondIdx).GetBondType().name == 'AROMATIC': nonaromatic = False break ######## heteroatom-containing for bondIdx in ring: BeginAtom = mol.GetBondWithIdx(bondIdx).GetBeginAtom() EndAtom = mol.GetBondWithIdx(bondIdx).GetEndAtom() if BeginAtom.GetAtomicNum() not in [1, 6] or EndAtom.GetAtomicNum() not in [1, 6]: heteroatom = True break if unsaturated == True and nonaromatic == True and heteroatom == True: ringSize.append(len(ring)) for k, v in 
temp.items(): if len(ring) == k: temp[k] += 1 if temp[3] >= 2: bits[6] = 1 bits[13] = 1 elif temp[3] == 1: bits[6] = 1 else: pass if temp[4] >= 2: bits[20] = 1 bits[27] = 1 elif temp[4] == 1: bits[20] = 1 else: pass if temp[5] >= 5: bits[34] = 1 bits[41] = 1 bits[48] = 1 bits[55] = 1 bits[62] = 1 elif temp[5] == 4: bits[34] = 1 bits[41] = 1 bits[48] = 1 bits[55] = 1 elif temp[5] == 3: bits[34] = 1 bits[41] = 1 bits[48] = 1 elif temp[5] == 2: bits[34] = 1 bits[41] = 1 elif temp[5] == 1: bits[34] = 1 else: pass if temp[6] >= 5: bits[69] = 1 bits[76] = 1 bits[83] = 1 bits[90] = 1 bits[97] = 1 elif temp[6] == 4: bits[69] = 1 bits[76] = 1 bits[83] = 1 bits[90] = 1 elif temp[6] == 3: bits[69] = 1 bits[76] = 1 bits[83] = 1 elif temp[6] == 2: bits[69] = 1 bits[76] = 1 elif temp[6] == 1: bits[69] = 1 else: pass if temp[7] >= 2: bits[104] = 1 bits[111] = 1 elif temp[7] == 1: bits[104] = 1 else: pass if temp[8] >= 2: bits[118] = 1 bits[125] = 1 elif temp[8] == 1: bits[118] = 1 else: pass if temp[9] >= 1: bits[132] = 1 else: pass if temp[10] >= 1: bits[139] = 1 else: pass return ringSize, bits def func_8(mol, bits): """ *Internal Use Only* aromatic rings or hetero-aromatic rings """ AllRingsBond = mol.GetRingInfo().BondRings() temp = {'aromatic': 0, 'heteroatom': 0} for ring in AllRingsBond: aromatic = True heteroatom = False for bondIdx in ring: if mol.GetBondWithIdx(bondIdx).GetBondType().name != 'AROMATIC': aromatic = False break if aromatic == True: temp['aromatic'] += 1 for bondIdx in ring: BeginAtom = mol.GetBondWithIdx(bondIdx).GetBeginAtom() EndAtom = mol.GetBondWithIdx(bondIdx).GetEndAtom() if BeginAtom.GetAtomicNum() not in [1, 6] or EndAtom.GetAtomicNum() not in [1, 6]: heteroatom = True break if heteroatom == True: temp['heteroatom'] += 1 if temp['aromatic'] >= 4: bits[140] = 1 bits[142] = 1 bits[144] = 1 bits[146] = 1 elif temp['aromatic'] == 3: bits[140] = 1 bits[142] = 1 bits[144] = 1 elif temp['aromatic'] == 2: bits[140] = 1 bits[142] = 1 elif temp['aromatic'] 
== 1: bits[140] = 1 else: pass if temp['aromatic'] >= 4 and temp['heteroatom'] >= 4: bits[141] = 1 bits[143] = 1 bits[145] = 1 bits[147] = 1 elif temp['aromatic'] == 3 and temp['heteroatom'] == 3: bits[141] = 1 bits[143] = 1 bits[145] = 1 elif temp['aromatic'] == 2 and temp['heteroatom'] == 2: bits[141] = 1 bits[143] = 1 elif temp['aromatic'] == 1 and temp['heteroatom'] == 1: bits[141] = 1 else: pass return bits def calcPubChemFingerPart2(mol): # 116-263 """ *Internal Use Only* Calculate PubChem Fingerprints (116-263) """ bits = [0] * 148 bits = func_1(mol, bits)[1] bits = func_2(mol, bits)[1] bits = func_3(mol, bits)[1] bits = func_4(mol, bits)[1] bits = func_5(mol, bits)[1] bits = func_6(mol, bits)[1] bits = func_7(mol, bits)[1] bits = func_8(mol, bits) return bits def GetPubChemFPs(mol): """*Internal Use Only* Calculate PubChem Fingerprints """ mol = Chem.AddHs(mol) AllBits = [0] * 881 res1 = list(calcPubChemFingerPart1(mol).ToBitString()) for index, item in enumerate(res1[1:116]): if item == '1': AllBits[index] = 1 for index2, item2 in enumerate(res1[116:734]): if item2 == '1': AllBits[index2 + 115 + 148] = 1 res2 = calcPubChemFingerPart2(mol) for index3, item3 in enumerate(res2): if item3 == 1: AllBits[index3 + 115] = 1 AllBits = np.array(AllBits, dtype=np.bool_) return AllBits # ------------------------------------ file_path = os.path.dirname(__file__) def GetPubChemFPInfos(): return pd.read_excel(os.path.join(file_path, 'pubchemfp.xlsx')) if __name__ == '__main__': print('-' * 10 + 'START' + '-' * 10) SMILES = 'C1=NC2NC3=CNCC3=CC2CC1' mol = Chem.MolFromSmiles(SMILES) mol2 = Chem.AddHs(mol) result = GetPubChemFPs(mol2) print('Molecule: %s' % SMILES) print('-' * 25) print('Results: %s' % result) print('-' * 10 + 'END' + '-' * 10)