Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 25 20:29:36 2019
@author: charleshen
@Note: The code is copied from PyBioMed, with a minor repair
https://www.ncbi.nlm.nih.gov/pubmed/29556758
These are SMARTS patterns corresponding to the PubChem fingerprints
https://astro.temple.edu/~tua87106/list_fingerprints.pdf
ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt
"""
_type = 'SMARTS-based' | |
import numpy as np | |
from rdkit import Chem | |
from rdkit import DataStructs | |
import os | |
import pandas as pd | |
# Table of SMARTS-based fingerprint keys.
#
# Each entry maps a 1-based key number to a ``(smarts, count)`` pair:
#   * ``count == 0`` -> the key is set when the pattern matches at least once;
#   * ``count > 0``  -> the key is set when the pattern has MORE than ``count``
#     matches (e.g. ``('[C]', 3)`` means "at least 4 matches").
# The key number is also the bit index written by calcPubChemFingerPart1.
smartsPatts = {
    # keys 1-115: single-element patterns with count thresholds (original note: 1-115)
    1: ('[H]', 3),
    2: ('[H]', 7),
    3: ('[H]', 15),
    4: ('[H]', 31),
    5: ('[Li]', 0),
    6: ('[Li]', 1),
    7: ('[B]', 0),
    8: ('[B]', 1),
    9: ('[B]', 3),
    10: ('[C]', 1),
    11: ('[C]', 3),
    12: ('[C]', 7),
    13: ('[C]', 15),
    14: ('[C]', 31),
    15: ('[N]', 0),
    16: ('[N]', 1),
    17: ('[N]', 3),
    18: ('[N]', 7),
    19: ('[O]', 0),
    20: ('[O]', 1),
    21: ('[O]', 3),
    22: ('[O]', 7),
    23: ('[O]', 15),
    24: ('[F]', 0),
    25: ('[F]', 1),
    26: ('[F]', 3),
    27: ('[Na]', 0),
    28: ('[Na]', 1),
    29: ('[Si]', 0),
    30: ('[Si]', 1),
    31: ('[P]', 0),
    32: ('[P]', 1),
    33: ('[P]', 3),
    34: ('[S]', 0),
    35: ('[S]', 1),
    36: ('[S]', 3),
    37: ('[S]', 7),
    38: ('[Cl]', 0),
    39: ('[Cl]', 1),
    40: ('[Cl]', 3),
    41: ('[Cl]', 7),
    42: ('[K]', 0),
    43: ('[K]', 1),
    44: ('[Br]', 0),
    45: ('[Br]', 1),
    46: ('[Br]', 3),
    47: ('[I]', 0),
    48: ('[I]', 1),
    49: ('[I]', 3),
    50: ('[Be]', 0),
    51: ('[Mg]', 0),
    52: ('[Al]', 0),
    53: ('[Ca]', 0),
    54: ('[Sc]', 0),
    55: ('[Ti]', 0),
    56: ('[V]', 0),
    57: ('[Cr]', 0),
    58: ('[Mn]', 0),
    59: ('[Fe]', 0),
    60: ('[Co]', 0),  # cobalt; was '[CO]', which is not an element symbol
    61: ('[Ni]', 0),
    62: ('[Cu]', 0),
    63: ('[Zn]', 0),
    64: ('[Ga]', 0),
    65: ('[Ge]', 0),
    66: ('[As]', 0),
    67: ('[Se]', 0),
    68: ('[Kr]', 0),
    69: ('[Rb]', 0),
    70: ('[Sr]', 0),
    71: ('[Y]', 0),
    72: ('[Zr]', 0),
    73: ('[Nb]', 0),
    74: ('[Mo]', 0),
    75: ('[Ru]', 0),
    76: ('[Rh]', 0),
    77: ('[Pd]', 0),
    78: ('[Ag]', 0),
    79: ('[Cd]', 0),
    80: ('[In]', 0),
    81: ('[Sn]', 0),
    82: ('[Sb]', 0),
    83: ('[Te]', 0),
    84: ('[Xe]', 0),
    85: ('[Cs]', 0),
    86: ('[Ba]', 0),
    87: ('[Lu]', 0),
    88: ('[Hf]', 0),
    89: ('[Ta]', 0),
    90: ('[W]', 0),
    91: ('[Re]', 0),
    92: ('[Os]', 0),
    93: ('[Ir]', 0),
    94: ('[Pt]', 0),
    95: ('[Au]', 0),
    96: ('[Hg]', 0),
    97: ('[Tl]', 0),
    98: ('[Pb]', 0),
    99: ('[Bi]', 0),
    100: ('[La]', 0),
    101: ('[Ce]', 0),
    102: ('[Pr]', 0),
    103: ('[Nd]', 0),
    104: ('[Pm]', 0),
    105: ('[Sm]', 0),
    106: ('[Eu]', 0),
    107: ('[Gd]', 0),
    108: ('[Tb]', 0),
    109: ('[Dy]', 0),
    110: ('[Ho]', 0),
    111: ('[Er]', 0),
    112: ('[Tm]', 0),
    113: ('[Yb]', 0),
    114: ('[Tc]', 0),
    115: ('[U]', 0),
    # keys 116-733: substructure patterns (original note: 264-881,
    # presumably the PubChem bit positions these keys correspond to)
    116: ('[Li&!H0]', 0),
    117: ('[Li]~[Li]', 0),
    118: ('[Li]~[#5]', 0),
    119: ('[Li]~[#6]', 0),
    120: ('[Li]~[#8]', 0),
    121: ('[Li]~[F]', 0),
    122: ('[Li]~[#15]', 0),
    123: ('[Li]~[#16]', 0),
    124: ('[Li]~[Cl]', 0),
    125: ('[#5&!H0]', 0),
    126: ('[#5]~[#5]', 0),
    127: ('[#5]~[#6]', 0),
    128: ('[#5]~[#7]', 0),
    129: ('[#5]~[#8]', 0),
    130: ('[#5]~[F]', 0),
    131: ('[#5]~[#14]', 0),
    132: ('[#5]~[#15]', 0),
    133: ('[#5]~[#16]', 0),
    134: ('[#5]~[Cl]', 0),
    135: ('[#5]~[Br]', 0),
    136: ('[#6&!H0]', 0),
    137: ('[#6]~[#6]', 0),
    138: ('[#6]~[#7]', 0),
    139: ('[#6]~[#8]', 0),
    140: ('[#6]~[F]', 0),
    141: ('[#6]~[Na]', 0),
    142: ('[#6]~[Mg]', 0),
    143: ('[#6]~[Al]', 0),
    144: ('[#6]~[#14]', 0),
    145: ('[#6]~[#15]', 0),
    146: ('[#6]~[#16]', 0),
    147: ('[#6]~[Cl]', 0),
    148: ('[#6]~[#33]', 0),
    149: ('[#6]~[#34]', 0),
    150: ('[#6]~[Br]', 0),
    151: ('[#6]~[I]', 0),
    152: ('[#7&!H0]', 0),
    153: ('[#7]~[#7]', 0),
    154: ('[#7]~[#8]', 0),
    155: ('[#7]~[F]', 0),
    156: ('[#7]~[#14]', 0),
    157: ('[#7]~[#15]', 0),
    158: ('[#7]~[#16]', 0),
    159: ('[#7]~[Cl]', 0),
    160: ('[#7]~[Br]', 0),
    161: ('[#8&!H0]', 0),
    162: ('[#8]~[#8]', 0),
    163: ('[#8]~[Mg]', 0),
    164: ('[#8]~[Na]', 0),
    165: ('[#8]~[Al]', 0),
    166: ('[#8]~[#14]', 0),
    167: ('[#8]~[#15]', 0),
    168: ('[#8]~[K]', 0),
    169: ('[F]~[#15]', 0),
    170: ('[F]~[#16]', 0),
    171: ('[Al&!H0]', 0),
    172: ('[Al]~[Cl]', 0),
    173: ('[#14&!H0]', 0),
    174: ('[#14]~[#14]', 0),
    175: ('[#14]~[Cl]', 0),
    176: ('[#15&!H0]', 0),
    177: ('[#15]~[#15]', 0),
    178: ('[#33&!H0]', 0),
    179: ('[#33]~[#33]', 0),
    180: ('[#6](~Br)(~[#6])', 0),
    181: ('[#6](~Br)(~[#6])(~[#6])', 0),
    182: ('[#6&!H0]~[Br]', 0),
    183: ('[#6](~[Br])(:[c])', 0),
    184: ('[#6](~[Br])(:[n])', 0),
    185: ('[#6](~[#6])(~[#6])', 0),
    186: ('[#6](~[#6])(~[#6])(~[#6])', 0),
    187: ('[#6](~[#6])(~[#6])(~[#6])(~[#6])', 0),
    188: ('[#6H1](~[#6])(~[#6])(~[#6])', 0),
    189: ('[#6](~[#6])(~[#6])(~[#6])(~[#7])', 0),
    190: ('[#6](~[#6])(~[#6])(~[#6])(~[#8])', 0),
    191: ('[#6H1](~[#6])(~[#6])(~[#7])', 0),
    192: ('[#6H1](~[#6])(~[#6])(~[#8])', 0),
    193: ('[#6](~[#6])(~[#6])(~[#7])', 0),
    194: ('[#6](~[#6])(~[#6])(~[#8])', 0),
    195: ('[#6](~[#6])(~[Cl])', 0),
    196: ('[#6&!H0](~[#6])(~[Cl])', 0),
    197: ('[#6H,#6H2,#6H3,#6H4]~[#6]', 0),
    198: ('[#6&!H0](~[#6])(~[#7])', 0),
    199: ('[#6&!H0](~[#6])(~[#8])', 0),
    200: ('[#6H1](~[#6])(~[#8])(~[#8])', 0),
    201: ('[#6&!H0](~[#6])(~[#15])', 0),
    202: ('[#6&!H0](~[#6])(~[#16])', 0),
    203: ('[#6](~[#6])(~[I])', 0),
    204: ('[#6](~[#6])(~[#7])', 0),
    205: ('[#6](~[#6])(~[#8])', 0),
    206: ('[#6](~[#6])(~[#16])', 0),
    207: ('[#6](~[#6])(~[#14])', 0),
    208: ('[#6](~[#6])(:c)', 0),
    209: ('[#6](~[#6])(:c)(:c)', 0),
    210: ('[#6](~[#6])(:c)(:n)', 0),
    211: ('[#6](~[#6])(:n)', 0),
    212: ('[#6](~[#6])(:n)(:n)', 0),
    213: ('[#6](~[Cl])(~[Cl])', 0),
    214: ('[#6&!H0](~[Cl])', 0),
    215: ('[#6](~[Cl])(:c)', 0),
    216: ('[#6](~[F])(~[F])', 0),
    217: ('[#6](~[F])(:c)', 0),
    218: ('[#6&!H0](~[#7])', 0),
    219: ('[#6&!H0](~[#8])', 0),
    220: ('[#6&!H0](~[#8])(~[#8])', 0),
    221: ('[#6&!H0](~[#16])', 0),
    222: ('[#6&!H0](~[#14])', 0),
    223: ('[#6&!H0]:c', 0),
    224: ('[#6&!H0](:c)(:c)', 0),
    225: ('[#6&!H0](:c)(:n)', 0),
    226: ('[#6&!H0](:n)', 0),
    227: ('[#6H3]', 0),
    228: ('[#6](~[#7])(~[#7])', 0),
    229: ('[#6](~[#7])(:c)', 0),
    230: ('[#6](~[#7])(:c)(:c)', 0),
    231: ('[#6](~[#7])(:c)(:n)', 0),
    232: ('[#6](~[#7])(:n)', 0),
    233: ('[#6](~[#8])(~[#8])', 0),
    234: ('[#6](~[#8])(:c)', 0),
    235: ('[#6](~[#8])(:c)(:c)', 0),
    236: ('[#6](~[#16])(:c)', 0),
    237: ('[#6](:c)(:c)', 0),
    238: ('[#6](:c)(:c)(:c)', 0),
    239: ('[#6](:c)(:c)(:n)', 0),
    240: ('[#6](:c)(:n)', 0),
    241: ('[#6](:c)(:n)(:n)', 0),
    242: ('[#6](:n)(:n)', 0),
    243: ('[#7](~[#6])(~[#6])', 0),
    244: ('[#7](~[#6])(~[#6])(~[#6])', 0),
    245: ('[#7&!H0](~[#6])(~[#6])', 0),
    246: ('[#7&!H0](~[#6])', 0),
    247: ('[#7&!H0](~[#6])(~[#7])', 0),
    248: ('[#7](~[#6])(~[#8])', 0),
    249: ('[#7](~[#6])(:c)', 0),
    250: ('[#7](~[#6])(:c)(:c)', 0),
    251: ('[#7&!H0](~[#7])', 0),
    252: ('[#7&!H0](:c)', 0),
    253: ('[#7&!H0](:c)(:c)', 0),
    254: ('[#7](~[#8])(~[#8])', 0),
    255: ('[#7](~[#8])(:o)', 0),
    256: ('[#7](:c)(:c)', 0),
    257: ('[#7](:c)(:c)(:c)', 0),
    258: ('[#8](~[#6])(~[#6])', 0),
    259: ('[#8&!H0](~[#6])', 0),
    260: ('[#8](~[#6])(~[#15])', 0),
    261: ('[#8&!H0](~[#16])', 0),
    262: ('[#8](:c)(:c)', 0),
    263: ('[#15](~[#6])(~[#6])', 0),
    264: ('[#15](~[#8])(~[#8])', 0),
    265: ('[#16](~[#6])(~[#6])', 0),
    266: ('[#16&!H0](~[#6])', 0),
    267: ('[#16](~[#6])(~[#8])', 0),
    268: ('[#14](~[#6])(~[#6])', 0),
    269: ('[#6]=,:[#6]', 0),
    270: ('[#6]#[#6]', 0),
    271: ('[#6]=,:[#7]', 0),
    272: ('[#6]#[#7]', 0),
    273: ('[#6]=,:[#8]', 0),
    274: ('[#6]=,:[#16]', 0),
    275: ('[#7]=,:[#7]', 0),
    276: ('[#7]=,:[#8]', 0),
    277: ('[#7]=,:[#15]', 0),
    278: ('[#15]=,:[#8]', 0),
    279: ('[#15]=,:[#15]', 0),
    280: ('[#6](#[#6])(-,:[#6])', 0),
    281: ('[#6&!H0](#[#6])', 0),
    282: ('[#6](#[#7])(-,:[#6])', 0),
    283: ('[#6](-,:[#6])(-,:[#6])(=,:[#6])', 0),
    284: ('[#6](-,:[#6])(-,:[#6])(=,:[#7])', 0),
    285: ('[#6](-,:[#6])(-,:[#6])(=,:[#8])', 0),
    286: ('[#6](-,:[#6])([Cl])(=,:[#8])', 0),
    287: ('[#6&!H0](-,:[#6])(=,:[#6])', 0),
    288: ('[#6&!H0](-,:[#6])(=,:[#7])', 0),
    289: ('[#6&!H0](-,:[#6])(=,:[#8])', 0),
    290: ('[#6](-,:[#6])(-,:[#7])(=,:[#6])', 0),
    291: ('[#6](-,:[#6])(-,:[#7])(=,:[#7])', 0),
    292: ('[#6](-,:[#6])(-,:[#7])(=,:[#8])', 0),
    293: ('[#6](-,:[#6])(-,:[#8])(=,:[#8])', 0),
    294: ('[#6](-,:[#6])(=,:[#6])', 0),
    295: ('[#6](-,:[#6])(=,:[#7])', 0),
    296: ('[#6](-,:[#6])(=,:[#8])', 0),
    297: ('[#6]([Cl])(=,:[#8])', 0),
    298: ('[#6&!H0](-,:[#7])(=,:[#6])', 0),
    299: ('[#6&!H0](=,:[#6])', 0),
    300: ('[#6&!H0](=,:[#7])', 0),
    301: ('[#6&!H0](=,:[#8])', 0),
    302: ('[#6](-,:[#7])(=,:[#6])', 0),
    303: ('[#6](-,:[#7])(=,:[#7])', 0),
    304: ('[#6](-,:[#7])(=,:[#8])', 0),
    305: ('[#6](-,:[#8])(=,:[#8])', 0),
    306: ('[#7](-,:[#6])(=,:[#6])', 0),
    307: ('[#7](-,:[#6])(=,:[#8])', 0),
    308: ('[#7](-,:[#8])(=,:[#8])', 0),
    309: ('[#15](-,:[#8])(=,:[#8])', 0),
    310: ('[#16](-,:[#6])(=,:[#8])', 0),
    311: ('[#16](-,:[#8])(=,:[#8])', 0),
    312: ('[#16](=,:[#8])(=,:[#8])', 0),
    313: ('[#6]-,:[#6]-,:[#6]#[#6]', 0),
    314: ('[#8]-,:[#6]-,:[#6]=,:[#7]', 0),
    315: ('[#8]-,:[#6]-,:[#6]=,:[#8]', 0),
    316: ('[#7]:[#6]-,:[#16&!H0]', 0),
    317: ('[#7]-,:[#6]-,:[#6]=,:[#6]', 0),
    318: ('[#8]=,:[#16]-,:[#6]-,:[#6]', 0),
    319: ('[#7]#[#6]-,:[#6]=,:[#6]', 0),
    320: ('[#6]=,:[#7]-,:[#7]-,:[#6]', 0),
    321: ('[#8]=,:[#16]-,:[#6]-,:[#7]', 0),
    322: ('[#16]-,:[#16]-,:[#6]:[#6]', 0),
    323: ('[#6]:[#6]-,:[#6]=,:[#6]', 0),
    324: ('[#16]:[#6]:[#6]:[#6]', 0),
    325: ('[#6]:[#7]:[#6]-,:[#6]', 0),
    326: ('[#16]-,:[#6]:[#7]:[#6]', 0),
    327: ('[#16]:[#6]:[#6]:[#7]', 0),
    328: ('[#16]-,:[#6]=,:[#7]-,:[#6]', 0),
    329: ('[#6]-,:[#8]-,:[#6]=,:[#6]', 0),
    330: ('[#7]-,:[#7]-,:[#6]:[#6]', 0),
    331: ('[#16]-,:[#6]=,:[#7&!H0]', 0),
    332: ('[#16]-,:[#6]-,:[#16]-,:[#6]', 0),
    333: ('[#6]:[#16]:[#6]-,:[#6]', 0),
    334: ('[#8]-,:[#16]-,:[#6]:[#6]', 0),
    335: ('[#6]:[#7]-,:[#6]:[#6]', 0),
    336: ('[#7]-,:[#16]-,:[#6]:[#6]', 0),
    337: ('[#7]-,:[#6]:[#7]:[#6]', 0),
    338: ('[#7]:[#6]:[#6]:[#7]', 0),
    339: ('[#7]-,:[#6]:[#7]:[#7]', 0),
    340: ('[#7]-,:[#6]=,:[#7]-,:[#6]', 0),
    341: ('[#7]-,:[#6]=,:[#7&!H0]', 0),
    342: ('[#7]-,:[#6]-,:[#16]-,:[#6]', 0),
    343: ('[#6]-,:[#6]-,:[#6]=,:[#6]', 0),
    344: ('[#6]-,:[#7]:[#6&!H0]', 0),
    345: ('[#7]-,:[#6]:[#8]:[#6]', 0),
    346: ('[#8]=,:[#6]-,:[#6]:[#6]', 0),
    347: ('[#8]=,:[#6]-,:[#6]:[#7]', 0),
    348: ('[#6]-,:[#7]-,:[#6]:[#6]', 0),
    349: ('[#7]:[#7]-,:[#6&!H0]', 0),
    350: ('[#8]-,:[#6]:[#6]:[#7]', 0),
    351: ('[#8]-,:[#6]=,:[#6]-,:[#6]', 0),
    352: ('[#7]-,:[#6]:[#6]:[#7]', 0),
    353: ('[#6]-,:[#16]-,:[#6]:[#6]', 0),
    354: ('[Cl]-,:[#6]:[#6]-,:[#6]', 0),
    355: ('[#7]-,:[#6]=,:[#6&!H0]', 0),
    356: ('[Cl]-,:[#6]:[#6&!H0]', 0),
    357: ('[#7]:[#6]:[#7]-,:[#6]', 0),
    358: ('[Cl]-,:[#6]:[#6]-,:[#8]', 0),
    359: ('[#6]-,:[#6]:[#7]:[#6]', 0),
    360: ('[#6]-,:[#6]-,:[#16]-,:[#6]', 0),
    361: ('[#16]=,:[#6]-,:[#7]-,:[#6]', 0),
    362: ('[Br]-,:[#6]:[#6]-,:[#6]', 0),
    363: ('[#7&!H0]-,:[#7&!H0]', 0),
    364: ('[#16]=,:[#6]-,:[#7&!H0]', 0),
    365: ('[#6]-,:[#33]-[#8&!H0]', 0),
    366: ('[#16]:[#6]:[#6&!H0]', 0),
    367: ('[#8]-,:[#7]-,:[#6]-,:[#6]', 0),
    368: ('[#7]-,:[#7]-,:[#6]-,:[#6]', 0),
    369: ('[#6H,#6H2,#6H3]=,:[#6H,#6H2,#6H3]', 0),
    370: ('[#7]-,:[#7]-,:[#6]-,:[#7]', 0),
    371: ('[#8]=,:[#6]-,:[#7]-,:[#7]', 0),
    372: ('[#7]=,:[#6]-,:[#7]-,:[#6]', 0),
    373: ('[#6]=,:[#6]-,:[#6]:[#6]', 0),
    374: ('[#6]:[#7]-,:[#6&!H0]', 0),
    375: ('[#6]-,:[#7]-,:[#7&!H0]', 0),
    376: ('[#7]:[#6]:[#6]-,:[#6]', 0),
    377: ('[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
    378: ('[#33]-,:[#6]:[#6&!H0]', 0),
    379: ('[Cl]-,:[#6]:[#6]-,:[Cl]', 0),
    380: ('[#6]:[#6]:[#7&!H0]', 0),
    381: ('[#7&!H0]-,:[#6&!H0]', 0),
    382: ('[Cl]-,:[#6]-,:[#6]-,:[Cl]', 0),
    383: ('[#7]:[#6]-,:[#6]:[#6]', 0),
    384: ('[#16]-,:[#6]:[#6]-,:[#6]', 0),
    385: ('[#16]-,:[#6]:[#6&!H0]', 0),
    386: ('[#16]-,:[#6]:[#6]-,:[#7]', 0),
    387: ('[#16]-,:[#6]:[#6]-,:[#8]', 0),
    388: ('[#8]=,:[#6]-,:[#6]-,:[#6]', 0),
    389: ('[#8]=,:[#6]-,:[#6]-,:[#7]', 0),
    390: ('[#8]=,:[#6]-,:[#6]-,:[#8]', 0),
    391: ('[#7]=,:[#6]-,:[#6]-,:[#6]', 0),
    392: ('[#7]=,:[#6]-,:[#6&!H0]', 0),
    393: ('[#6]-,:[#7]-,:[#6&!H0]', 0),
    394: ('[#8]-,:[#6]:[#6]-,:[#6]', 0),
    395: ('[#8]-,:[#6]:[#6&!H0]', 0),
    396: ('[#8]-,:[#6]:[#6]-,:[#7]', 0),
    397: ('[#8]-,:[#6]:[#6]-,:[#8]', 0),
    398: ('[#7]-,:[#6]:[#6]-,:[#6]', 0),
    399: ('[#7]-,:[#6]:[#6&!H0]', 0),
    400: ('[#7]-,:[#6]:[#6]-,:[#7]', 0),
    401: ('[#8]-,:[#6]-,:[#6]:[#6]', 0),
    402: ('[#7]-,:[#6]-,:[#6]:[#6]', 0),
    403: ('[Cl]-,:[#6]-,:[#6]-,:[#6]', 0),
    404: ('[Cl]-,:[#6]-,:[#6]-,:[#8]', 0),
    405: ('[#6]:[#6]-,:[#6]:[#6]', 0),
    406: ('[#8]=,:[#6]-,:[#6]=,:[#6]', 0),
    407: ('[Br]-,:[#6]-,:[#6]-,:[#6]', 0),
    408: ('[#7]=,:[#6]-,:[#6]=,:[#6]', 0),
    409: ('[#6]=,:[#6]-,:[#6]-,:[#6]', 0),
    410: ('[#7]:[#6]-,:[#8&!H0]', 0),
    411: ('[#8]=,:[#7]-,:c:c', 0),
    412: ('[#8]-,:[#6]-,:[#7&!H0]', 0),
    413: ('[#7]-,:[#6]-,:[#7]-,:[#6]', 0),
    414: ('[Cl]-,:[#6]-,:[#6]=,:[#8]', 0),
    415: ('[Br]-,:[#6]-,:[#6]=,:[#8]', 0),
    416: ('[#8]-,:[#6]-,:[#8]-,:[#6]', 0),
    417: ('[#6]=,:[#6]-,:[#6]=,:[#6]', 0),
    418: ('[#6]:[#6]-,:[#8]-,:[#6]', 0),
    419: ('[#8]-,:[#6]-,:[#6]-,:[#7]', 0),
    420: ('[#8]-,:[#6]-,:[#6]-,:[#8]', 0),
    421: ('N#[#6]-,:[#6]-,:[#6]', 0),
    422: ('[#7]-,:[#6]-,:[#6]-,:[#7]', 0),
    423: ('[#6]:[#6]-,:[#6]-,:[#6]', 0),
    424: ('[#6&!H0]-,:[#8&!H0]', 0),
    425: ('n:c:n:c', 0),
    426: ('[#8]-,:[#6]-,:[#6]=,:[#6]', 0),
    427: ('[#8]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
    428: ('[#8]-,:[#6]-,:[#6]:[#6]-,:[#8]', 0),
    429: ('[#7]=,:[#6]-,:[#6]:[#6&!H0]', 0),
    430: ('c:c-,:[#7]-,:c:c', 0),
    431: ('[#6]-,:[#6]:[#6]-,:c:c', 0),
    432: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    433: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    434: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    435: ('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    436: ('[Cl]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0),
    437: ('c:c-,:[#6]=,:[#6]-,:[#6]', 0),
    438: ('[#6]-,:[#6]:[#6]-,:[#7]-,:[#6]', 0),
    439: ('[#6]-,:[#16]-,:[#6]-,:[#6]-,:[#6]', 0),
    440: ('[#7]-,:[#6]:[#6]-,:[#8&!H0]', 0),
    441: ('[#8]=,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
    442: ('[#6]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0),
    443: ('[#6]-,:[#6]:[#6]-,:[#8&!H0]', 0),
    444: ('[Cl]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    445: ('[#7]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    446: ('[#7]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    447: ('[#6]-,:[#8]-,:[#6]-,:[#6]=,:[#6]', 0),
    448: ('c:c-,:[#6]-,:[#6]-,:[#6]', 0),
    449: ('[#7]=,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
    450: ('[#8]=,:[#6]-,:[#6]-,:c:c', 0),
    451: ('[Cl]-,:[#6]:[#6]:[#6]-,:[#6]', 0),
    452: ('[#6H,#6H2,#6H3]-,:[#6]=,:[#6H,#6H2,#6H3]', 0),
    453: ('[#7]-,:[#6]:[#6]:[#6]-,:[#6]', 0),
    454: ('[#7]-,:[#6]:[#6]:[#6]-,:[#7]', 0),
    455: ('[#8]=,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
    456: ('[#6]-,:c:c:[#6]-,:[#6]', 0),
    457: ('[#6]-,:[#8]-,:[#6]-,:[#6]:c', 0),
    458: ('[#8]=,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
    459: ('[#8]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0),
    460: ('[#7]-,:[#6]-,:[#6]-,:[#6]:c', 0),
    461: ('[#6]-,:[#6]-,:[#6]-,:[#6]:c', 0),
    462: ('[Cl]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
    463: ('[#6]-,:[#8]-,:[#6]-,:[#8]-,:[#6]', 0),
    464: ('[#7]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
    465: ('[#7]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0),
    466: ('[#6]-,:[#7]-,:[#6]-,:[#6]-,:[#6]', 0),
    467: ('[#6]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0),
    468: ('[#7]-,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
    469: ('c:c:n:n:c', 0),
    470: ('[#6]-,:[#6]-,:[#6]-,:[#8&!H0]', 0),
    471: ('c:[#6]-,:[#6]-,:[#6]:c', 0),
    472: ('[#8]-,:[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
    473: ('c:c-,:[#8]-,:[#6]-,:[#6]', 0),
    474: ('[#7]-,:[#6]:c:c:n', 0),
    475: ('[#8]=,:[#6]-,:[#8]-,:[#6]:c', 0),
    476: ('[#8]=,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
    477: ('[#8]=,:[#6]-,:[#6]:[#6]-,:[#7]', 0),
    478: ('[#8]=,:[#6]-,:[#6]:[#6]-,:[#8]', 0),
    479: ('[#6]-,:[#8]-,:[#6]:[#6]-,:[#6]', 0),
    480: ('[#8]=,:[#33]-,:[#6]:c:c', 0),
    481: ('[#6]-,:[#7]-,:[#6]-,:[#6]:c', 0),
    482: ('[#16]-,:[#6]:c:c-,:[#7]', 0),
    483: ('[#8]-,:[#6]:[#6]-,:[#8]-,:[#6]', 0),
    484: ('[#8]-,:[#6]:[#6]-,:[#8&!H0]', 0),
    485: ('[#6]-,:[#6]-,:[#8]-,:[#6]:c', 0),
    486: ('[#7]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
    487: ('[#6]-,:[#6]-,:[#6]:[#6]-,:[#6]', 0),
    488: ('[#7]-,:[#7]-,:[#6]-,:[#7&!H0]', 0),
    489: ('[#6]-,:[#7]-,:[#6]-,:[#7]-,:[#6]', 0),
    490: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    491: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    492: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    493: ('[#6]=,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    494: ('[#8]-,:[#6]-,:[#6]-,:[#6]=,:[#6]', 0),
    495: ('[#8]-,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
    496: ('[#6&!H0]-,:[#6]-,:[#7&!H0]', 0),
    497: ('[#6]-,:[#6]=,:[#7]-,:[#7]-,:[#6]', 0),
    498: ('[#8]=,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
    499: ('[#8]=,:[#6]-,:[#7]-,:[#6&!H0]', 0),
    500: ('[#8]=,:[#6]-,:[#7]-,:[#6]-,:[#7]', 0),
    501: ('[#8]=,:[#7]-,:[#6]:[#6]-,:[#7]', 0),
    502: ('[#8]=,:[#7]-,:c:c-,:[#8]', 0),
    503: ('[#8]=,:[#6]-,:[#7]-,:[#6]=,:[#8]', 0),
    504: ('[#8]-,:[#6]:[#6]:[#6]-,:[#6]', 0),
    505: ('[#8]-,:[#6]:[#6]:[#6]-,:[#7]', 0),
    506: ('[#8]-,:[#6]:[#6]:[#6]-,:[#8]', 0),
    507: ('[#7]-,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
    508: ('[#8]-,:[#6]-,:[#6]-,:[#6]:c', 0),
    509: ('[#6]-,:[#6]-,:[#7]-,:[#6]-,:[#6]', 0),
    510: ('[#6]-,:[#7]-,:[#6]:[#6]-,:[#6]', 0),
    511: ('[#6]-,:[#6]-,:[#16]-,:[#6]-,:[#6]', 0),
    512: ('[#8]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
    513: ('[#6]-,:[#6]=,:[#6]-,:[#6]-,:[#6]', 0),
    514: ('[#8]-,:[#6]-,:[#8]-,:[#6]-,:[#6]', 0),
    515: ('[#8]-,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
    516: ('[#8]-,:[#6]-,:[#6]-,:[#8&!H0]', 0),
    517: ('[#6]-,:[#6]=,:[#6]-,:[#6]=,:[#6]', 0),
    518: ('[#7]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0),
    519: ('[#6]=,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
    520: ('[#6]=,:[#6]-,:[#6]-,:[#8&!H0]', 0),
    521: ('[#6]-,:[#6]:[#6]-,:[#6]-,:[#6]', 0),
    522: ('[Cl]-,:[#6]:[#6]-,:[#6]=,:[#8]', 0),
    523: ('[Br]-,:[#6]:c:c-,:[#6]', 0),
    524: ('[#8]=,:[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
    525: ('[#8]=,:[#6]-,:[#6]=,:[#6&!H0]', 0),
    526: ('[#8]=,:[#6]-,:[#6]=,:[#6]-,:[#7]', 0),
    527: ('[#7]-,:[#6]-,:[#7]-,:[#6]:c', 0),
    528: ('[Br]-,:[#6]-,:[#6]-,:[#6]:c', 0),
    529: ('[#7]#[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    530: ('[#6]-,:[#6]=,:[#6]-,:[#6]:c', 0),
    531: ('[#6]-,:[#6]-,:[#6]=,:[#6]-,:[#6]', 0),
    532: ('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    533: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    534: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    535: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    536: ('[#7]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    537: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    538: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    539: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    540: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
    541: ('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    542: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    543: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    544: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    545: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    546: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]', 0),
    547: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]=,:[#8]', 0),
    548: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]', 0),
    549: ('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    550: ('[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#6])-,:[#6]', 0),
    551: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    552: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#6])-,:[#6]', 0),
    553: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#8]-,:[#6]', 0),
    554: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#8])-,:[#6]', 0),
    555: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#7]-,:[#6]', 0),
    556: ('[#8]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#7])-,:[#6]', 0),
    557: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6]', 0),
    558: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#8])-,:[#6]', 0),
    559: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](=,:[#8])-,:[#6]', 0),
    560: ('[#8]=,:[#6]-,:[#6]-,:[#6]-,:[#6]-,:[#6](-,:[#7])-,:[#6]', 0),
    561: ('[#6]-,:[#6](-,:[#6])-,:[#6]-,:[#6]', 0),
    562: ('[#6]-,:[#6](-,:[#6])-,:[#6]-,:[#6]-,:[#6]', 0),
    563: ('[#6]-,:[#6]-,:[#6](-,:[#6])-,:[#6]-,:[#6]', 0),
    564: ('[#6]-,:[#6](-,:[#6])(-,:[#6])-,:[#6]-,:[#6]', 0),
    565: ('[#6]-,:[#6](-,:[#6])-,:[#6](-,:[#6])-,:[#6]', 0),
    566: ('[#6]c1ccc([#6])cc1', 0),
    567: ('[#6]c1ccc([#8])cc1', 0),
    568: ('[#6]c1ccc([#16])cc1', 0),
    569: ('[#6]c1ccc([#7])cc1', 0),
    570: ('[#6]c1ccc(Cl)cc1', 0),
    571: ('[#6]c1ccc(Br)cc1', 0),
    572: ('[#8]c1ccc([#8])cc1', 0),
    573: ('[#8]c1ccc([#16])cc1', 0),
    574: ('[#8]c1ccc([#7])cc1', 0),
    575: ('[#8]c1ccc(Cl)cc1', 0),
    576: ('[#8]c1ccc(Br)cc1', 0),
    577: ('[#16]c1ccc([#16])cc1', 0),
    578: ('[#16]c1ccc([#7])cc1', 0),
    579: ('[#16]c1ccc(Cl)cc1', 0),
    580: ('[#16]c1ccc(Br)cc1', 0),
    581: ('[#7]c1ccc([#7])cc1', 0),
    582: ('[#7]c1ccc(Cl)cc1', 0),
    583: ('[#7]c1ccc(Br)cc1', 0),
    584: ('Clc1ccc(Cl)cc1', 0),
    585: ('Clc1ccc(Br)cc1', 0),
    586: ('Brc1ccc(Br)cc1', 0),
    587: ('[#6]c1cc([#6])ccc1', 0),
    588: ('[#6]c1cc([#8])ccc1', 0),
    589: ('[#6]c1cc([#16])ccc1', 0),
    590: ('[#6]c1cc([#7])ccc1', 0),
    591: ('[#6]c1cc(Cl)ccc1', 0),
    592: ('[#6]c1cc(Br)ccc1', 0),
    593: ('[#8]c1cc([#8])ccc1', 0),
    594: ('[#8]c1cc([#16])ccc1', 0),
    595: ('[#8]c1cc([#7])ccc1', 0),
    596: ('[#8]c1cc(Cl)ccc1', 0),
    597: ('[#8]c1cc(Br)ccc1', 0),
    598: ('[#16]c1cc([#16])ccc1', 0),
    599: ('[#16]c1cc([#7])ccc1', 0),
    600: ('[#16]c1cc(Cl)ccc1', 0),
    601: ('[#16]c1cc(Br)ccc1', 0),
    602: ('[#7]c1cc([#7])ccc1', 0),
    603: ('[#7]c1cc(Cl)ccc1', 0),
    604: ('[#7]c1cc(Br)ccc1', 0),
    605: ('Clc1cc(Cl)ccc1', 0),
    606: ('Clc1cc(Br)ccc1', 0),
    607: ('Brc1cc(Br)ccc1', 0),
    608: ('[#6]c1c([#6])cccc1', 0),
    609: ('[#6]c1c([#8])cccc1', 0),
    610: ('[#6]c1c([#16])cccc1', 0),
    611: ('[#6]c1c([#7])cccc1', 0),
    612: ('[#6]c1c(Cl)cccc1', 0),
    613: ('[#6]c1c(Br)cccc1', 0),
    614: ('[#8]c1c([#8])cccc1', 0),
    615: ('[#8]c1c([#16])cccc1', 0),
    616: ('[#8]c1c([#7])cccc1', 0),
    617: ('[#8]c1c(Cl)cccc1', 0),
    618: ('[#8]c1c(Br)cccc1', 0),
    619: ('[#16]c1c([#16])cccc1', 0),
    620: ('[#16]c1c([#7])cccc1', 0),
    621: ('[#16]c1c(Cl)cccc1', 0),
    622: ('[#16]c1c(Br)cccc1', 0),
    623: ('[#7]c1c([#7])cccc1', 0),
    624: ('[#7]c1c(Cl)cccc1', 0),
    625: ('[#7]c1c(Br)cccc1', 0),
    626: ('Clc1c(Cl)cccc1', 0),
    627: ('Clc1c(Br)cccc1', 0),
    628: ('Brc1c(Br)cccc1', 0),
    629: ('[#6][#6]1[#6][#6][#6]([#6])[#6][#6]1', 0),
    630: ('[#6][#6]1[#6][#6][#6]([#8])[#6][#6]1', 0),
    631: ('[#6][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0),
    632: ('[#6][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
    633: ('[#6][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
    634: ('[#6][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    635: ('[#8][#6]1[#6][#6][#6]([#8])[#6][#6]1', 0),
    636: ('[#8][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0),
    637: ('[#8][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
    638: ('[#8][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
    639: ('[#8][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    640: ('[#16][#6]1[#6][#6][#6]([#16])[#6][#6]1', 0),
    641: ('[#16][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
    642: ('[#16][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
    643: ('[#16][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    644: ('[#7][#6]1[#6][#6][#6]([#7])[#6][#6]1', 0),
    645: ('[#7][#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
    646: ('[#7][#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    647: ('Cl[#6]1[#6][#6][#6](Cl)[#6][#6]1', 0),
    648: ('Cl[#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    649: ('Br[#6]1[#6][#6][#6](Br)[#6][#6]1', 0),
    650: ('[#6][#6]1[#6][#6]([#6])[#6][#6][#6]1', 0),
    651: ('[#6][#6]1[#6][#6]([#8])[#6][#6][#6]1', 0),
    652: ('[#6][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0),
    653: ('[#6][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
    654: ('[#6][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
    655: ('[#6][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    656: ('[#8][#6]1[#6][#6]([#8])[#6][#6][#6]1', 0),
    657: ('[#8][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0),
    658: ('[#8][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
    659: ('[#8][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
    660: ('[#8][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    661: ('[#16][#6]1[#6][#6]([#16])[#6][#6][#6]1', 0),
    662: ('[#16][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
    663: ('[#16][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
    664: ('[#16][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    665: ('[#7][#6]1[#6][#6]([#7])[#6][#6][#6]1', 0),
    666: ('[#7][#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
    667: ('[#7][#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    668: ('Cl[#6]1[#6][#6](Cl)[#6][#6][#6]1', 0),
    669: ('Cl[#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    670: ('Br[#6]1[#6][#6](Br)[#6][#6][#6]1', 0),
    671: ('[#6][#6]1[#6]([#6])[#6][#6][#6][#6]1', 0),
    672: ('[#6][#6]1[#6]([#8])[#6][#6][#6][#6]1', 0),
    673: ('[#6][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0),
    674: ('[#6][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
    675: ('[#6][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
    676: ('[#6][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    677: ('[#8][#6]1[#6]([#8])[#6][#6][#6][#6]1', 0),
    678: ('[#8][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0),
    679: ('[#8][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
    680: ('[#8][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
    681: ('[#8][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    682: ('[#16][#6]1[#6]([#16])[#6][#6][#6][#6]1', 0),
    683: ('[#16][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
    684: ('[#16][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
    685: ('[#16][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    686: 

('[#7][#6]1[#6]([#7])[#6][#6][#6][#6]1', 0),
    687: ('[#7][#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
    688: ('[#7][#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    689: ('Cl[#6]1[#6](Cl)[#6][#6][#6][#6]1', 0),
    690: ('Cl[#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    691: ('Br[#6]1[#6](Br)[#6][#6][#6][#6]1', 0),
    692: ('[#6][#6]1[#6][#6]([#6])[#6][#6]1', 0),
    693: ('[#6][#6]1[#6][#6]([#8])[#6][#6]1', 0),
    694: ('[#6][#6]1[#6][#6]([#16])[#6][#6]1', 0),
    695: ('[#6][#6]1[#6][#6]([#7])[#6][#6]1', 0),
    696: ('[#6][#6]1[#6][#6](Cl)[#6][#6]1', 0),
    697: ('[#6][#6]1[#6][#6](Br)[#6][#6]1', 0),
    698: ('[#8][#6]1[#6][#6]([#8])[#6][#6]1', 0),
    699: ('[#8][#6]1[#6][#6]([#16])[#6][#6]1', 0),
    700: ('[#8][#6]1[#6][#6]([#7])[#6][#6]1', 0),
    701: ('[#8][#6]1[#6][#6](Cl)[#6][#6]1', 0),
    702: ('[#8][#6]1[#6][#6](Br)[#6][#6]1', 0),
    703: ('[#16][#6]1[#6][#6]([#16])[#6][#6]1', 0),
    704: ('[#16][#6]1[#6][#6]([#7])[#6][#6]1', 0),
    705: ('[#16][#6]1[#6][#6](Cl)[#6][#6]1', 0),
    706: ('[#16][#6]1[#6][#6](Br)[#6][#6]1', 0),
    707: ('[#7][#6]1[#6][#6]([#7])[#6][#6]1', 0),
    708: ('[#7][#6]1[#6][#6](Cl)[#6][#6]1', 0),
    709: ('[#7][#6]1[#6][#6](Br)[#6][#6]1', 0),
    710: ('Cl[#6]1[#6][#6](Cl)[#6][#6]1', 0),
    711: ('Cl[#6]1[#6][#6](Br)[#6][#6]1', 0),
    712: ('Br[#6]1[#6][#6](Br)[#6][#6]1', 0),
    713: ('[#6][#6]1[#6]([#6])[#6][#6][#6]1', 0),
    714: ('[#6][#6]1[#6]([#8])[#6][#6][#6]1', 0),
    715: ('[#6][#6]1[#6]([#16])[#6][#6][#6]1', 0),
    716: ('[#6][#6]1[#6]([#7])[#6][#6][#6]1', 0),
    717: ('[#6][#6]1[#6](Cl)[#6][#6][#6]1', 0),
    718: ('[#6][#6]1[#6](Br)[#6][#6][#6]1', 0),
    719: ('[#8][#6]1[#6]([#8])[#6][#6][#6]1', 0),
    720: ('[#8][#6]1[#6]([#16])[#6][#6][#6]1', 0),
    721: ('[#8][#6]1[#6]([#7])[#6][#6][#6]1', 0),
    722: ('[#8][#6]1[#6](Cl)[#6][#6][#6]1', 0),
    723: ('[#8][#6]1[#6](Br)[#6][#6][#6]1', 0),
    724: ('[#16][#6]1[#6]([#16])[#6][#6][#6]1', 0),
    725: ('[#16][#6]1[#6]([#7])[#6][#6][#6]1', 0),
    726: ('[#16][#6]1[#6](Cl)[#6][#6][#6]1', 0),
    727: ('[#16][#6]1[#6](Br)[#6][#6][#6]1', 0),
    728: ('[#7][#6]1[#6]([#7])[#6][#6][#6]1', 0),
    # five-atom ring like its neighbours; one ring [#6] had been dropped
    729: ('[#7][#6]1[#6](Cl)[#6][#6][#6]1', 0),
    730: ('[#7][#6]1[#6](Br)[#6][#6][#6]1', 0),
    731: ('Cl[#6]1[#6](Cl)[#6][#6][#6]1', 0),
    732: ('Cl[#6]1[#6](Br)[#6][#6][#6]1', 0),
    733: ('Br[#6]1[#6](Br)[#6][#6][#6]1', 0),
}
# Cache of compiled (pattern, count) pairs. It stays None until
# calcPubChemFingerPart1 builds it on its first call.
PubchemKeys = None


def InitKeys(keyList, keyDict):
    """ *Internal Use Only*

    Compile the SMARTS strings of ``keyDict`` into ``keyList``, run once.

    **Arguments**

      - keyList: pre-sized list that is filled in place; the entry for
        key ``k`` is stored at index ``k - 1`` as ``(query, count)``

      - keyDict: mapping of 1-based key number to ``(smarts, count)``

    Entries whose SMARTS is ``'?'`` are skipped. When the SMARTS parser
    returns a falsy result the problem is printed and the slot in
    ``keyList`` is left untouched.
    """
    assert len(keyList) == len(keyDict.keys()), 'length mismatch'
    for number, (smarts, threshold) in keyDict.items():
        if smarts == '?':
            # placeholder key: nothing to compile
            continue
        query = Chem.MolFromSmarts(smarts)
        if query:
            keyList[number - 1] = (query, threshold)
        else:
            print('SMARTS parser error for key #%d: %s' % (number, smarts))
def calcPubChemFingerPart1(mol, **kwargs):
    """ Calculate the SMARTS-based PubChem fingerprint keys
    (original note: PubChem bits 1-115; 263-881).

    **Arguments**

      - mol: the molecule to be fingerprinted

      - ctor: (optional keyword) callable used to build the result; it is
        called with the number of bits and defaults to
        ``DataStructs.SparseBitVect``. Other keyword arguments are ignored.

    **Returns**

      the object built by ``ctor``. Key ``k`` of ``smartsPatts`` is written
      to bit ``k``; bit 0 is never written here.

    The compiled patterns are built on the first call and cached in the
    module-level ``PubchemKeys``.
    """
    global PubchemKeys
    if PubchemKeys is None:
        PubchemKeys = [(None, 0)] * len(smartsPatts.keys())
        InitKeys(PubchemKeys, smartsPatts)
    make_vect = kwargs.get('ctor', DataStructs.SparseBitVect)
    fingerprint = make_vect(len(PubchemKeys) + 1)
    for bit, (query, threshold) in enumerate(PubchemKeys, start=1):
        if query is None:
            # pattern was a placeholder or failed to compile
            continue
        if threshold == 0:
            # plain presence/absence key
            fingerprint[bit] = mol.HasSubstructMatch(query)
        elif len(mol.GetSubstructMatches(query)) > threshold:
            # count key: needs strictly more matches than the threshold
            fingerprint[bit] = 1
    return fingerprint
def func_1(mol, bits):
    """ *Internal Use Only*

    Set the ring-count bits from the sizes of all rings
    (original note: PubChem Fingerprints 116-263).

    **Arguments**

      - mol: object whose ``GetRingInfo().AtomRings()`` yields one atom
        sequence per ring

      - bits: indexable container, modified in place

    **Returns**

      ``(ringSize, bits)``: the list of every ring's size (in the order
      reported) and the same ``bits`` object.
    """
    # ring size -> (first bit index, number of count levels). Level j
    # ("at least j + 1 rings of this size") lives 7 bits after level j - 1.
    layout = {
        3: (0, 2),
        4: (14, 2),
        5: (28, 5),
        6: (63, 5),
        7: (98, 2),
        8: (112, 2),
        9: (126, 1),
        10: (133, 1),
    }
    tally = dict.fromkeys(layout, 0)
    ringSize = [len(atoms) for atoms in mol.GetRingInfo().AtomRings()]
    for size in ringSize:
        # rings outside 3..10 atoms are reported but do not set any bit
        if size in tally:
            tally[size] += 1
    for size, (first_bit, levels) in layout.items():
        for level in range(min(tally[size], levels)):
            bits[first_bit + 7 * level] = 1
    return ringSize, bits
def func_2(mol, bits):
    """ *Internal Use Only*
    saturated or aromatic carbon-only ring

    A ring from ``mol.GetRingInfo().BondRings()`` is counted when
    (a) its bonds are all SINGLE or all AROMATIC, and
    (b) every atom on those bonds is carbon.
    The carbon-only requirement applies to the saturated case as well, so
    a saturated hetero ring is not counted here.

    Returns ``(ringSize, bits)``: the sizes of the counted rings and the
    ``bits`` list that was passed in, updated in place.
    """
    # ring size -> bit indices; the n-th entry is switched on once at
    # least n qualifying rings of that size are present
    layout = {
        3: (1, 8),
        4: (15, 22),
        5: (29, 36, 43, 50, 57),
        6: (64, 71, 78, 85, 92),
        7: (99, 106),
        8: (113, 120),
        9: (127,),
        10: (134,),
    }
    tally = dict.fromkeys(layout, 0)
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        saturated = all(kind == 'SINGLE' for kind in kinds)
        aromatic = all(kind == 'AROMATIC' for kind in kinds)
        if not (saturated or aromatic):
            continue
        carbon_only = all(
            bond.GetBeginAtom().GetAtomicNum() == 6
            and bond.GetEndAtom().GetAtomicNum() == 6
            for bond in bonds
        )
        if not carbon_only:
            continue
        size = len(ring)
        ringSize.append(size)
        # sizes outside 3-10 are recorded in ringSize but own no bits
        if size in tally:
            tally[size] += 1
    for size, slots in layout.items():
        for slot in slots[:tally[size]]:
            bits[slot] = 1
    return ringSize, bits
def func_3(mol, bits):
    """ *Internal Use Only*
    saturated or aromatic nitrogen-containing

    A ring from ``mol.GetRingInfo().BondRings()`` is counted when
    (a) its bonds are all SINGLE or all AROMATIC, and
    (b) at least one atom on those bonds is nitrogen.
    The nitrogen requirement applies to the saturated case as well, so a
    saturated carbocycle is not counted here.

    Returns ``(ringSize, bits)``: the sizes of the counted rings and the
    ``bits`` list that was passed in, updated in place.
    """
    # ring size -> bit indices; the n-th entry is switched on once at
    # least n qualifying rings of that size are present
    layout = {
        3: (2, 9),
        4: (16, 23),
        5: (30, 37, 44, 51, 58),
        6: (65, 72, 79, 86, 93),
        7: (100, 107),
        8: (114, 121),
        9: (128,),
        10: (135,),
    }
    tally = dict.fromkeys(layout, 0)
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        saturated = all(kind == 'SINGLE' for kind in kinds)
        aromatic = all(kind == 'AROMATIC' for kind in kinds)
        if not (saturated or aromatic):
            continue
        has_nitrogen = any(
            bond.GetBeginAtom().GetAtomicNum() == 7
            or bond.GetEndAtom().GetAtomicNum() == 7
            for bond in bonds
        )
        if not has_nitrogen:
            continue
        size = len(ring)
        ringSize.append(size)
        # sizes outside 3-10 are recorded in ringSize but own no bits
        if size in tally:
            tally[size] += 1
    for size, slots in layout.items():
        for slot in slots[:tally[size]]:
            bits[slot] = 1
    return ringSize, bits
def func_4(mol, bits):
    """ *Internal Use Only*
    saturated or aromatic heteroatom-containing

    A ring from ``mol.GetRingInfo().BondRings()`` is counted when
    (a) its bonds are all SINGLE or all AROMATIC, and
    (b) at least one atom on those bonds is neither hydrogen nor carbon.
    The heteroatom requirement applies to the saturated case as well, so
    a saturated carbocycle is not counted here.

    Returns ``(ringSize, bits)``: the sizes of the counted rings and the
    ``bits`` list that was passed in, updated in place.
    """
    # ring size -> bit indices; the n-th entry is switched on once at
    # least n qualifying rings of that size are present
    layout = {
        3: (3, 10),
        4: (17, 24),
        5: (31, 38, 45, 52, 59),
        6: (66, 73, 80, 87, 94),
        7: (101, 108),
        8: (115, 122),
        9: (129,),
        10: (136,),
    }
    non_hetero = (1, 6)  # atomic numbers of H and C
    tally = dict.fromkeys(layout, 0)
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        saturated = all(kind == 'SINGLE' for kind in kinds)
        aromatic = all(kind == 'AROMATIC' for kind in kinds)
        if not (saturated or aromatic):
            continue
        has_hetero = any(
            bond.GetBeginAtom().GetAtomicNum() not in non_hetero
            or bond.GetEndAtom().GetAtomicNum() not in non_hetero
            for bond in bonds
        )
        if not has_hetero:
            continue
        size = len(ring)
        ringSize.append(size)
        # sizes outside 3-10 are recorded in ringSize but own no bits
        if size in tally:
            tally[size] += 1
    for size, slots in layout.items():
        for slot in slots[:tally[size]]:
            bits[slot] = 1
    return ringSize, bits
def func_5(mol, bits):
    """ *Internal Use Only*
    unsaturated non-aromatic carbon-only

    A ring from ``mol.GetRingInfo().BondRings()`` is counted when it has
    at least one bond that is not SINGLE, no bond that is AROMATIC, and
    only carbon atoms on its bonds.

    Returns ``(ringSize, bits)``: the sizes of the counted rings and the
    ``bits`` list that was passed in, updated in place.
    """
    # ring size -> bit indices; the n-th entry is switched on once at
    # least n qualifying rings of that size are present
    layout = {
        3: (4, 11),
        4: (18, 25),
        5: (32, 39, 46, 53, 60),
        6: (67, 74, 81, 88, 95),
        7: (102, 109),
        8: (116, 123),
        9: (130,),
        10: (137,),
    }
    tally = dict.fromkeys(layout, 0)
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        if all(kind == 'SINGLE' for kind in kinds):
            continue  # fully saturated
        if 'AROMATIC' in kinds:
            continue  # touches an aromatic bond
        if any(
            bond.GetBeginAtom().GetAtomicNum() != 6
            or bond.GetEndAtom().GetAtomicNum() != 6
            for bond in bonds
        ):
            continue  # not carbon-only
        size = len(ring)
        ringSize.append(size)
        # sizes outside 3-10 are recorded in ringSize but own no bits
        if size in tally:
            tally[size] += 1
    for size, slots in layout.items():
        for slot in slots[:tally[size]]:
            bits[slot] = 1
    return ringSize, bits
def func_6(mol, bits):
    """ *Internal Use Only*
    unsaturated non-aromatic nitrogen-containing

    A ring from ``mol.GetRingInfo().BondRings()`` is counted when it has
    at least one bond that is not SINGLE, no bond that is AROMATIC, and
    at least one nitrogen atom on its bonds.

    Returns ``(ringSize, bits)``: the sizes of the counted rings and the
    ``bits`` list that was passed in, updated in place.
    """
    # ring size -> bit indices; the n-th entry is switched on once at
    # least n qualifying rings of that size are present
    layout = {
        3: (5, 12),
        4: (19, 26),
        5: (33, 40, 47, 54, 61),
        6: (68, 75, 82, 89, 96),
        7: (103, 110),
        8: (117, 124),
        9: (131,),
        10: (138,),
    }
    tally = dict.fromkeys(layout, 0)
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        if all(kind == 'SINGLE' for kind in kinds):
            continue  # fully saturated
        if 'AROMATIC' in kinds:
            continue  # touches an aromatic bond
        has_nitrogen = any(
            bond.GetBeginAtom().GetAtomicNum() == 7
            or bond.GetEndAtom().GetAtomicNum() == 7
            for bond in bonds
        )
        if not has_nitrogen:
            continue
        size = len(ring)
        ringSize.append(size)
        # sizes outside 3-10 are recorded in ringSize but own no bits
        if size in tally:
            tally[size] += 1
    for size, slots in layout.items():
        for slot in slots[:tally[size]]:
            bits[slot] = 1
    return ringSize, bits
def func_7(mol, bits):
    """ *Internal Use Only*
    unsaturated non-aromatic heteroatom-containing

    A ring from ``mol.GetRingInfo().BondRings()`` is counted when it has
    at least one bond that is not SINGLE, no bond that is AROMATIC, and
    at least one atom on its bonds that is neither hydrogen nor carbon.

    Returns ``(ringSize, bits)``: the sizes of the counted rings and the
    ``bits`` list that was passed in, updated in place.
    """
    # ring size -> bit indices; the n-th entry is switched on once at
    # least n qualifying rings of that size are present
    layout = {
        3: (6, 13),
        4: (20, 27),
        5: (34, 41, 48, 55, 62),
        6: (69, 76, 83, 90, 97),
        7: (104, 111),
        8: (118, 125),
        9: (132,),
        10: (139,),
    }
    non_hetero = [1, 6]  # atomic numbers of H and C
    tally = dict.fromkeys(layout, 0)
    ringSize = []
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        kinds = [bond.GetBondType().name for bond in bonds]
        if all(kind == 'SINGLE' for kind in kinds):
            continue  # fully saturated
        if 'AROMATIC' in kinds:
            continue  # touches an aromatic bond
        has_hetero = any(
            bond.GetBeginAtom().GetAtomicNum() not in non_hetero
            or bond.GetEndAtom().GetAtomicNum() not in non_hetero
            for bond in bonds
        )
        if not has_hetero:
            continue
        size = len(ring)
        ringSize.append(size)
        # sizes outside 3-10 are recorded in ringSize but own no bits
        if size in tally:
            tally[size] += 1
    for size, slots in layout.items():
        for slot in slots[:tally[size]]:
            bits[slot] = 1
    return ringSize, bits
def func_8(mol, bits):
    """ *Internal Use Only*
    aromatic rings or hetero-aromatic rings

    Walks ``mol.GetRingInfo().BondRings()`` and keeps two tallies:
    rings whose bonds are all AROMATIC, and the subset of those rings
    that also carry an atom that is neither hydrogen nor carbon.

    Bits 140/142/144/146 are set for >= 1/2/3/4 aromatic rings and bits
    141/143/145/147 for >= 1/2/3/4 hetero-aromatic rings.  The two
    families are evaluated independently, so two aromatic rings of which
    one is hetero-aromatic yields bits 140, 142 and 141.

    Returns the ``bits`` list that was passed in, updated in place.
    """
    non_hetero = (1, 6)  # atomic numbers of H and C
    n_aromatic = 0
    n_hetero_aromatic = 0
    for ring in mol.GetRingInfo().BondRings():
        bonds = [mol.GetBondWithIdx(idx) for idx in ring]
        if not all(bond.GetBondType().name == 'AROMATIC' for bond in bonds):
            continue
        n_aromatic += 1
        # only rings that passed the aromatic test can be hetero-aromatic
        if any(
            bond.GetBeginAtom().GetAtomicNum() not in non_hetero
            or bond.GetEndAtom().GetAtomicNum() not in non_hetero
            for bond in bonds
        ):
            n_hetero_aromatic += 1
    for level in range(4):
        # level 0 is the ">= 1" threshold, level 3 the ">= 4" threshold
        if n_aromatic > level:
            bits[140 + 2 * level] = 1
        if n_hetero_aromatic > level:
            bits[141 + 2 * level] = 1
    return bits
def calcPubChemFingerPart2(mol):  # 116-263
    """ *Internal Use Only*
    Calculate PubChem Fingerprints (116-263)

    Starts from a list of 148 zeros and passes it through ``func_1`` to
    ``func_8`` in order; each helper switches on its own ring-count bits.
    Returns that 148-element list.
    """
    bits = [0] * 148
    # func_1 .. func_7 return (ringSize, bits); only the bits are needed
    for ring_counter in (func_1, func_2, func_3, func_4, func_5, func_6, func_7):
        _, bits = ring_counter(mol, bits)
    # func_8 returns the bit list directly
    return func_8(mol, bits)
def GetPubChemFPs(mol):
    """*Internal Use Only*
    Calculate PubChem Fingerprints

    Adds explicit hydrogens to ``mol``, evaluates the SMARTS-based part
    (``calcPubChemFingerPart1``) and the ring-count part
    (``calcPubChemFingerPart2``), and merges them into one 881-element
    boolean numpy array.
    """
    mol = Chem.AddHs(mol)
    smarts_bits = calcPubChemFingerPart1(mol).ToBitString()
    ring_bits = calcPubChemFingerPart2(mol)

    n_head = 115              # SMARTS positions 1..115 fill slots 0..114
    n_ring = len([0] * 148)   # ring-count slots sit directly after them
    flags = [0] * 881
    # position 0 of the SMARTS bit string is not used; positions 116..733
    # are placed after the ring-count section
    segments = (
        (0, smarts_bits[1:116]),
        (n_head + n_ring, smarts_bits[116:734]),
    )
    for offset, segment in segments:
        for pos, ch in enumerate(segment):
            if ch == '1':
                flags[offset + pos] = 1
    for pos, flag in enumerate(ring_bits):
        if flag == 1:
            flags[n_head + pos] = 1
    return np.array(flags, dtype=np.bool_)
# ------------------------------------
# Directory that holds this module; the fingerprint description
# spreadsheet is looked up next to it.
file_path = os.path.dirname(__file__)


def GetPubChemFPInfos():
    """Read ``pubchemfp.xlsx`` from this module's directory.

    Returns whatever ``pandas.read_excel`` produces for that file.
    """
    info_file = os.path.join(file_path, 'pubchemfp.xlsx')
    return pd.read_excel(info_file)
if __name__ == '__main__':
    # Demo run: fingerprint one example molecule and print the result.
    rule = '-' * 10
    print(rule + 'START' + rule)
    SMILES = 'C1=NC2NC3=CNCC3=CC2CC1'
    mol = Chem.MolFromSmiles(SMILES)
    # GetPubChemFPs adds hydrogens again itself
    mol2 = Chem.AddHs(mol)
    result = GetPubChemFPs(mol2)
    print('Molecule: %s' % SMILES)
    print('-' * 25)
    print('Results: %s' % result)
    print(rule + 'END' + rule)