Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 1,991 Bytes
c0ec7e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 17 16:54:12 2019
@author: [email protected]
Combining a set of chemical features with the 2D (topological) distances between them gives a 2D pharmacophore. When the distances are binned, unique integer ids can be assigned to each of these pharmacophores and they can be stored in a fingerprint. Details of the encoding are in: https://www.rdkit.org/docs/RDKit_Book.html#ph4-figure
"""
# Category label for the fingerprint implemented in this module.
# NOTE(review): presumably read by code outside this file — confirm before renaming.
_type = 'Pharmacophore-based'
from rdkit.Chem.Pharm2D.SigFactory import SigFactory
from rdkit.Chem.Pharm2D import Generate
from rdkit.Chem import DataStructs
from rdkit.Chem import ChemicalFeatures
import numpy as np
import os
# The feature-definition file is looked up in the same directory as this module.
fdef = os.path.join(os.path.dirname(__file__), 'mnimalfatures.fdef')
# Built once at import time; GetPharmacoPFPs hands this factory to each
# SigFactory it creates.
featFactory = ChemicalFeatures.BuildFeatureFactory(fdef)
def GetPharmacoPFPs(mol,
                    bins=None,
                    minPointCount=2,
                    maxPointCount=2,
                    return_bitInfo=False):
    """
    Compute the 2D pharmacophore fingerprint of ``mol`` as a boolean array.

    A signature factory is built from the module-level ``featFactory`` with
    the requested point counts and distance bins, and the fingerprint
    produced by ``Generate.Gen2DFingerprint`` is converted to a NumPy array.

    Parameters
    ----------
    mol :
        Molecule object passed straight to ``Generate.Gen2DFingerprint``.
    bins :
        Iterable of ``(lower, upper)`` distance bins. ``None`` (the default)
        means ``[(i, i + 1) for i in range(20)]``.
    minPointCount, maxPointCount :
        Forwarded to ``SigFactory``. Note: ``maxPointCount=3`` is slow; keep
        ``maxPointCount=2`` for large-scale computation.
    return_bitInfo :
        When true, also return the description of every bit.

    Returns
    -------
    arr : numpy.ndarray of bool
        One entry per fingerprint bit.
    description : list
        Only when ``return_bitInfo`` is true: the result of
        ``GetBitDescription(i)`` for every bit index ``i``, in order.
    """
    # Build the bin list per call instead of sharing one mutable default
    # object across calls; copying also protects a caller-supplied list.
    if bins is None:
        bins = [(i, i + 1) for i in range(20)]
    else:
        bins = list(bins)

    sig_factory = SigFactory(featFactory,
                             trianglePruneBins=False,
                             minPointCount=minPointCount,
                             maxPointCount=maxPointCount)
    sig_factory.SetBins(bins)
    sig_factory.Init()

    fingerprint = Generate.Gen2DFingerprint(mol, sig_factory)
    arr = np.array(list(fingerprint), dtype=np.bool_)

    if not return_bitInfo:
        return arr

    description = [sig_factory.GetBitDescription(i)
                   for i in range(len(fingerprint))]
    return arr, description
if __name__ == '__main__':
    from rdkit import Chem

    # Demo run: fingerprint one example molecule using 20 unit-width
    # distance bins and 2-point pharmacophores only.
    mol = Chem.MolFromSmiles(
        'CC#CC(=O)NC1=NC=C2C(=C1)C(=NC=N2)NC3=CC(=C(C=C3)F)Cl'
    )
    a = GetPharmacoPFPs(
        mol,
        bins=list(zip(range(20), range(1, 21))),
        minPointCount=2,
        maxPointCount=2,
    )
|