File size: 1,991 Bytes
c0ec7e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 17 16:54:12 2019

@author: [email protected]

Combining a set of chemical features with the 2D (topological) distances between them gives a 2D pharmacophore. When the distances are binned, unique integer ids can be assigned to each of these pharmacophores and they can be stored in a fingerprint. Details of the encoding are in: https://www.rdkit.org/docs/RDKit_Book.html#ph4-figure
"""

# Label naming the family of fingerprints this module produces.
# NOTE(review): presumably read by code outside this file - confirm before
# renaming or changing the value.
_type = 'Pharmacophore-based'

from rdkit.Chem.Pharm2D.SigFactory import SigFactory
from rdkit.Chem.Pharm2D import Generate
from rdkit.Chem import DataStructs
from rdkit.Chem import ChemicalFeatures

import numpy as np
import os

# The feature-definition file is looked up in the directory that contains
# this module; the feature factory is built once at import time and reused
# by every fingerprint call below.
_module_dir = os.path.dirname(__file__)
fdef = os.path.join(_module_dir, 'mnimalfatures.fdef')
featFactory = ChemicalFeatures.BuildFeatureFactory(fdef)


def GetPharmacoPFPs(mol,
                    bins=None,
                    minPointCount=2,
                    maxPointCount=2,
                    return_bitInfo=False):
    '''
    Compute the 2D pharmacophore fingerprint of a molecule as a boolean array.

    Parameters
    ----------
    mol
        Molecule handed unchanged to ``Generate.Gen2DFingerprint``.
    bins
        Distance bins as ``(lower, upper)`` pairs, passed to
        ``SigFactory.SetBins``. When omitted (or ``None``) the default
        ``[(i, i + 1) for i in range(20)]`` is used.
    minPointCount, maxPointCount
        Forwarded to ``SigFactory``.
        Note: maxPointCount=3 is slow; keep maxPointCount=2 for
        large-scale computation.
    return_bitInfo
        When true, also return one bit description per fingerprint bit.

    Returns
    -------
    arr
        ``numpy`` boolean array with one entry per fingerprint bit.
    (arr, description)
        Returned instead of ``arr`` when ``return_bitInfo`` is true;
        ``description`` is a list holding
        ``SigFactory.GetBitDescription(i)`` for every bit index ``i``.
    '''
    # Build the default per call instead of sharing one list object between
    # all calls through the function signature.
    if bins is None:
        bins = [(i, i + 1) for i in range(20)]

    sig_factory = SigFactory(featFactory,
                             trianglePruneBins=False,
                             minPointCount=minPointCount,
                             maxPointCount=maxPointCount)
    sig_factory.SetBins(bins)
    sig_factory.Init()

    res = Generate.Gen2DFingerprint(mol, sig_factory)
    arr = np.array(list(res)).astype(np.bool_)
    if return_bitInfo:
        description = [sig_factory.GetBitDescription(i)
                       for i in range(len(res))]
        return arr, description

    return arr


if __name__ == '__main__':
    from rdkit import Chem

    # Demo run: fingerprint one example molecule using twenty unit-width
    # distance bins and two-point pharmacophores only.
    demo_bins = [(lower, lower + 1) for lower in range(20)]
    mol = Chem.MolFromSmiles('CC#CC(=O)NC1=NC=C2C(=C1)C(=NC=N2)NC3=CC(=C(C=C3)F)Cl')
    a = GetPharmacoPFPs(mol,
                        bins=demo_bins,
                        minPointCount=2,
                        maxPointCount=2)