File size: 1,124 Bytes
22761bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import numpy as np
from torch import nn
from rdkit.Chem import Descriptors, AllChem, MolFromSmiles

from deepscreen.models.components.mlp import LazyMLP

# Module-level DeepSCAMs network, built from the project's LazyMLP component
# with a single output channel, hidden_channels of 100/1000/1000, nn.Tanh as
# the activation class and dropout disabled (0.0).
# NOTE(review): presumably LazyMLP infers its input width on first use, so it
# must match the length of the vectors produced by `featurizer` — confirm.
DeepSCAMs = LazyMLP(
    out_channels=1,
    hidden_channels=[100, 1000, 1000],
    activation=nn.Tanh,
    dropout=0.0
)


def featurizer(smiles, radius=2, n_bits=1024):
    """Featurize a SMILES string as Morgan fingerprint bits plus RDKit descriptors.

    The feature vector is the ``n_bits`` Morgan fingerprint bits (as floats)
    followed by one value per entry of ``Descriptors._descList`` except the
    entry at index 2. Descriptor values are cast to ``np.float32`` and clamped
    to the finite float32 range so that huge values do not become ``inf``.

    Args:
        smiles: SMILES string of the molecule to featurize.
        radius: Radius passed to the Morgan fingerprint.
        n_bits: Number of bits in the Morgan fingerprint.

    Returns:
        A list holding a single feature vector (itself a list) of length
        ``n_bits + number_of_descriptors``. If the SMILES cannot be parsed or
        any featurization step fails, a warning is logged and the vector is
        all zeros with that same length, so every molecule yields features of
        identical shape.
    """
    # Imported locally because the module defines no logger of its own.
    import logging

    log = logging.getLogger(__name__)

    # Every (name, function) entry of RDKit's descriptor table except index 2.
    descriptor_fns = [
        entry[1]
        for entry in Descriptors._descList[0:2] + Descriptors._descList[3:]
    ]
    f32_max = float(np.finfo(np.float32).max)
    n_features = n_bits + len(descriptor_fns)

    try:
        mol = MolFromSmiles(smiles)
        if mol is None:
            # RDKit reports an unparsable SMILES by returning None, not by raising.
            raise ValueError('MolFromSmiles returned None')

        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=n_bits)
        fp_bits = [float(bit) for bit in fp.ToBitString()]

        # Clamp on both sides before the float32 cast to avoid +/-inf features.
        descriptor_values = [
            np.float32(min(max(fn(mol), -f32_max), f32_max))
            for fn in descriptor_fns
        ]
    except Exception as err:  # best-effort: one bad molecule must not abort featurization
        log.warning(
            'RDKit could not process SMILES: %s (%s); converted to all 0 features',
            smiles,
            err,
        )
        return [[0.0] * n_features]

    return [fp_bits + descriptor_values]