Spaces:
Sleeping
Sleeping
jannisborn
commited on
Commit
•
480220c
0
Parent(s):
Duplicate from GT4SD/molecular_properties
Browse files- .gitattributes +34 -0
- .gitignore +1 -0
- LICENSE +21 -0
- README.md +16 -0
- app.py +99 -0
- model_cards/article.md +68 -0
- model_cards/description.md +7 -0
- model_cards/examples.smi +13 -0
- requirements.txt +29 -0
- utils.py +57 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__/
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2022 Generative Toolkit 4 Scientific Discovery
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Molecular properties
|
3 |
+
emoji: 💡
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: blue
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.9.1
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
python_version: 3.8.13
|
11 |
+
pypi_version: 20.2.4
|
12 |
+
duplicated_from: GT4SD/molecular_properties
|
13 |
+
---
|
14 |
+
|
15 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
16 |
+
|
app.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import pathlib
|
3 |
+
|
4 |
+
import gradio as gr
|
5 |
+
import numpy as np
|
6 |
+
import pandas as pd
|
7 |
+
from gt4sd.properties.molecules import MOLECULE_PROPERTY_PREDICTOR_FACTORY
|
8 |
+
|
9 |
+
from utils import draw_grid_predict
|
10 |
+
|
11 |
+
logger = logging.getLogger(__name__)
|
12 |
+
logger.addHandler(logging.NullHandler())
|
13 |
+
|
14 |
+
# Factory keys (lower-case) that are filtered out of the property dropdown.
REMOVE = [
    "docking",
    "docking_tdc",
    "molecule_one",
    "askcos",
    "plogp",
    "similarity_seed",
    "activity_against_target",
    "organtox",
]

# Multi-endpoint models, keyed by the capitalized name used in the dropdown.
# Each value is the comma-separated list of endpoint names; `main` splits it
# to label the prediction columns and uses membership in this dict to request
# algorithm version "v0".
MODEL_PROP_DESCRIPTION = {
    "Tox21": "NR-AR, NR-AR-LBD, NR-AhR, NR-Aromatase, NR-ER, NR-ER-LBD, NR-PPAR-gamma, SR-ARE, SR-ATAD5, SR-HSE, SR-MMP, SR-p53",
    "Sider": "Hepatobiliary disorders,Metabolism and nutrition disorders,Product issues,Eye disorders,Investigations,Musculoskeletal disorders,Gastrointestinal disorders,Social circumstances,Immune system disorders,Reproductive system and breast disorders,Benign & malignant,General disorders,Endocrine disorders,Surgical & medical procedures,Vascular disorders,Blood & lymphatic disorders,Skin & subcutaneous disorders,Congenital & genetic disorders,Infections,Respiratory & thoracic disorders,Psychiatric disorders,Renal & urinary disorders,Pregnancy conditions,Ear disorders,Cardiac disorders,Nervous system disorders,Injury & procedural complications",
    "Clintox": "FDA approval, Clinical trial failure",
}
|
22 |
+
|
23 |
+
|
24 |
+
def main(property: str, smiles: str, smiles_file: str):
    """
    Predict a molecular property for a single SMILES or a file of SMILES.

    Args:
        property: Property name as offered in the dropdown. It is lower-cased
            to look up the predictor in ``MOLECULE_PROPERTY_PREDICTOR_FACTORY``.
        smiles: A single SMILES string. ``None`` and empty/blank strings are
            both treated as "not provided" (a text input left empty may
            arrive as ``""`` rather than ``None``).
        smiles_file: Either an uploaded-file object exposing its path as
            ``.name`` or a plain ``str``/``pathlib`` path. The file is read
            as header-less, tab-separated text; the first column holds the
            SMILES.

    Returns:
        The value returned by ``draw_grid_predict`` for the predictions.

    Raises:
        ValueError: If both, or neither, of ``smiles`` and ``smiles_file``
            are provided.
    """
    # A blank textbox must not count as a provided SMILES, otherwise a file
    # upload alone would trip the "not both" check below.
    if smiles is not None and not smiles.strip():
        smiles = None

    # Validate the inputs before the (potentially expensive) model is built.
    if smiles is not None and smiles_file is not None:
        raise ValueError("Pass either smiles or smiles_file, not both.")
    if smiles is None and smiles_file is None:
        raise ValueError("Pass either smiles or smiles_file.")

    if smiles is not None:
        smiles = [smiles.strip()]
    else:
        # Path-like inputs are used directly: `Path.name` would only give the
        # basename, and `str` has no `.name` at all.
        if isinstance(smiles_file, (str, pathlib.PurePath)):
            file_path = smiles_file
        else:
            file_path = smiles_file.name
        smiles = pd.read_csv(file_path, header=None, sep="\t")[0].tolist()

    multi_endpoint = property in MODEL_PROP_DESCRIPTION
    algo, config = MOLECULE_PROPERTY_PREDICTOR_FACTORY[property.lower()]
    kwargs = {"algorithm_version": "v0"} if multi_endpoint else {}
    model = algo(config(**kwargs))

    props = np.array(list(map(model, smiles))).round(2)

    # Expand to 2D array if needed
    if props.ndim == 1:
        props = np.expand_dims(props, -1)

    if multi_endpoint:
        # Strip so that "a, b" style lists do not yield names with a leading space.
        property_names = [
            name.strip() for name in MODEL_PROP_DESCRIPTION[property].split(",")
        ]
    else:
        property_names = [property]

    return draw_grid_predict(
        smiles, props, property_names=property_names, domain="Molecules"
    )
|
51 |
+
|
52 |
+
|
53 |
+
if __name__ == "__main__":

    # Preparation (retrieve all available algorithms).
    # Filtering instead of popping by index tolerates factories that lack
    # one of the hidden properties.
    hidden = set(REMOVE)
    properties = [
        name.capitalize()
        for name in list(MOLECULE_PROPERTY_PREDICTOR_FACTORY.keys())[::-1]
        if name not in hidden
    ]

    # Load metadata
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    # Each example is [property, single SMILES, SMILES file], matching the
    # order of the interface inputs below.
    examples = [
        ["Qed", None, metadata_root.joinpath("examples.smi")],
        [
            "Esol",
            "CN1CCN(CCCOc2ccc(N3C(=O)C(=Cc4ccc(Oc5ccc([N+](=O)[O-])cc5)cc4)SC3=S)cc2)CC1",
            None,
        ],
    ]

    with open(metadata_root.joinpath("article.md"), "r", encoding="utf-8") as f:
        article = f.read()
    with open(metadata_root.joinpath("description.md"), "r", encoding="utf-8") as f:
        description = f.read()

    demo = gr.Interface(
        fn=main,
        title="Molecular properties",
        inputs=[
            # The choices are capitalized, so the default has to be as well.
            gr.Dropdown(properties, label="Property", value="Qed"),
            gr.Textbox(
                label="Single SMILES",
                placeholder="CC(C#C)N(C)C(=O)NC1=CC=C(Cl)C=C1",
                lines=1,
            ),
            gr.File(
                file_types=[".smi"],
                label="Multiple SMILES (tab-separated, `.smi` file)",
            ),
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples,
    )
    demo.launch(debug=True, show_error=True)
|
model_cards/article.md
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Supported molecular properties
|
2 |
+
|
3 |
+
|
4 |
+
### ClinTox
|
5 |
+
A [ToxSmi model](https://github.com/PaccMann/toxsmi) trained on the [ClinTox](https://moleculenet.org/datasets-1) dataset, which has two endpoints: probability of FDA approval and probability of failure in clinical trials. When using this model, please cite *Born et al. (2023)* (citation below).
|
6 |
+
|
7 |
+
### SIDER
|
8 |
+
A [ToxSmi model](https://github.com/PaccMann/toxsmi) trained on the [SIDER](https://moleculenet.org/datasets-1) dataset for 27 different types of side effects of drugs. When using this model, please cite *Born et al. (2023)* (citation below).
|
9 |
+
|
10 |
+
### Tox21
|
11 |
+
A [ToxSmi model](https://github.com/PaccMann/toxsmi) trained on the [Tox21](https://tripod.nih.gov/tox/) dataset with 12 different types of environmental toxicities. When using this model, please cite *Born et al. (2023)* (citation below).
|
12 |
+
|
13 |
+
### SCScore
|
14 |
+
Predict the synthetic complexity score (SCScore) as presented in [Coley et al. (*J. Chem. Inf. Model.*; 2018)](https://pubs.acs.org/doi/full/10.1021/acs.jcim.7b00622).
|
15 |
+
|
16 |
+
### SAS
|
17 |
+
Estimate the synthetic accessibility score (SAS) as presented in [Ertl et al. (*Journal of Cheminformatics*; 2009)](https://jcheminf.biomedcentral.com/articles/10.1186/1758-2946-1-8).
|
18 |
+
|
19 |
+
### Lipinski
|
20 |
+
Measure whether a molecule conforms to Lipinski's rule of five as presented in [Lipinski et al. (*Advanced Drug Delivery Reviews*; 2001)](https://www.sciencedirect.com/science/article/abs/pii/S0169409X00001290?via%3Dihub).
|
21 |
+
|
22 |
+
### Penalized logP
|
23 |
+
Measure the penalized logP (partition coefficient) score as presented in [Gomez-Bombarelli et al. (*ACS Central Science*; 2018)](https://arxiv.org/abs/1610.02415v1). This is the logP minus the number of rings with > 6 atoms minus the SAS.
|
24 |
+
|
25 |
+
### QED
|
26 |
+
Measure the drug-likeness as presented in [Bickerton et al. (*Nature Chemistry*; 2012)](https://www.nature.com/articles/nchem.1243).
|
27 |
+
|
28 |
+
### LogP
|
29 |
+
Measure the logP (partition coefficient) of a molecule as presented in [Wildman et al. (*J. Chem. Inf. Comput. Sci.*; 1999)](https://pubs.acs.org/doi/full/10.1021/ci990307l).
|
30 |
+
|
31 |
+
### Bertz
|
32 |
+
Calculate the first general index of molecular complexity as presented in [Bertz (*Journal of the American Chemical Society*; 1981)](https://pubs.acs.org/doi/pdf/10.1021/ja00402a071).
|
33 |
+
|
34 |
+
### TPSA
|
35 |
+
Calculate the total polar surface area of a molecule as presented in [Ertl et al. (*Journal of Medicinal Chemistry*; 2000)](https://pubs.acs.org/doi/full/10.1021/jm000942e).
|
36 |
+
|
37 |
+
### Is-Scaffold
|
38 |
+
Whether the molecule is identical to its [Murcko scaffold](https://rdkit.org/docs/source/rdkit.Chem.Scaffolds.MurckoScaffold.html).
|
39 |
+
|
40 |
+
### Number-Of-X
|
41 |
+
Calculated with [RDKit](https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html).
|
42 |
+
|
43 |
+
### Molecular Weight
|
44 |
+
Calculated with [RDKit](https://www.rdkit.org/docs/source/rdkit.Chem.rdchem.html).
|
45 |
+
|
46 |
+
|
47 |
+
### ToxSmi citation
|
48 |
+
```bib
|
49 |
+
@article{born2023chemical,
|
50 |
+
title={Chemical representation learning for toxicity prediction},
|
51 |
+
author={Born, Jannis and Markert, Greta and Janakarajan, Nikita and Kimber, Talia B. and Volkamer, Andrea and Rodriguez Martinez, Maria and Manica, Matteo},
|
52 |
+
journal={Under review at Digital Discovery},
|
53 |
+
year={2023}
|
54 |
+
}
|
55 |
+
```
|
56 |
+
|
57 |
+
|
58 |
+
### Unsupported properties
|
59 |
+
The following molecular properties are available via the GT4SD API but not in this UI:
|
60 |
+
- [MoleculeOne](https://tdcommons.ai/functions/oracles/#moleculeone) endpoint for retrosynthesis
|
61 |
+
- [ASKCOS](https://tdcommons.ai/functions/oracles/#askcos) endpoint for retrosynthesis
|
62 |
+
- [TDC-Docking](https://tdcommons.ai/functions/oracles/#docking-scores) endpoint for docking against a user-provided target
|
63 |
+
- [TDC-Docking](https://tdcommons.ai/functions/oracles/#docking-scores) endpoint for docking against *3pbl*.
|
64 |
+
- [Protein-ligand binding](https://tdcommons.ai/functions/oracles/#dopamine-receptor-d2-drd2) against one of the targets *drd2*, *gsk3b*, *jnk3*, *fpscores*, *cyp3a4_veith*, *drd2_current*, *gsk3b_current* or *jnk3_current*.
|
65 |
+
- [Tanimoto similarity](https://tdcommons.ai/functions/oracles/#similaritydissimilarity) to a seed molecule.
|
66 |
+
|
67 |
+
|
68 |
+
Moreover, GT4SD also includes properties on other entities such as [proteins](https://gt4sd.github.io/gt4sd-core/api/gt4sd.properties.proteins.html) and [crystals](https://gt4sd.github.io/gt4sd-core/api/gt4sd.properties.crystals.html).
|
model_cards/description.md
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
<img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
|
4 |
+
|
5 |
+
### Molecular property prediction
|
6 |
+
|
7 |
+
This is the GT4SD web-app for prediction of various molecular properties. For **examples** and **documentation** of the supported properties, please see below. Please note that this API does not expose **all** properties that are supported in GT4SD (a list of the non-supported ones can be found at the bottom).
|
model_cards/examples.smi
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Cc1cc2c(c3oc(CCCC#N)cc13)C(=O)c1c(O)cccc1C2=O
|
2 |
+
C=CCN1C(=O)C(=NNC(=S)NC2OC(COC(C)=O)C(OC(C)=O)C(OC(C)=O)C2OC(C)=O)c2ccccc21
|
3 |
+
O=C1C(=Cc2ccc(F)cc2)CCOc2c1ccc1ccccc21
|
4 |
+
CC(C)CNc1cc(NCC(C)C)nc(NCC(C)C)n1
|
5 |
+
CN1CCN(CCCOc2ccc(N3C(=O)C(=Cc4ccc(Oc5ccc([N+](=O)[O-])cc5)cc4)SC3=S)cc2)CC1
|
6 |
+
COc1ccc2ccccc2c1C1CC1NC(C)=O
|
7 |
+
Cc1ccc(-n2c(=O)[nH]cc(C(=O)Nc3ccc4c(c3)OCCO4)c2=O)cc1
|
8 |
+
Cc1ccc(NCc2nnc(SCC(=O)NCCc3ccccc3)n2C)cc1
|
9 |
+
CCCNC(=O)c1ccc2c(c1)N=C(C)c1c(C)ccc(C)c1S2
|
10 |
+
COc1ccc(Cn2ccn(CC(=O)Nc3cc(C)ccc3C)c(=O)c2=O)cc1
|
11 |
+
Cn1nccc1C(=O)NN=Cc1c(O)ccc2ccccc12
|
12 |
+
CCOC(=O)Nc1cc(N)c2c(n1)NC(C)C(c1ccccc1)=N2
|
13 |
+
Cn1nc(N)c2ncc(C(Cl)(Cl)Cl)nc21
|
requirements.txt
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
-f https://download.pytorch.org/whl/cpu/torch_stable.html
|
2 |
+
-f https://data.pyg.org/whl/torch-1.12.1+cpu.html
|
3 |
+
# pip==20.2.4
|
4 |
+
torch==1.12.1
|
5 |
+
torch-scatter
|
6 |
+
torch-spline-conv
|
7 |
+
torch-sparse
|
8 |
+
torch-geometric
|
9 |
+
torchvision==0.13.1
|
10 |
+
torchaudio==0.12.1
|
11 |
+
gt4sd>=1.1.1
|
12 |
+
molgx>=0.22.0a1
|
13 |
+
molecule_generation
|
14 |
+
nglview
|
15 |
+
PyTDC==0.3.7
|
16 |
+
gradio>=3.9
|
17 |
+
markdown-it-py>=2.1.0
|
18 |
+
mols2grid>=0.2.0
|
19 |
+
pandas>=1.0.0
|
20 |
+
pymatgen>=2023.1.9
|
21 |
+
terminator @ git+https://github.com/IBM/regression-transformer@gt4sd
|
22 |
+
guacamol_baselines @ git+https://github.com/GT4SD/[email protected]
|
23 |
+
moses @ git+https://github.com/GT4SD/[email protected]
|
24 |
+
paccmann_chemistry @ git+https://github.com/PaccMann/[email protected]
|
25 |
+
paccmann_generator @ git+https://github.com/PaccMann/[email protected]
|
26 |
+
paccmann_gp @ git+https://github.com/PaccMann/[email protected]
|
27 |
+
paccmann_omics @ git+https://github.com/PaccMann/[email protected]
|
28 |
+
paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@sarscov2
|
29 |
+
reinvent_models @ git+https://github.com/GT4SD/[email protected]
|
utils.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from typing import List
|
3 |
+
import numpy as np
|
4 |
+
import mols2grid
|
5 |
+
import pandas as pd
|
6 |
+
from rdkit import Chem
|
7 |
+
|
8 |
+
logger = logging.getLogger(__name__)
|
9 |
+
logger.addHandler(logging.NullHandler())
|
10 |
+
|
11 |
+
|
12 |
+
def draw_grid_predict(
    sequences: List[str], properties: np.array, property_names: List[str], domain: str
) -> str:
    """
    Uses mols2grid to draw a HTML grid for the prediction

    Sequences that cannot be converted are skipped with a warning, and their
    rows are dropped from ``properties`` so that both stay aligned.

    Args:
        sequences: Sequences for which properties are predicted.
        properties: Predicted properties. Array of shape (n_samples, n_properties).
        property_names: List of property names, one per column of ``properties``.
        domain: Domain of the prediction ("Molecules" or "Proteins").

    Returns:
        HTML to display

    Raises:
        ValueError: If ``domain`` is neither "Molecules" nor "Proteins".
    """

    if domain not in ["Molecules", "Proteins"]:
        raise ValueError(f"Unsupported domain {domain}")

    if domain == "Proteins":
        # Proteins arrive as FASTA strings and are drawn via their SMILES.
        converter = lambda x: Chem.MolToSmiles(Chem.MolFromFASTA(x))
    else:
        converter = lambda x: x

    smiles = []
    kept_rows = []
    for row, sequence in enumerate(sequences):
        try:
            converted = converter(sequence)
        except Exception:
            # Log the input itself; there is no converted value on failure.
            logger.warning(f"Could not draw sequence {sequence}")
            continue
        smiles.append(converted)
        kept_rows.append(row)

    # Keep only the property rows whose sequence could be converted.
    properties = np.asarray(properties)[np.asarray(kept_rows, dtype=int)]

    result = pd.DataFrame({"SMILES": smiles})
    for i, name in enumerate(property_names):
        result[name] = properties[:, i]
    n_cols = min(3, len(result))
    # Few molecules get large tiles, larger sets get small ones.
    size = (140, 200) if len(result) > 3 else (600, 700)
    obj = mols2grid.display(
        result,
        tooltip=list(result.keys()),
        height=1100,
        n_cols=n_cols,
        name="Results",
        size=size,
    )
    return obj.data
|