jannisborn commited on
Commit
5984d9a
0 Parent(s):

Duplicate from jannisborn/gt4sd-paccmann-rl

Browse files
Files changed (10) hide show
  1. .gitattributes +34 -0
  2. .gitignore +1 -0
  3. LICENSE +21 -0
  4. README.md +15 -0
  5. app.py +131 -0
  6. model_cards/article.md +96 -0
  7. model_cards/description.md +9 -0
  8. model_cards/examples.csv +3 -0
  9. requirements.txt +29 -0
  10. utils.py +48 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Generative Toolkit 4 Scientific Discovery
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: GT4SD - PaccMannRL
3
+ emoji: 💡
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.9.1
8
+ app_file: app.py
9
+ pinned: false
10
+ python_version: 3.8.13
11
+ pypi_version: 20.2.4
12
+ duplicated_from: jannisborn/gt4sd-paccmann-rl
13
+ ---
14
+
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import pathlib
3
+ from typing import List
4
+
5
+ import gradio as gr
6
+ import numpy as np
7
+ import pandas as pd
8
+ from gt4sd.algorithms.conditional_generation.paccmann_rl import (
9
+ PaccMannRL,
10
+ PaccMannRLOmicBasedGenerator,
11
+ PaccMannRLProteinBasedGenerator,
12
+ )
13
+ from gt4sd.algorithms.generation.paccmann_vae import PaccMannVAE, PaccMannVAEGenerator
14
+ from gt4sd.algorithms.registry import ApplicationsRegistry
15
+
16
+ from utils import draw_grid_generate
17
+
18
+ logger = logging.getLogger(__name__)
19
+ logger.addHandler(logging.NullHandler())
20
+
21
+
22
def run_inference(
    algorithm_version: str,
    inference_type: str,
    protein_target: str,
    omics_target: str,
    temperature: float,
    length: float,
    number_of_samples: int,
):
    """
    Sample molecules from a PaccMann model and render them as an HTML grid.

    Args:
        algorithm_version: Label of the form ``<Kind>_<version>``. The part after
            the last underscore is passed to the generator configuration; the
            substrings ``Protein`` / ``Omic`` select the conditional generator.
        inference_type: ``"Conditional"`` or ``"Unbiased"``.
        protein_target: Protein target; only used for conditional protein models.
        omics_target: 2128 whitespace-separated floats; only used for
            conditional omic models.
        temperature: Forwarded to the configuration as ``temperature``.
        length: Forwarded to the configuration as ``generated_length``.
        number_of_samples: Number of samples requested from the model.

    Returns:
        Whatever ``draw_grid_generate`` returns for the generated samples.

    Raises:
        ValueError: If the inference type or algorithm version is unknown, or
            if the omics target is not exactly 2128 numeric values.
    """
    n_omics = 2128

    if inference_type == "Unbiased":
        algorithm_class = PaccMannVAEGenerator
        model_class = PaccMannVAE
        target = None
    elif inference_type == "Conditional":
        if "Protein" in algorithm_version:
            algorithm_class = PaccMannRLProteinBasedGenerator
            target = protein_target
        elif "Omic" in algorithm_version:
            # Validate the user input first, before anything else is set up.
            # split() without an argument tolerates repeated/trailing blanks.
            tokens = (omics_target or "").split()
            try:
                values = [float(token) for token in tokens]
            except ValueError as err:
                raise ValueError(
                    f"Expected {n_omics} space-separated omics values, got {omics_target}"
                ) from err
            if len(values) != n_omics:
                # Report the parsed values; `target` is not assigned yet here.
                raise ValueError(
                    f"Expected {n_omics} omics values, got {len(values)}: {values}"
                )
            algorithm_class = PaccMannRLOmicBasedGenerator
            # The model receives the profile as a bracketed, comma-separated string.
            target = f"[{','.join(tokens)}]"
        else:
            raise ValueError(f"Unknown algorithm version {algorithm_version}")
        model_class = PaccMannRL
    else:
        raise ValueError(f"Unknown inference type {inference_type}")

    config = algorithm_class(
        algorithm_version.split("_")[-1],
        temperature=temperature,
        generated_length=length,
    )
    print("Target is ", target)
    # Unbiased sampling has no target, so len() must not be called on it.
    if target is not None:
        print(type(target), len(target))
    model = model_class(config, target=target)
    samples = list(model.sample(number_of_samples))

    return draw_grid_generate(samples=samples, n_cols=5)
69
+
70
+
71
if __name__ == "__main__":

    # Build the dropdown labels: one "<Kind>_<version>" entry for every
    # PaccMannRL application listed by the registry.
    registered = ApplicationsRegistry.list_available()
    algos = [
        entry["algorithm_application"].split("Based")[0].split("PaccMannRL")[-1]
        + "_"
        + entry["algorithm_version"]
        for entry in registered
        if "PaccMannRL" in entry["algorithm_name"]
    ]

    # Model card material lives next to this script.
    card_dir = pathlib.Path(__file__).parent / "model_cards"

    examples = pd.read_csv(card_dir / "examples.csv", header=None).fillna("")
    article = (card_dir / "article.md").read_text()
    description = (card_dir / "description.md").read_text()

    # Input widgets, created in the order run_inference expects its arguments.
    version_dropdown = gr.Dropdown(algos, label="Algorithm version", value="Protein_v0")
    inference_radio = gr.Radio(
        choices=["Conditional", "Unbiased"],
        label="Inference type",
        value="Conditional",
    )
    protein_box = gr.Textbox(
        label="Protein target",
        placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
        lines=1,
    )
    omics_box = gr.Textbox(
        label="Gene expression target",
        # Placeholder only: a random profile of the expected length.
        placeholder=" ".join(map(str, np.round(np.random.rand(2128), 2))),
        lines=1,
    )
    temperature_slider = gr.Slider(
        minimum=0.5, maximum=2, value=1, label="Decoding temperature"
    )
    length_slider = gr.Slider(
        minimum=5,
        maximum=400,
        value=100,
        label="Maximal sequence length",
        step=1,
    )
    samples_slider = gr.Slider(
        minimum=1, maximum=50, value=10, label="Number of samples", step=1
    )

    demo = gr.Interface(
        fn=run_inference,
        title="PaccMannRL",
        inputs=[
            version_dropdown,
            inference_radio,
            protein_box,
            omics_box,
            temperature_slider,
            length_slider,
            samples_slider,
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples.values.tolist(),
    )
    demo.launch(debug=True, show_error=True)
model_cards/article.md ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model documentation & parameters
2
+
3
+ **Algorithm Version**: Which model version (either protein-target-driven or gene-expression-profile-driven) to use and which checkpoint to rely on.
4
+
5
+ **Inference type**: Whether the model should be conditioned on the target (default) or whether the model is used in an `Unbiased` manner.
6
+
7
+ **Protein target**: An amino acid sequence (AAS) of a protein target used for conditioning. Only use if `Inference type` is `Conditional` and if the `Algorithm version` is a Protein model.
8
+
9
+ **Gene expression target**: A list of 2128 floats, representing the embedding of a gene expression profile to be used for conditioning. Only use if `Inference type` is `Conditional` and if the `Algorithm version` is an Omic model.
10
+
11
+ **Decoding temperature**: The temperature parameter in the SMILES/SELFIES decoder. Higher values lead to more explorative choices, smaller values culminate in mode collapse.
12
+
13
+ **Maximal sequence length**: The maximal number of SMILES tokens in the generated molecule.
14
+
15
+ **Number of samples**: How many samples should be generated (between 1 and 50).
16
+
17
+
18
+ # Model card -- PaccMannRL
19
+
20
+ **Model Details**: PaccMannRL is a language model for conditional molecular design. It consists of a domain-specific encoder (for protein targets or gene expression profiles) and a generic molecular decoder. Both components are finetuned together using RL to convert the context representation into a molecule with high affinity toward the context (i.e., binding affinity to the protein or high inhibitory effect for the cell profile).
21
+
22
+ **Developers**: Jannis Born, Matteo Manica and colleagues from IBM Research.
23
+
24
+ **Distributors**: Original authors' code wrapped and distributed by GT4SD Team (2023) from IBM Research.
25
+
26
+ **Model date**: Published in 2021.
27
+
28
+ **Model version**: Models trained and distributed by the original authors.
29
+ - **Protein_v0**: Molecular decoder pretrained on 1.5M molecules from ChEMBL. Protein encoder pretrained on 404k proteins from UniProt. Encoder and decoder finetuned on 41 SARS-CoV-2-related protein targets with a binding affinity predictor trained on BindingDB.
30
+ - **Omic_v0**: Molecular decoder pretrained on 1.5M molecules from ChEMBL. Gene expression encoder pretrained on 12k gene expression profiles from TCGA. Encoder and decoder finetuned on a few hundred cancer cell profiles from GDSC with an IC50 predictor trained on GDSC.
31
+
32
+ **Model type**: A language-based molecular generative model that can be optimized with RL to generate molecules with high affinity toward a context.
33
+
34
+ **Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
35
+ - **Protein**: Parameters as provided on [(GitHub repo)](https://github.com/PaccMann/paccmann_sarscov2).
36
+ - **Omics**: Parameters as provided on [(GitHub repo)](https://github.com/PaccMann/paccmann_rl).
37
+
38
+ **Paper or other resource for more information**:
39
+ - **Omics**: [PaccMannRL: De novo generation of hit-like anticancer molecules from transcriptomic data via reinforcement learning (2021; *iScience*)](https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6).
40
+ - **Protein**: [Data-driven molecular design for discovery and synthesis of novel ligands: a case study on SARS-CoV-2 (2021; *Machine Learning: Science and Technology*)](https://iopscience.iop.org/article/10.1088/2632-2153/abe808/meta).
41
+
42
+ **License**: MIT
43
+
44
+ **Where to send questions or comments about the model**: Open an issue on [GT4SD repository](https://github.com/GT4SD/gt4sd-core).
45
+
46
+ **Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
47
+
48
+ **Primary intended uses/users**: Researchers and computational chemists using the model for model comparison or research exploration purposes.
49
+
50
+ **Out-of-scope use cases**: Production-level inference, producing molecules with harmful properties.
51
+
52
+ **Factors**: Not applicable.
53
+
54
+ **Metrics**: High reward on generating molecules with high affinity toward context.
55
+
56
+ **Datasets**: ChEMBL, UniProt, GDSC and BindingDB (see above).
57
+
58
+ **Ethical Considerations**: Unclear, please consult with original authors in case of questions.
59
+
60
+ **Caveats and Recommendations**: Unclear, please consult with original authors in case of questions.
61
+
62
+ Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
63
+
64
+ ## Citation
65
+
66
+ **Omics**:
67
+ ```bib
68
+ @article{born2021paccmannrl,
69
+ title = {PaccMann\textsuperscript{RL}: De novo generation of hit-like anticancer molecules from transcriptomic data via reinforcement learning},
70
+ journal = {iScience},
71
+ volume = {24},
72
+ number = {4},
73
+ pages = {102269},
74
+ year = {2021},
75
+ issn = {2589-0042},
76
+ doi = {https://doi.org/10.1016/j.isci.2021.102269},
77
+ url = {https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6},
78
+ author = {Born, Jannis and Manica, Matteo and Oskooei, Ali and Cadow, Joris and Markert, Greta and {Rodr{\'{i}}guez Mart{\'{i}}nez}, Mar{\'{i}}a}
79
+ }
80
+ ```
81
+
82
+ **Proteins**:
83
+ ```bib
84
+ @article{born2021datadriven,
85
+ author = {Born, Jannis and Manica, Matteo and Cadow, Joris and Markert, Greta and Mill, Nil Adell and Filipavicius, Modestas and Janakarajan, Nikita and Cardinale, Antonio and Laino, Teodoro and {Rodr{\'{i}}guez Mart{\'{i}}nez}, Mar{\'{i}}a},
86
+ doi = {10.1088/2632-2153/abe808},
87
+ issn = {2632-2153},
88
+ journal = {Machine Learning: Science and Technology},
89
+ number = {2},
90
+ pages = {025024},
91
+ title = {{Data-driven molecular design for discovery and synthesis of novel ligands: a case study on SARS-CoV-2}},
92
+ url = {https://iopscience.iop.org/article/10.1088/2632-2153/abe808},
93
+ volume = {2},
94
+ year = {2021}
95
+ }
96
+ ```
model_cards/description.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ <img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
2
+
3
+ [PaccMann<sup>RL</sup>](https://github.com/PaccMann/paccmann_rl) is a language-based molecular generative model that can be conditioned (primed) on protein targets or gene expression profiles and produces molecules with high affinity toward the context vector. This model has been developed at IBM Research and is distributed by the **GT4SD** (Generative Toolkit for Scientific Discovery) team. For details please see the two publications:
4
+ - [Born et al., (2021), *iScience*](https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6) for the model conditionable on gene expression profiles.
5
+ - [Born et al., (2021), *Machine Learning: Science & Technology*](https://iopscience.iop.org/article/10.1088/2632-2153/abe808/meta) for the model conditionable on protein targets.
6
+
7
+
8
+ For **examples** and **documentation** of the model parameters, please see below.
9
+ Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
model_cards/examples.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Protein_v0,Conditional,MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT,,1.2,100,10
2
+ Protein_v0,Unbiased,,,1.4,250,10
3
+ Omic_v0,Conditional,,0.08 0.9 0.47 0.91 0.7 0.88 0.95 0.37 0.72 0.42 0.63 0.77 0.65 0.83 0.48 0.31 0.36 0.33 0.64 0.33 1.0 0.82 0.49 0.98 0.96 0.86 0.1 0.92 0.13 0.41 0.88 0.79 0.88 0.01 0.3 0.98 0.91 0.83 0.06 0.77 0.56 0.87 0.78 0.27 0.97 0.14 0.71 0.1 0.08 0.63 0.53 0.6 0.66 0.04 0.46 0.6 0.59 0.36 0.65 0.57 0.96 0.42 0.37 0.18 0.71 0.5 0.54 0.22 0.21 0.53 0.66 0.9 0.4 0.95 0.48 0.81 0.47 0.27 0.56 0.77 0.32 0.66 0.01 0.82 0.29 0.81 0.7 0.77 0.65 0.36 0.78 0.31 0.85 0.69 0.12 0.04 0.39 0.11 0.13 0.15 0.35 0.97 0.66 0.35 0.78 0.33 0.48 0.8 0.26 0.05 0.69 0.07 0.92 0.22 0.35 0.13 0.22 0.94 0.73 0.81 0.29 0.3 0.13 0.06 0.9 0.62 0.19 0.69 0.72 0.55 0.34 0.26 0.72 0.95 0.81 0.78 0.5 0.47 0.67 0.49 0.48 0.75 0.52 0.91 0.42 0.62 0.8 0.17 1.0 0.35 0.63 0.02 0.79 0.67 0.99 0.86 0.71 0.15 0.13 0.54 0.19 0.81 0.56 0.98 0.16 0.15 0.69 0.17 0.66 0.74 0.65 0.9 0.73 0.61 0.69 0.19 0.04 0.72 0.41 0.35 0.93 0.91 0.34 0.35 0.92 0.45 0.34 0.52 0.73 0.39 0.54 0.83 0.99 0.68 0.16 0.6 0.48 0.18 0.96 0.7 0.18 0.77 0.6 0.07 0.99 0.97 0.41 0.25 0.98 0.85 0.95 0.59 0.77 0.18 0.22 0.39 0.33 0.46 0.07 0.16 0.81 0.0 0.53 0.49 0.9 0.57 0.03 0.26 0.24 0.57 0.63 0.88 0.57 0.73 0.6 0.71 0.29 0.25 0.94 0.23 0.93 0.07 0.35 0.59 0.66 0.51 0.25 0.51 0.47 0.04 0.85 0.15 0.4 0.51 0.0 0.29 0.29 0.07 0.14 0.77 0.1 0.31 0.95 0.52 0.48 0.24 0.71 0.27 0.93 0.77 0.04 0.92 0.08 0.92 0.68 0.32 0.15 0.77 0.63 0.73 0.14 0.83 0.76 0.96 0.72 0.57 0.92 0.35 0.62 0.21 0.46 0.66 0.89 0.52 0.35 0.71 0.0 0.78 0.51 0.34 0.05 0.57 0.34 0.54 0.57 0.81 0.88 0.61 0.53 0.98 0.26 0.34 0.57 0.94 0.09 0.94 0.15 0.81 0.15 0.83 0.83 0.73 0.33 0.69 0.89 0.46 0.96 0.12 0.82 0.89 0.45 0.26 0.84 0.48 0.51 0.43 0.12 0.74 0.32 0.19 0.8 0.04 0.61 0.63 0.23 0.22 0.7 0.14 0.63 0.35 0.89 0.4 0.1 0.1 0.56 0.98 0.7 0.41 0.78 0.14 0.04 0.97 0.32 0.66 0.54 0.66 0.8 0.86 0.36 0.99 0.01 0.41 0.62 0.81 0.14 0.84 0.49 0.3 0.4 0.13 0.2 0.05 0.29 0.11 0.75 0.87 0.71 0.25 0.43 0.67 0.49 0.2 0.77 0.85 0.32 0.94 0.51 0.95 0.54 0.22 0.7 0.97 0.71 
0.24 0.88 0.9 0.61 0.99 0.57 0.25 0.01 0.09 0.83 0.83 0.89 0.58 0.95 0.86 0.06 0.88 0.27 0.12 0.7 0.17 0.23 0.43 0.61 0.51 0.65 0.02 0.19 0.61 0.69 0.14 0.89 0.3 0.86 0.55 0.06 0.46 0.78 0.82 0.34 0.63 0.38 0.12 0.15 0.45 0.93 0.08 0.54 0.94 0.64 0.74 0.4 0.23 0.18 0.27 0.44 0.6 0.82 0.19 0.13 0.48 0.19 0.99 0.66 0.69 0.86 0.47 0.15 0.94 0.53 0.07 0.61 0.44 0.62 0.85 0.16 0.66 0.58 0.63 0.55 0.38 0.02 0.68 0.91 0.89 0.63 0.25 0.58 0.93 0.52 0.7 0.64 0.81 0.47 0.21 0.18 0.17 0.78 0.46 0.31 0.2 0.31 0.37 0.66 0.46 0.11 1.0 0.21 0.39 0.12 0.36 0.83 0.52 0.76 0.23 0.62 0.17 0.21 0.07 0.78 0.12 0.59 0.76 0.33 0.49 0.13 0.67 0.44 0.92 0.84 0.18 0.73 0.81 0.68 0.27 0.28 0.14 0.23 0.98 0.07 0.34 0.2 0.78 0.44 0.27 0.7 0.88 0.28 0.96 0.07 0.33 0.65 0.9 0.99 0.75 0.32 0.68 0.54 0.57 0.28 0.57 0.96 0.91 0.0 0.0 0.32 0.66 0.08 0.7 0.14 0.88 0.91 0.85 0.17 0.91 0.31 0.47 0.69 0.41 0.8 0.08 0.59 0.66 0.79 0.82 0.28 0.11 0.05 0.11 0.61 0.66 0.25 0.32 0.53 0.8 0.11 0.5 0.6 0.73 0.31 0.11 0.2 1.0 0.79 0.88 0.77 0.37 0.51 0.25 0.89 0.79 0.8 0.79 0.96 0.45 0.36 0.14 0.64 0.85 0.75 0.23 0.64 0.23 0.64 0.41 0.76 0.78 0.13 0.37 0.48 0.61 0.32 0.58 0.98 0.58 0.27 0.06 0.78 0.05 0.56 0.14 0.57 0.2 0.68 0.61 0.58 0.36 0.39 0.99 0.63 0.12 0.82 0.05 0.54 0.96 0.27 0.2 0.94 0.03 0.55 0.9 0.47 0.61 0.83 0.72 0.9 0.94 0.53 0.11 0.57 0.96 0.64 0.35 0.81 0.72 0.59 0.45 0.85 0.98 0.44 0.08 0.12 0.5 0.17 0.31 0.8 0.49 0.13 0.63 0.83 0.32 0.22 0.13 0.76 0.18 0.4 0.81 0.65 0.02 0.94 0.39 0.0 0.58 0.96 0.93 0.33 0.22 0.12 0.78 0.22 0.65 0.82 0.83 0.79 0.09 0.86 0.55 0.16 0.95 0.76 0.22 0.06 0.21 0.58 0.63 0.31 0.21 0.99 0.19 0.13 0.68 0.33 0.82 0.91 0.42 0.37 0.55 0.66 0.29 0.36 0.75 0.62 1.0 0.71 0.21 0.17 0.73 0.23 0.6 0.99 0.85 0.22 0.58 0.4 0.97 0.46 0.69 0.19 0.78 0.26 0.0 0.74 0.43 0.17 0.05 0.74 0.46 0.23 0.64 0.13 0.47 0.14 0.54 0.48 0.88 0.64 0.23 0.48 0.82 0.81 0.56 0.99 0.07 0.07 0.53 0.74 0.67 0.52 0.66 0.14 0.52 0.46 0.85 0.44 0.05 0.13 0.56 0.38 0.57 0.15 0.84 0.99 0.97 0.0 0.12 0.07 
0.79 0.29 0.02 0.54 0.39 0.26 0.28 0.44 0.88 0.62 0.63 0.16 0.67 0.66 0.03 0.97 0.83 0.95 0.84 0.95 0.56 0.67 0.38 0.71 0.16 0.43 0.29 0.34 0.71 0.44 0.63 0.7 0.11 0.72 0.23 0.94 0.02 0.33 0.33 0.92 0.35 0.31 0.17 0.36 0.91 0.75 0.1 0.65 0.83 0.79 0.58 0.43 0.8 0.19 0.64 0.3 0.57 0.01 0.41 0.9 0.46 0.31 0.88 0.19 0.02 0.75 0.07 0.45 0.18 0.25 0.01 0.97 0.75 0.64 0.23 0.34 0.07 0.21 0.22 0.02 0.92 0.02 0.69 0.1 0.86 0.05 0.02 0.81 0.96 0.85 0.13 0.55 0.99 0.49 0.89 0.13 0.52 0.91 0.69 0.97 0.95 0.81 0.12 0.92 0.44 0.89 0.57 0.47 0.47 0.78 0.12 0.26 0.24 0.44 0.74 0.43 0.06 0.32 0.89 0.03 0.64 0.18 0.22 0.25 0.14 0.24 0.72 0.96 0.72 0.96 0.52 0.7 0.66 0.88 0.25 0.91 0.14 0.52 0.7 0.56 0.59 0.43 0.21 0.8 0.67 0.33 0.63 0.55 0.55 0.92 0.16 0.31 0.61 0.29 0.9 0.06 0.69 0.89 0.12 0.58 0.74 0.83 0.8 0.14 0.04 0.69 0.28 0.62 0.77 0.11 0.62 0.18 0.59 0.17 0.58 0.1 0.08 0.61 0.46 0.2 0.6 0.94 0.65 0.1 0.47 0.35 0.51 0.8 0.2 0.06 0.86 1.0 0.73 0.43 0.41 0.88 0.46 0.83 0.5 0.15 0.22 0.85 0.79 0.5 0.67 0.99 0.89 0.75 0.82 0.07 0.45 0.54 0.82 0.34 0.01 0.97 0.41 0.53 0.18 0.56 0.02 0.63 0.64 0.21 0.84 0.25 0.41 0.46 0.73 0.91 0.71 0.16 0.01 0.09 0.95 0.7 0.45 0.86 0.9 0.04 0.98 0.66 0.93 0.58 0.37 0.62 0.73 0.37 0.3 0.71 0.95 0.41 0.79 0.45 0.71 0.57 0.24 0.43 0.07 0.85 0.53 0.57 0.58 0.45 0.82 0.92 0.17 0.23 0.29 0.62 0.03 0.36 0.68 0.5 0.69 0.07 0.07 0.36 0.94 0.06 0.4 0.93 0.48 0.17 0.78 0.66 0.45 0.82 0.93 0.99 0.51 0.19 0.32 0.47 0.69 0.19 0.35 0.19 0.62 0.34 0.52 0.42 0.76 0.05 0.9 0.53 0.59 0.52 0.43 0.73 0.43 0.37 0.09 0.47 0.59 0.78 0.83 0.85 0.21 0.95 0.47 0.87 0.43 0.95 0.18 0.13 0.95 0.79 0.62 0.02 0.79 0.28 0.87 0.71 0.13 0.53 0.02 0.73 0.6 0.13 0.75 0.07 0.02 0.34 0.58 0.55 0.4 0.42 0.46 0.43 0.98 0.86 0.31 0.77 0.64 0.97 0.6 0.91 0.94 0.9 0.34 0.78 0.0 0.49 0.17 0.86 0.47 0.3 0.62 0.33 0.86 0.62 0.65 0.36 0.4 0.08 0.67 0.92 0.76 0.87 0.61 0.41 0.3 0.65 0.25 0.37 0.3 0.57 0.77 0.64 0.1 0.3 0.6 0.52 0.45 0.1 0.02 0.83 0.57 0.41 0.46 0.55 0.41 0.77 0.39 0.03 0.0 0.9 
0.42 0.22 0.73 0.48 0.94 0.15 0.14 0.32 0.65 0.6 0.03 0.64 0.15 0.42 0.96 0.41 0.53 0.43 0.3 0.76 0.93 0.32 0.53 0.62 0.31 0.54 0.2 0.66 0.68 0.39 0.01 0.99 0.25 0.71 0.19 0.52 0.93 0.96 0.68 1.0 0.4 0.66 0.64 0.09 0.28 0.47 0.01 0.99 0.36 0.09 0.57 0.79 0.41 0.35 0.3 0.5 0.28 0.71 0.27 0.13 0.06 0.46 0.39 0.37 0.88 0.99 0.3 0.09 0.01 0.98 0.74 0.12 0.01 0.15 0.64 0.68 0.27 0.09 0.89 0.3 0.64 0.34 0.44 0.71 0.01 0.0 0.33 0.12 0.05 0.74 0.81 0.49 0.45 0.94 0.86 0.58 0.56 0.07 0.91 0.54 0.64 0.82 0.17 0.69 0.7 0.99 0.35 0.62 0.6 0.93 0.38 0.32 0.01 0.79 0.62 0.97 0.74 0.71 0.54 0.08 0.01 0.09 0.95 0.53 0.52 0.15 0.18 0.38 0.71 0.57 0.2 0.87 1.0 0.43 0.93 0.49 0.65 0.42 0.29 0.63 0.53 0.34 0.84 0.23 0.38 0.51 0.88 0.07 0.17 0.9 0.13 0.83 0.54 0.54 0.07 0.49 0.83 0.94 0.04 0.79 0.18 0.46 0.51 0.73 0.68 0.04 0.89 0.4 0.16 0.9 0.36 0.73 0.36 0.39 0.42 0.03 0.6 0.85 0.2 0.88 0.64 0.07 0.04 0.58 0.11 0.36 0.19 0.12 0.74 0.54 0.65 0.37 0.31 0.78 0.94 0.02 0.56 0.72 0.18 0.03 0.12 0.3 0.55 0.74 0.22 0.14 0.42 0.23 0.71 0.78 0.66 0.82 0.12 0.83 0.73 0.7 0.22 0.89 0.81 0.34 0.61 0.2 0.68 0.22 0.84 0.03 0.99 0.06 0.23 0.68 0.71 0.41 0.97 0.04 0.78 0.88 0.8 0.72 0.63 0.68 0.94 0.58 0.07 0.53 0.51 0.04 0.45 0.19 0.05 0.23 0.67 0.13 0.41 0.62 0.18 0.01 0.34 0.91 0.88 0.21 0.71 0.47 0.61 0.51 0.65 0.95 0.33 0.0 0.16 0.56 0.21 0.06 0.06 0.06 0.8 0.39 0.83 0.29 0.04 0.74 0.27 0.25 0.35 0.78 0.44 0.23 0.95 0.97 0.89 0.83 0.85 0.41 0.95 0.69 0.09 0.91 0.63 0.96 0.76 0.16 0.75 0.41 0.83 0.63 0.83 0.86 0.82 0.04 0.32 0.3 0.21 0.39 0.48 0.8 0.21 0.4 0.96 0.71 0.63 0.54 0.95 0.81 0.11 0.83 0.63 0.41 0.33 0.32 0.58 0.72 0.82 0.73 0.01 0.5 0.93 0.69 0.91 0.44 0.18 0.28 0.61 0.5 0.98 0.93 0.91 0.72 0.59 0.63 0.03 0.82 0.62 0.07 0.51 0.53 0.89 0.47 0.04 0.08 0.17 0.2 0.88 0.78 0.93 0.71 0.24 0.22 0.32 0.87 0.03 0.01 0.85 0.77 0.82 0.64 0.2 0.83 0.88 0.23 0.44 0.72 0.2 0.98 0.11 0.46 0.59 0.3 0.82 0.01 0.66 0.8 0.91 0.0 0.86 0.84 0.56 0.49 0.22 0.27 0.02 0.62 0.55 0.62 0.79 0.94 0.89 0.56 0.87 
0.96 0.43 0.58 0.63 0.22 0.37 0.44 0.85 0.28 0.25 0.4 0.34 0.14 0.8 0.84 0.89 0.06 0.45 0.02 0.07 0.85 0.43 0.13 0.21 0.21 0.05 0.23 0.85 0.44 0.8 0.52 0.39 0.65 0.67 0.64 0.79 0.3 0.01 0.3 0.11 0.02 0.96 0.05 0.44 0.06 0.01 0.77 0.19 0.06 0.31 0.48 0.97 0.64 0.92 0.76 0.07 0.77 0.95 0.98 0.63 0.25 0.27 0.76 0.96 0.24 0.18 0.8 0.0 0.96 0.24 0.52 0.59 0.65 0.17 0.32 0.55 0.59 0.62 0.82 0.59 0.29 0.42 0.12 0.24 0.02 0.66 0.59 0.78 0.37 0.19 0.96 0.18 0.2 0.99 0.76 0.58 0.35 0.54 0.89 0.14 0.58 0.1 0.97 0.38 0.82 0.48 0.06 0.83 1.0 0.99 0.77 0.41 0.08 0.87 0.75 0.13 0.52 0.58 0.68 0.03 0.92 0.55 0.04 0.56 0.63 0.28 0.8 0.39 0.68 0.58 0.01 0.23 0.28 0.98 0.96 0.05 0.28 0.44 0.31 0.91 0.81 0.18 0.65 0.53 0.02 0.41 0.98 0.09 0.12 0.84 0.6 0.17 0.2 0.58 0.35 0.25 0.74 0.83 0.55 0.18 0.8 0.33 0.04 0.56 0.85 0.22 0.83 0.48 0.53 0.54 0.51 0.06 0.76 0.1 0.43 0.21 0.46 0.97 0.48 0.77 0.11 0.36 0.9 0.52 0.06 0.23 0.8 0.09 0.11 0.57 0.59 0.76 0.44 0.15 0.46 0.07 0.86 0.01 0.49 0.05 0.54 0.14 0.29 0.01 0.81 0.45 0.45 0.12 0.82 0.47 0.93 0.51 0.04 0.26 0.14 0.5 0.06 0.25 0.62 0.95 0.07 0.28 0.32 0.03 0.28 0.45 0.86 0.24 0.22 0.78 0.63 0.4 0.33 0.56 0.26 0.41 0.63 0.73 0.73 0.35 0.44 0.67 0.03 0.07 0.68 0.86 0.35 0.58 0.75 0.16 0.37 0.87 0.66 0.59 0.67 0.46 0.64 0.78 0.97 0.45 0.98 0.64 0.41 0.58 0.51 0.97 0.95 0.9 0.34 0.1 0.76 0.37 0.05 0.57 0.72 0.91 0.4 0.43 0.78 0.78 0.39 0.3 0.21 0.88 0.36 0.54 0.87 0.84 0.19 0.22 0.89 0.89 0.85 0.77 0.86 0.46 0.5 0.88 0.18 0.4 0.61 0.07 0.06 0.65 0.05 0.31 0.55 0.87 0.05 0.54 0.28 0.28 0.35 0.1 0.55 0.82 0.86 0.12 0.17 0.69 0.74 0.13 0.08 0.6 0.4 0.97 0.32 0.81 0.14 0.97 0.65 0.72 0.32 0.57 0.69 0.74 0.65 0.75 0.37 0.88 0.97 0.88 0.7 0.98 0.36 0.1 0.35 0.15 0.23 0.09 0.3 1.0 0.21 0.99 0.44 0.23 0.21 0.15 0.43 0.77 0.17 0.32 0.55 0.8 0.08 0.72 0.49 0.31 0.39 0.48 0.29 0.78 0.64 0.04 0.11 0.69 0.76 0.9 0.79 0.32 0.03 0.68 0.67 0.35 0.55 0.01 0.03 0.22 0.31 0.3 0.28 0.14 0.01 0.73 0.86 0.67 0.06 0.45 0.32 0.78 0.22 0.84 0.19 0.29 0.8 0.61 0.23 
0.71 0.94 0.04 0.86 0.87 0.88 0.65 0.04 0.93 0.1 0.73 0.38 0.88 0.8 0.54 0.62 0.2 0.76 0.66 0.46 0.0 0.32 0.38 0.92 0.85 0.84 0.9 0.85 0.08 0.32 0.98 0.57 0.72 0.48 0.86 0.23 1.0 0.56 0.48 0.13 0.61 0.46 0.38 0.58 0.06 0.95 0.37 0.94 0.11 0.44 0.53 0.26 0.98 0.67 0.28 0.65 0.28 0.48 0.52 0.58 0.01 0.1 0.03 0.29 0.14 0.33 0.5 0.98 0.99 0.68 0.28 0.12 0.6 0.65 0.77 0.69 0.66 0.5 0.76 0.79 0.79 0.64 0.67 0.35 0.78 0.71 0.47 0.5 0.79 0.69 0.13 0.18 0.89 0.29 0.79 0.92 0.54,1.2,100,10
requirements.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -f https://download.pytorch.org/whl/cpu/torch_stable.html
2
+ -f https://data.pyg.org/whl/torch-1.12.1+cpu.html
3
+ # pip==20.2.4
4
+ torch==1.12.1
5
+ torch-scatter
6
+ torch-spline-conv
7
+ torch-sparse
8
+ torch-geometric
9
+ torchvision==0.13.1
10
+ torchaudio==0.12.1
11
+ gt4sd>=1.0.5
12
+ molgx>=0.22.0a1
13
+ molecule_generation
14
+ nglview
15
+ PyTDC==0.3.7
16
+ gradio==3.12.0
17
+ markdown-it-py>=2.1.0
18
+ mols2grid>=0.2.0
19
+ numpy==1.23.5
20
+ pandas>=1.0.0
21
+ terminator @ git+https://github.com/IBM/regression-transformer@gt4sd
22
+ guacamol_baselines @ git+https://github.com/GT4SD/[email protected]
23
+ moses @ git+https://github.com/GT4SD/[email protected]
24
+ paccmann_chemistry @ git+https://github.com/PaccMann/[email protected]
25
+ paccmann_generator @ git+https://github.com/PaccMann/[email protected]
26
+ paccmann_gp @ git+https://github.com/PaccMann/[email protected]
27
+ paccmann_omics @ git+https://github.com/PaccMann/[email protected]
28
+ paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@sarscov2
29
+ reinvent_models @ git+https://github.com/GT4SD/[email protected]
utils.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from collections import defaultdict
3
+ from typing import List
4
+
5
+ import mols2grid
6
+ import pandas as pd
7
+
8
+ logger = logging.getLogger(__name__)
9
+ logger.addHandler(logging.NullHandler())
10
+
11
+
12
def draw_grid_generate(
    samples: List[str],
    seeds: List[str] = None,
    n_cols: int = 3,
    size=(140, 200),
) -> str:
    """
    Uses mols2grid to draw a HTML grid for the generated molecules

    Args:
        samples: The generated samples.
        seeds: Optional seed molecules shown before the samples. Defaults to
            None, which is treated as no seeds.
        n_cols: Number of columns in grid. Defaults to 3.
        size: Size of molecule in grid. Defaults to (140, 200).

    Returns:
        HTML to display
    """
    # Copy into fresh lists: avoids a shared mutable default and lets callers
    # pass tuples or other iterables.
    seed_list = [] if seeds is None else list(seeds)
    sample_list = list(samples)

    result = {
        "SMILES": seed_list + sample_list,
        "Name": [f"Seed_{i}" for i in range(len(seed_list))]
        + [f"Generated_{i}" for i in range(len(sample_list))],
    }

    result_df = pd.DataFrame(result)
    obj = mols2grid.display(
        result_df,
        tooltip=list(result.keys()),
        height=1100,
        n_cols=n_cols,
        name="Results",
        size=size,
    )
    return obj.data