jannisborn committed on
Commit
997984a
0 Parent(s):

Duplicate from jannisborn/gt4sd-polymer-blocks

Browse files
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Generative Toolkit 4 Scientific Discovery
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: GT4SD - Polymer Blocks
3
+ emoji: 💡
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.9.1
8
+ app_file: app.py
9
+ pinned: false
10
+ python_version: 3.8.13
11
+ pypi_version: 20.2.4
12
+ duplicated_from: jannisborn/gt4sd-polymer-blocks
13
+ ---
14
+
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import pathlib
3
+ import gradio as gr
4
+ import pandas as pd
5
+ from gt4sd.algorithms.generation.polymer_blocks import (
6
+ PolymerBlocksGenerator,
7
+ PolymerBlocks,
8
+ )
9
+
10
+ from gt4sd.algorithms.registry import ApplicationsRegistry
11
+
12
+ from utils import draw_grid_generate
13
+
14
+ logger = logging.getLogger(__name__)
15
+ logger.addHandler(logging.NullHandler())
16
+
17
+
18
def run_inference(algorithm_version: str, length: float, number_of_samples: int):
    """
    Samples molecules from the PolymerBlocks model and renders them as HTML.

    Args:
        algorithm_version: Version of the algorithm passed to the generator config.
        length: Value forwarded as `generated_length`. It is annotated as a
            float because it comes from a UI slider, so it is cast to int
            before being handed to the configuration.
        number_of_samples: How many samples to draw from the model.

    Returns:
        The output of `draw_grid_generate` for the drawn samples (5 columns,
        no seed molecules).
    """
    # Slider values may arrive as floats (e.g. 100.0); the configuration and
    # the sampler are given plain integers to avoid type surprises downstream.
    config = PolymerBlocksGenerator(
        algorithm_version=algorithm_version,
        batch_size=32,
        generated_length=int(length),
    )
    model = PolymerBlocks(config)
    samples = list(model.sample(int(number_of_samples)))

    return draw_grid_generate(samples=samples, n_cols=5, seeds=[])
29
+
30
+
31
if __name__ == "__main__":

    # Preparation: collect the versions of every registered algorithm whose
    # name contains "PolymerBlocks".
    all_algos = ApplicationsRegistry.list_available()
    algos = [
        x["algorithm_version"]
        for x in all_algos
        if "PolymerBlocks" in x["algorithm_name"]
    ]

    # Load metadata
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    # Header-less CSV: each row holds one set of example inputs for the UI.
    examples = pd.read_csv(metadata_root.joinpath("examples.csv"), header=None).fillna(
        ""
    )

    # Read the markdown with an explicit encoding so the result does not
    # depend on the locale of the machine running the app.
    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(encoding="utf-8")

    demo = gr.Interface(
        fn=run_inference,
        title="Polymer Blocks",
        inputs=[
            gr.Dropdown(algos, label="Algorithm version", value="v0"),
            gr.Slider(
                minimum=5,
                maximum=400,
                value=100,
                label="Maximal sequence length",
                step=1,
            ),
            gr.Slider(
                minimum=1, maximum=50, value=10, label="Number of samples", step=1
            ),
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples.values.tolist(),
    )
    demo.launch(debug=True, show_error=True)
model_cards/article.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model documentation & parameters
2
+
3
+ **Algorithm Version**: Which model version to use.
4
+
5
+ **Maximal sequence length**: The maximal number of SMILES tokens in the generated molecule.
6
+
7
+ **Number of samples**: How many samples should be generated (between 1 and 50).
8
+
9
+
10
+
11
+ # Model card -- PolymerBlocks
12
+
13
+ **Model Details**: *PolymerBlocks* is a sequence-based molecular generator tuned to generate blocks of polymers (e.g., catalysts and monomers). The model relies on a Variational Autoencoder architecture as described in [Born et al. (2021; *iScience*)](https://www.sciencedirect.com/science/article/pii/S2589004221002376).
14
+
15
+ **Developers**: Matteo Manica and colleagues from IBM Research.
16
+
17
+ **Distributors**: Original authors' code integrated into GT4SD.
18
+
19
+ **Model date**: Not yet published.
20
+
21
+ **Model version**: Only initial model version.
22
+
23
+ **Model type**: A sequence-based molecular generator tuned to generate blocks of polymers (e.g., catalysts and monomers).
24
+
25
+ **Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
26
+ N.A.
27
+
28
+ **Paper or other resource for more information**:
29
+ TBD
30
+
31
+ **License**: MIT
32
+
33
+ **Where to send questions or comments about the model**: Open an issue on [GT4SD repository](https://github.com/GT4SD/gt4sd-core).
34
+
35
+ **Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
36
+
37
+ **Primary intended uses/users**: Researchers and computational chemists using the model for model comparison or research exploration purposes.
38
+
39
+ **Out-of-scope use cases**: Production-level inference, producing molecules with harmful properties.
40
+
41
+ **Metrics**: N.A.
42
+
43
+ **Datasets**: N.A.
44
+
45
+ **Ethical Considerations**: Unclear, please consult with original authors in case of questions.
46
+
47
+ **Caveats and Recommendations**: Unclear, please consult with original authors in case of questions.
48
+
49
+ Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
50
+
51
+ ## Citation
52
+ TBD; in the meantime, please cite:
53
+ ```bib
54
+ @article{manica2022gt4sd,
55
+ title={GT4SD: Generative Toolkit for Scientific Discovery},
56
+ author={Manica, Matteo and Cadow, Joris and Christofidellis, Dimitrios and Dave, Ashish and Born, Jannis and Clarke, Dean and Teukam, Yves Gaetan Nana and Hoffman, Samuel C and Buchan, Matthew and Chenthamarakshan, Vijil and others},
57
+ journal={arXiv preprint arXiv:2207.03928},
58
+ year={2022}
59
+ }
60
+ ```
model_cards/description.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
2
+
3
+ *PolymerBlocks* is a sequence-based molecular generator tuned to generate blocks of polymers (e.g., catalysts and monomers). The model relies on a Variational Autoencoder architecture as described in [Born et al. (2021; *iScience*)](https://www.sciencedirect.com/science/article/pii/S2589004221002376).
4
+
5
+ For **examples** and **documentation** of the model parameters, please see below.
6
+ Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
model_cards/examples.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ v0,100,10
requirements.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -f https://download.pytorch.org/whl/cpu/torch_stable.html
2
+ -f https://data.pyg.org/whl/torch-1.12.1+cpu.html
3
+ # pip==20.2.4
4
+ torch==1.12.1
5
+ torch-scatter
6
+ torch-spline-conv
7
+ torch-sparse
8
+ torch-geometric
9
+ torchvision==0.13.1
10
+ torchaudio==0.12.1
11
+ gt4sd>=1.0.5
12
+ molgx>=0.22.0a1
13
+ molecule_generation
14
+ nglview
15
+ PyTDC==0.3.7
16
+ gradio==3.12.0
17
+ markdown-it-py>=2.1.0
18
+ mols2grid>=0.2.0
19
+ numpy==1.23.5
20
+ pandas>=1.0.0
21
+ terminator @ git+https://github.com/IBM/regression-transformer@gt4sd
22
+ guacamol_baselines @ git+https://github.com/GT4SD/[email protected]
23
+ moses @ git+https://github.com/GT4SD/[email protected]
24
+ paccmann_chemistry @ git+https://github.com/PaccMann/[email protected]
25
+ paccmann_generator @ git+https://github.com/PaccMann/[email protected]
26
+ paccmann_gp @ git+https://github.com/PaccMann/[email protected]
27
+ paccmann_omics @ git+https://github.com/PaccMann/[email protected]
28
+ paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@sarscov2
29
+ reinvent_models @ git+https://github.com/GT4SD/[email protected]
utils.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
from collections import defaultdict
from typing import List, Optional

import mols2grid
import pandas as pd
7
+
8
+ logger = logging.getLogger(__name__)
9
+ logger.addHandler(logging.NullHandler())
10
+
11
+
12
def draw_grid_generate(
    samples: List[str],
    seeds: Optional[List[str]] = None,
    n_cols: int = 3,
    size=(140, 200),
) -> str:
    """
    Uses mols2grid to draw a HTML grid for the generated molecules

    Args:
        samples: The generated samples.
        seeds: Seed molecules, listed before the generated samples in the
            grid. Defaults to None, which is treated as no seeds.
        n_cols: Number of columns in grid. Defaults to 3.
        size: Size of molecule in grid. Defaults to (140, 200).

    Returns:
        HTML to display
    """
    # A None default replaces the former shared mutable `[]` default; copying
    # both inputs into lists also lets callers pass tuples or other iterables.
    seeds = [] if seeds is None else list(seeds)
    samples = list(samples)

    # Seeds come first, then samples; each group is numbered from 0.
    result = {
        "SMILES": seeds + samples,
        "Name": [f"Seed_{i}" for i in range(len(seeds))]
        + [f"Generated_{i}" for i in range(len(samples))],
    }

    result_df = pd.DataFrame(result)
    obj = mols2grid.display(
        result_df,
        tooltip=list(result.keys()),
        height=1100,
        n_cols=n_cols,
        name="Results",
        size=size,
    )
    return obj.data