Spaces:
Sleeping
Sleeping
jannisborn
committed on
Commit
•
1634315
1
Parent(s):
39bc9b9
update
Browse files- README.md +2 -2
- app.py +21 -27
- model_cards/article.md +23 -28
- model_cards/description.md +1 -1
- model_cards/examples.csv +1 -5
- requirements.txt +1 -1
- utils.py +2 -6
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title: GT4SD -
|
3 |
emoji: 💡
|
4 |
colorFrom: green
|
5 |
colorTo: blue
|
@@ -9,7 +9,7 @@ app_file: app.py
|
|
9 |
pinned: false
|
10 |
python_version: 3.8.13
|
11 |
pypi_version: 20.2.4
|
12 |
-
duplicated_from: jannisborn/gt4sd-
|
13 |
---
|
14 |
|
15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: GT4SD - Polymer Blocks
|
3 |
emoji: 💡
|
4 |
colorFrom: green
|
5 |
colorTo: blue
|
|
|
9 |
pinned: false
|
10 |
python_version: 3.8.13
|
11 |
pypi_version: 20.2.4
|
12 |
+
duplicated_from: jannisborn/gt4sd-paccmann-gp
|
13 |
---
|
14 |
|
15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -1,39 +1,31 @@
|
|
1 |
import logging
|
2 |
import pathlib
|
3 |
-
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
6 |
-
from gt4sd.algorithms.generation.
|
|
|
|
|
|
|
7 |
|
8 |
from gt4sd.algorithms.registry import ApplicationsRegistry
|
|
|
9 |
from utils import draw_grid_generate
|
10 |
|
11 |
logger = logging.getLogger(__name__)
|
12 |
logger.addHandler(logging.NullHandler())
|
13 |
|
14 |
-
TITLE = "MoLeR"
|
15 |
|
|
|
16 |
|
17 |
-
|
18 |
-
algorithm_version: str,
|
19 |
-
scaffolds: str,
|
20 |
-
beam_size: int,
|
21 |
-
number_of_samples: int,
|
22 |
-
seed: int,
|
23 |
-
):
|
24 |
-
config = MoLeRDefaultGenerator(
|
25 |
algorithm_version=algorithm_version,
|
26 |
-
|
27 |
-
|
28 |
-
num_samples=4,
|
29 |
-
seed=seed,
|
30 |
-
num_workers=1,
|
31 |
)
|
32 |
-
model =
|
33 |
samples = list(model.sample(number_of_samples))
|
34 |
|
35 |
-
|
36 |
-
return draw_grid_generate(seed_mols, samples)
|
37 |
|
38 |
|
39 |
if __name__ == "__main__":
|
@@ -42,7 +34,9 @@ if __name__ == "__main__":
|
|
42 |
all_algos = ApplicationsRegistry.list_available()
|
43 |
algos = [
|
44 |
x["algorithm_version"]
|
45 |
-
for x in list(
|
|
|
|
|
46 |
]
|
47 |
|
48 |
# Load metadata
|
@@ -59,19 +53,19 @@ if __name__ == "__main__":
|
|
59 |
|
60 |
demo = gr.Interface(
|
61 |
fn=run_inference,
|
62 |
-
title="
|
63 |
inputs=[
|
64 |
gr.Dropdown(algos, label="Algorithm version", value="v0"),
|
65 |
-
gr.
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
69 |
),
|
70 |
-
gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Beam_size"),
|
71 |
gr.Slider(
|
72 |
minimum=1, maximum=50, value=10, label="Number of samples", step=1
|
73 |
),
|
74 |
-
gr.Number(value=42, label="Seed", precision=0),
|
75 |
],
|
76 |
outputs=gr.HTML(label="Output"),
|
77 |
article=article,
|
|
|
1 |
import logging
|
2 |
import pathlib
|
|
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
5 |
+
from gt4sd.algorithms.generation.polymer_blocks import (
|
6 |
+
PolymerBlocksGenerator,
|
7 |
+
PolymerBlocks,
|
8 |
+
)
|
9 |
|
10 |
from gt4sd.algorithms.registry import ApplicationsRegistry
|
11 |
+
|
12 |
from utils import draw_grid_generate
|
13 |
|
14 |
logger = logging.getLogger(__name__)
|
15 |
logger.addHandler(logging.NullHandler())
|
16 |
|
|
|
17 |
|
18 |
+
def run_inference(algorithm_version: str, length: float, number_of_samples: int):
|
19 |
|
20 |
+
config = PolymerBlocksGenerator(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
algorithm_version=algorithm_version,
|
22 |
+
batch_size=32,
|
23 |
+
generated_length=length,
|
|
|
|
|
|
|
24 |
)
|
25 |
+
model = PolymerBlocks(config)
|
26 |
samples = list(model.sample(number_of_samples))
|
27 |
|
28 |
+
return draw_grid_generate(samples=samples, n_cols=5, seeds=[])
|
|
|
29 |
|
30 |
|
31 |
if __name__ == "__main__":
|
|
|
34 |
all_algos = ApplicationsRegistry.list_available()
|
35 |
algos = [
|
36 |
x["algorithm_version"]
|
37 |
+
for x in list(
|
38 |
+
filter(lambda x: "PolymerBlocks" in x["algorithm_name"], all_algos)
|
39 |
+
)
|
40 |
]
|
41 |
|
42 |
# Load metadata
|
|
|
53 |
|
54 |
demo = gr.Interface(
|
55 |
fn=run_inference,
|
56 |
+
title="Polymer Blocks",
|
57 |
inputs=[
|
58 |
gr.Dropdown(algos, label="Algorithm version", value="v0"),
|
59 |
+
gr.Slider(
|
60 |
+
minimum=5,
|
61 |
+
maximum=400,
|
62 |
+
value=100,
|
63 |
+
label="Maximal sequence length",
|
64 |
+
step=1,
|
65 |
),
|
|
|
66 |
gr.Slider(
|
67 |
minimum=1, maximum=50, value=10, label="Number of samples", step=1
|
68 |
),
|
|
|
69 |
],
|
70 |
outputs=gr.HTML(label="Output"),
|
71 |
article=article,
|
model_cards/article.md
CHANGED
@@ -1,37 +1,36 @@
|
|
1 |
# Model documentation & parameters
|
2 |
|
3 |
-
**Algorithm Version**: Which model
|
4 |
|
5 |
-
**
|
6 |
|
7 |
**Number of samples**: How many samples should be generated (between 1 and 50).
|
8 |
|
9 |
-
**Beam size**: Beam size used in beam search decoding (the higher the slower but better).
|
10 |
|
11 |
-
**Seed**: The random seed used for initialization.
|
12 |
|
|
|
13 |
|
14 |
-
|
15 |
|
16 |
-
**
|
17 |
|
18 |
-
**
|
19 |
|
20 |
-
**
|
21 |
|
22 |
-
**Model
|
23 |
|
24 |
-
**Model
|
25 |
|
26 |
-
**
|
|
|
27 |
|
28 |
-
**
|
29 |
-
|
30 |
-
**Paper or other resource for more information**: [Learning to Extend Molecular Scaffolds with Structural Motifs (ICLR 2022)](https://openreview.net/forum?id=ZTsoE8G3GG).
|
31 |
|
32 |
**License**: MIT
|
33 |
|
34 |
-
**Where to send questions or comments about the model**: Open an issue on
|
35 |
|
36 |
**Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
|
37 |
|
@@ -39,11 +38,9 @@
|
|
39 |
|
40 |
**Out-of-scope use cases**: Production-level inference, producing molecules with harmful properties.
|
41 |
|
42 |
-
**
|
43 |
-
|
44 |
-
**Metrics**: Validation loss on decoding correct molecules. Evaluated on several downstream tasks.
|
45 |
|
46 |
-
**Datasets**:
|
47 |
|
48 |
**Ethical Considerations**: Unclear, please consult with original authors in case of questions.
|
49 |
|
@@ -52,14 +49,12 @@
|
|
52 |
Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
|
53 |
|
54 |
## Citation
|
55 |
-
|
56 |
```bib
|
57 |
-
@
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
year = {2022}
|
63 |
}
|
64 |
-
```
|
65 |
-
|
|
|
1 |
# Model documentation & parameters
|
2 |
|
3 |
+
**Algorithm Version**: Which model version to use.
|
4 |
|
5 |
+
**Maximal sequence length**: The maximal number of SMILES tokens in the generated molecule.
|
6 |
|
7 |
**Number of samples**: How many samples should be generated (between 1 and 50).
|
8 |
|
|
|
9 |
|
|
|
10 |
|
11 |
+
# Model card -- PolymerBlocks
|
12 |
|
13 |
+
**Model Details**: *PolymerBlocks* is a sequence-based molecular generator tuned to generate blocks of polymers (e.g., catalysts and monomers). The model relies on a Variational Autoencoder architecture as described in [Born et al. (2021; *iScience*)](https://www.sciencedirect.com/science/article/pii/S2589004221002376).
|
14 |
|
15 |
+
**Developers**: Matteo Manica and colleagues from IBM Research.
|
16 |
|
17 |
+
**Distributors**: Original authors' code integrated into GT4SD.
|
18 |
|
19 |
+
**Model date**: Not yet published.
|
20 |
|
21 |
+
**Model version**: Only initial model version.
|
22 |
|
23 |
+
**Model type**: A sequence-based molecular generator tuned to generate blocks of polymers (e.g., catalysts and monomers).
|
24 |
|
25 |
+
**Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
|
26 |
+
N.A.
|
27 |
|
28 |
+
**Paper or other resource for more information**:
|
29 |
+
TBD
|
|
|
30 |
|
31 |
**License**: MIT
|
32 |
|
33 |
+
**Where to send questions or comments about the model**: Open an issue on [GT4SD repository](https://github.com/GT4SD/gt4sd-core).
|
34 |
|
35 |
**Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
|
36 |
|
|
|
38 |
|
39 |
**Out-of-scope use cases**: Production-level inference, producing molecules with harmful properties.
|
40 |
|
41 |
+
**Metrics**: N.A.
|
|
|
|
|
42 |
|
43 |
+
**Datasets**: N.A.
|
44 |
|
45 |
**Ethical Considerations**: Unclear, please consult with original authors in case of questions.
|
46 |
|
|
|
49 |
Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
|
50 |
|
51 |
## Citation
|
52 |
+
TBD. In the meantime, please cite:
|
53 |
```bib
|
54 |
+
@article{manica2022gt4sd,
|
55 |
+
title={GT4SD: Generative Toolkit for Scientific Discovery},
|
56 |
+
author={Manica, Matteo and Cadow, Joris and Christofidellis, Dimitrios and Dave, Ashish and Born, Jannis and Clarke, Dean and Teukam, Yves Gaetan Nana and Hoffman, Samuel C and Buchan, Matthew and Chenthamarakshan, Vijil and others},
|
57 |
+
journal={arXiv preprint arXiv:2207.03928},
|
58 |
+
year={2022}
|
|
|
59 |
}
|
60 |
+
```
|
|
model_cards/description.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
<img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
|
2 |
|
3 |
-
|
4 |
|
5 |
For **examples** and **documentation** of the model parameters, please see below.
|
6 |
Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
|
|
|
1 |
<img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
|
2 |
|
3 |
+
*PolymerBlocks* is a sequence-based molecular generator tuned to generate blocks of polymers (e.g., catalysts and monomers). The model relies on a Variational Autoencoder architecture as described in [Born et al. (2021; *iScience*)](https://www.sciencedirect.com/science/article/pii/S2589004221002376).
|
4 |
|
5 |
For **examples** and **documentation** of the model parameters, please see below.
|
6 |
Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
|
model_cards/examples.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
-
v0
|
2 |
-
v0,CC(=O)NC1=NC2=CC(OCC3=CC=CN(CC4=CC=C(Cl)C=C4)C3=O)=CC=C2N1,1,10,0
|
3 |
-
v0,C12C=CC=NN1C(C#CC1=C(C)C=CC3C(NC4=CC(C(F)(F)F)=CC=C4)=NOC1=3)=CN=2.CCO,3,5,5
|
4 |
-
|
5 |
-
|
|
|
1 |
+
v0,100,10
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -8,7 +8,7 @@ torch-sparse
|
|
8 |
torch-geometric
|
9 |
torchvision==0.13.1
|
10 |
torchaudio==0.12.1
|
11 |
-
gt4sd>=1.0.
|
12 |
molgx>=0.22.0a1
|
13 |
molecule_generation
|
14 |
nglview
|
|
|
8 |
torch-geometric
|
9 |
torchvision==0.13.1
|
10 |
torchaudio==0.12.1
|
11 |
+
gt4sd>=1.0.5
|
12 |
molgx>=0.22.0a1
|
13 |
molecule_generation
|
14 |
nglview
|
utils.py
CHANGED
@@ -1,21 +1,17 @@
|
|
1 |
-
import json
|
2 |
import logging
|
3 |
-
import os
|
4 |
from collections import defaultdict
|
5 |
-
from typing import
|
6 |
|
7 |
import mols2grid
|
8 |
import pandas as pd
|
9 |
-
from rdkit import Chem
|
10 |
-
from terminator.selfies import decoder
|
11 |
|
12 |
logger = logging.getLogger(__name__)
|
13 |
logger.addHandler(logging.NullHandler())
|
14 |
|
15 |
|
16 |
def draw_grid_generate(
|
17 |
-
seeds: List[str],
|
18 |
samples: List[str],
|
|
|
19 |
n_cols: int = 3,
|
20 |
size=(140, 200),
|
21 |
) -> str:
|
|
|
|
|
1 |
import logging
|
|
|
2 |
from collections import defaultdict
|
3 |
+
from typing import List
|
4 |
|
5 |
import mols2grid
|
6 |
import pandas as pd
|
|
|
|
|
7 |
|
8 |
logger = logging.getLogger(__name__)
|
9 |
logger.addHandler(logging.NullHandler())
|
10 |
|
11 |
|
12 |
def draw_grid_generate(
|
|
|
13 |
samples: List[str],
|
14 |
+
seeds: List[str] = [],
|
15 |
n_cols: int = 3,
|
16 |
size=(140, 200),
|
17 |
) -> str:
|