Spaces:
Sleeping
Sleeping
jannisborn
committed on
Commit
•
c351b1e
1
Parent(s):
480220c
update
Browse files- README.md +1 -1
- app.py +33 -47
- model_cards/description.md +2 -2
- model_cards/examples.smi +14 -13
- utils.py +2 -0
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: 💡
|
4 |
colorFrom: green
|
5 |
colorTo: blue
|
|
|
1 |
---
|
2 |
+
title: Protein properties
|
3 |
emoji: 💡
|
4 |
colorFrom: green
|
5 |
colorTo: blue
|
app.py
CHANGED
@@ -4,71 +4,60 @@ import pathlib
|
|
4 |
import gradio as gr
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
7 |
-
from gt4sd.properties.
|
8 |
|
9 |
from utils import draw_grid_predict
|
10 |
|
11 |
logger = logging.getLogger(__name__)
|
12 |
logger.addHandler(logging.NullHandler())
|
13 |
|
14 |
-
REMOVE = ["docking", "docking_tdc", "molecule_one", "askcos", "plogp"]
|
15 |
-
REMOVE.extend(["similarity_seed", "activity_against_target", "organtox"])
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
"Sider": "Hepatobiliary disorders,Metabolism and nutrition disorders,Product issues,Eye disorders,Investigations,Musculoskeletal disorders,Gastrointestinal disorders,Social circumstances,Immune system disorders,Reproductive system and breast disorders,Bening & malignant,General disorders,Endocrine disorders,Surgical & medical procedures,Vascular disorders,Blood & lymphatic disorders,Skin & subcutaneous disorders,Congenital & genetic disorders,Infections,Respiratory & thoracic disorders,Psychiatric disorders,Renal & urinary disorders,Pregnancy conditions,Ear disorders,Cardiac disorders,Nervous system disorders,Injury & procedural complications",
|
20 |
-
"Clintox": "FDA approval, Clinical trial failure",
|
21 |
-
}
|
22 |
|
23 |
|
24 |
-
def main(property: str,
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
30 |
model = algo(config(**kwargs))
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
smiles
|
35 |
-
elif
|
36 |
-
|
37 |
-
|
|
|
|
|
38 |
|
39 |
# Expand to 2D array if needed
|
40 |
if len(props.shape) == 1:
|
41 |
props = np.expand_dims(np.array(props), -1)
|
42 |
|
43 |
-
|
44 |
-
property_names = MODEL_PROP_DESCRIPTION[property].split(",")
|
45 |
-
else:
|
46 |
-
property_names = [property]
|
47 |
-
|
48 |
-
return draw_grid_predict(
|
49 |
-
smiles, props, property_names=property_names, domain="Molecules"
|
50 |
-
)
|
51 |
|
52 |
|
53 |
if __name__ == "__main__":
|
54 |
|
55 |
# Preparation (retrieve all available algorithms)
|
56 |
-
properties = list(
|
57 |
-
for prop in REMOVE:
|
58 |
-
prop_to_idx = dict(zip(properties, range(len(properties))))
|
59 |
-
properties.pop(prop_to_idx[prop])
|
60 |
properties = list(map(lambda x: x.capitalize(), properties))
|
61 |
|
62 |
# Load metadata
|
63 |
metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
|
64 |
|
65 |
examples = [
|
66 |
-
["
|
67 |
-
[
|
68 |
-
|
69 |
-
"CN1CCN(CCCOc2ccc(N3C(=O)C(=Cc4ccc(Oc5ccc([N+](=O)[O-])cc5)cc4)SC3=S)cc2)CC1",
|
70 |
-
None,
|
71 |
-
],
|
72 |
]
|
73 |
|
74 |
with open(metadata_root.joinpath("article.md"), "r") as f:
|
@@ -78,18 +67,15 @@ if __name__ == "__main__":
|
|
78 |
|
79 |
demo = gr.Interface(
|
80 |
fn=main,
|
81 |
-
title="
|
82 |
inputs=[
|
83 |
-
gr.Dropdown(properties, label="Property", value="
|
84 |
gr.Textbox(
|
85 |
-
label="Single
|
86 |
-
placeholder="CC(C#C)N(C)C(=O)NC1=CC=C(Cl)C=C1",
|
87 |
-
lines=1,
|
88 |
-
),
|
89 |
-
gr.File(
|
90 |
-
file_types=[".smi"],
|
91 |
-
label="Multiple SMILES (tab-separated, `.smi` file)",
|
92 |
),
|
|
|
|
|
|
|
93 |
],
|
94 |
outputs=gr.HTML(label="Output"),
|
95 |
article=article,
|
|
|
4 |
import gradio as gr
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
7 |
+
from gt4sd.properties.proteins import PROTEIN_PROPERTY_PREDICTOR_FACTORY
|
8 |
|
9 |
from utils import draw_grid_predict
|
10 |
|
11 |
logger = logging.getLogger(__name__)
|
12 |
logger.addHandler(logging.NullHandler())
|
13 |
|
|
|
|
|
14 |
|
15 |
+
AMIDE_FNS = ["protein_weight", "charge", "charge_density", "isoelectric_point"]
|
16 |
+
PH_FNS = ["charge", "charge_density", "isoelectric_point"]
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
+
def _collect_sequences(seq, seq_file):
    """Return the list of sequences from exactly one of the two inputs."""
    # A blank string counts as "not provided" so that an empty text field
    # does not collide with an uploaded file.
    has_seq = seq is not None and seq.strip() != ""
    has_file = seq_file is not None

    if has_seq and has_file:
        raise ValueError("Pass either seq or seq_file, not both.")
    if has_seq:
        return [seq.strip()]
    if has_file:
        # Accept an object exposing its path as `.name` as well as a plain path.
        path = getattr(seq_file, "name", seq_file)
        # No header row; the first tab-separated column holds the sequences.
        return pd.read_csv(path, header=None, sep="\t")[0].tolist()
    raise ValueError("Pass either seq or seq_file.")


def main(property: str, seq: str, seq_file: str, amide: bool, ph: float):
    """Predict one protein property for a single sequence or a file of sequences.

    Args:
        property: Display name of the property; it is lower-cased to look up
            the predictor in ``PROTEIN_PROPERTY_PREDICTOR_FACTORY``.
        seq: A single sequence, or ``None``/blank when a file is given instead.
        seq_file: Object exposing the file path as ``.name`` (a plain path is
            accepted too), or ``None``. The file is parsed as tab-separated
            text without a header; only the first column is used.
        amide: Forwarded as ``amide`` to the predictors named in ``AMIDE_FNS``.
        ph: Forwarded as ``ph`` to the predictors named in ``PH_FNS``.

    Returns:
        The result of ``draw_grid_predict`` for the sequences and their
        predicted values, rounded to two decimals.

    Raises:
        ValueError: If both, or neither, of ``seq`` and ``seq_file`` are given.
    """
    # Validate the inputs first so a bad request fails before a model is built.
    seqs = _collect_sequences(seq, seq_file)

    prop_name = property.lower()
    algo, config = PROTEIN_PROPERTY_PREDICTOR_FACTORY[prop_name]

    # Pass hyperparameters only to the predictors that take them
    kwargs = {}
    if prop_name in AMIDE_FNS:
        kwargs["amide"] = amide
    if prop_name in PH_FNS:
        kwargs["ph"] = ph
    model = algo(config(**kwargs))

    props = np.array([model(s) for s in seqs]).round(2)

    # Expand to 2D array if needed (one column per property)
    if props.ndim == 1:
        props = np.expand_dims(props, -1)

    return draw_grid_predict(seqs, props, property_names=[property], domain="Proteins")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
|
48 |
if __name__ == "__main__":
|
49 |
|
50 |
# Preparation (retrieve all available algorithms)
|
51 |
+
properties = list(PROTEIN_PROPERTY_PREDICTOR_FACTORY.keys())[::-1]
|
|
|
|
|
|
|
52 |
properties = list(map(lambda x: x.capitalize(), properties))
|
53 |
|
54 |
# Load metadata
|
55 |
metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
|
56 |
|
57 |
examples = [
|
58 |
+
["Aliphaticity", None, metadata_root.joinpath("examples.smi"), False, 7],
|
59 |
+
["Isoelectric_point", "KFLIYQMECSTMIFGL", None, False, 7],
|
60 |
+
["Charge", "KFLIYQMECSTMIFGL", None, True, 12],
|
|
|
|
|
|
|
61 |
]
|
62 |
|
63 |
with open(metadata_root.joinpath("article.md"), "r") as f:
|
|
|
67 |
|
68 |
demo = gr.Interface(
|
69 |
fn=main,
|
70 |
+
title="Protein properties",
|
71 |
inputs=[
|
72 |
+
gr.Dropdown(properties, label="Property", value="Instability"),
|
73 |
gr.Textbox(
|
74 |
+
label="Single Protein sequence", placeholder="KFLIYQMECSTMIFGL", lines=1
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
),
|
76 |
+
gr.File(file_types=[".smi"], label="One AAS per line"),
|
77 |
+
gr.Radio(choices=[True, False], label="Amide", value=True),
|
78 |
+
gr.Slider(minimum=0, maximum=14, value=7, label="pH", description="Blub"),
|
79 |
],
|
80 |
outputs=gr.HTML(label="Output"),
|
81 |
article=article,
|
model_cards/description.md
CHANGED
@@ -2,6 +2,6 @@
|
|
2 |
|
3 |
<img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
|
4 |
|
5 |
-
###
|
6 |
|
7 |
-
This is the GT4SD web-app for prediction of various
|
|
|
2 |
|
3 |
<img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
|
4 |
|
5 |
+
### Protein property prediction
|
6 |
|
7 |
+
This is the GT4SD web-app for prediction of various protein (or peptide) properties. For **examples** and **documentation** of the supported properties, please see below. Please note that this API does not expose **all** properties that are supported in GT4SD (a list of the non-supported ones can be found at the bottom).
|
model_cards/examples.smi
CHANGED
@@ -1,13 +1,14 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
1 |
+
|
2 |
+
NEGVKAAW
|
3 |
+
FPRPWLHGL
|
4 |
+
HPVGEADYFEY
|
5 |
+
TPGPGVRYPL
|
6 |
+
EEYLKAWTF
|
7 |
+
RMFPNAPYL
|
8 |
+
GPGMKARVL
|
9 |
+
RLRPGGKKK
|
10 |
+
VMAPRTLIL
|
11 |
+
ARMILMTHF
|
12 |
+
FLYNLLTRV
|
13 |
+
SLYNTVATL
|
14 |
+
ILKEPVHGV
|
utils.py
CHANGED
@@ -42,6 +42,8 @@ def draw_grid_predict(
|
|
42 |
logger.warning(f"Could not draw sequence {seq}")
|
43 |
|
44 |
result = pd.DataFrame({"SMILES": smiles})
|
|
|
|
|
45 |
for i, name in enumerate(property_names):
|
46 |
result[name] = properties[:, i]
|
47 |
n_cols = min(3, len(result))
|
|
|
42 |
logger.warning(f"Could not draw sequence {seq}")
|
43 |
|
44 |
result = pd.DataFrame({"SMILES": smiles})
|
45 |
+
if domain == "Proteins":
|
46 |
+
result["Seqs"] = sequences
|
47 |
for i, name in enumerate(property_names):
|
48 |
result[name] = properties[:, i]
|
49 |
n_cols = min(3, len(result))
|