File size: 2,784 Bytes
480220c
 
 
 
 
 
c351b1e
480220c
 
 
 
 
 
 
c351b1e
 
480220c
 
c351b1e
 
 
 
 
 
 
 
 
 
480220c
c351b1e
 
453d7ec
c351b1e
453d7ec
c351b1e
 
 
 
480220c
 
 
 
 
c351b1e
480220c
 
 
 
c351b1e
480220c
 
 
 
 
 
321305d
c351b1e
 
480220c
 
 
 
 
 
 
 
 
c351b1e
480220c
c351b1e
480220c
c351b1e
480220c
c351b1e
 
ef5346f
480220c
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import logging
import pathlib

import gradio as gr
import numpy as np
import pandas as pd
from gt4sd.properties.proteins import PROTEIN_PROPERTY_PREDICTOR_FACTORY

from utils import draw_grid_predict

# Module-level logger. A no-op NullHandler is attached so that this module
# does not emit anything by itself when logging is left unconfigured.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


# Lower-case property names for which main() forwards the ``amide`` flag to
# the predictor configuration.
AMIDE_FNS = ["protein_weight", "charge", "charge_density", "isoelectric_point"]
# Lower-case property names for which main() forwards the ``ph`` value to the
# predictor configuration.
PH_FNS = ["charge", "charge_density", "isoelectric_point"]


def main(property: str, seq: str, seq_file: str, amide: bool, ph: float):
    """Predict one protein property for a single sequence or a file of sequences.

    Args:
        property: Display name of the property; it is lower-cased and looked
            up in ``PROTEIN_PROPERTY_PREDICTOR_FACTORY``.
        seq: A single protein sequence. An empty string (or ``None``) means
            "not provided".
        seq_file: Uploaded file with one sequence per line, or ``None``.
            Either a path string or an object exposing the path as ``.name``
            is accepted. Only the first tab-separated column is read.
        amide: Forwarded to the predictor configuration for the properties
            listed in ``AMIDE_FNS``; ignored otherwise.
        ph: Forwarded to the predictor configuration for the properties
            listed in ``PH_FNS``; ignored otherwise.

    Returns:
        Whatever ``draw_grid_predict`` returns for the sequences and their
        predicted values (rounded to two decimals).

    Raises:
        KeyError: If ``property`` is not a key of the predictor factory.
        ValueError: If both ``seq`` and ``seq_file`` are given, or neither is.
    """
    prop_name = property.lower()
    algo, config = PROTEIN_PROPERTY_PREDICTOR_FACTORY[prop_name]

    # Pass hyperparameters only to the predictors that accept them
    kwargs = {}
    if prop_name in AMIDE_FNS:
        kwargs["amide"] = amide
    if prop_name in PH_FNS:
        kwargs["ph"] = ph
    model = algo(config(**kwargs))

    # Exactly one input source must be supplied
    has_seq = bool(seq)
    has_file = seq_file is not None
    if has_seq and has_file:
        raise ValueError("Pass either seq or seq_file, not both.")
    if not has_seq and not has_file:
        # Without this guard `seqs` would be unbound further down.
        raise ValueError("Pass either seq or seq_file.")

    if has_seq:
        seqs = [seq]
    else:
        # The upload may arrive as a tempfile-like wrapper (path in `.name`)
        # or directly as a path string, depending on the UI framework version.
        seq_path = getattr(seq_file, "name", seq_file)
        seqs = pd.read_csv(seq_path, header=None, sep="\t")[0].tolist()

    props = np.array([model(s) for s in seqs]).round(2)

    # Expand to 2D array (one column per property) if needed
    if props.ndim == 1:
        props = np.expand_dims(props, -1)

    return draw_grid_predict(seqs, props, property_names=[property], domain="Proteins")


if __name__ == "__main__":
    # Dropdown choices: every predictor name known to the factory, in reverse
    # of the factory's key order, capitalised for display.
    properties = [
        name.capitalize()
        for name in reversed(list(PROTEIN_PROPERTY_PREDICTOR_FACTORY.keys()))
    ]

    # Static assets (markdown texts and the example file) sit in the
    # "model_cards" folder next to this script.
    metadata_root = pathlib.Path(__file__).parent / "model_cards"
    article = (metadata_root / "article.md").read_text()
    description = (metadata_root / "description.md").read_text()

    # Each example row follows the order of main()'s parameters:
    # property, seq, seq_file, amide, ph.
    examples = [
        ["Aliphaticity", "", str(metadata_root / "examples.smi"), False, 7],
        ["Isoelectric_point", "KFLIYQMECSTMIFGL", None, False, 7],
        ["Charge", "KFLIYQMECSTMIFGL", None, True, 12],
    ]

    # Input widgets, declared in the same order as main()'s parameters.
    property_input = gr.Dropdown(properties, label="Property", value="Instability")
    sequence_input = gr.Textbox(
        label="Single Protein sequence", placeholder="KFLIYQMECSTMIFGL", lines=1
    )
    file_input = gr.File(file_types=[".smi"], label="One AAS per line")
    amide_input = gr.Radio(choices=[True, False], label="Amide", value=True)
    ph_input = gr.Slider(minimum=0, maximum=14, value=7, label="pH")

    demo = gr.Interface(
        fn=main,
        title="Protein properties",
        inputs=[property_input, sequence_input, file_input, amide_input, ph_input],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples,
    )
    demo.launch(debug=True, show_error=True)