File size: 2,815 Bytes
480220c
 
 
 
 
 
c351b1e
480220c
 
 
 
 
 
 
c351b1e
 
480220c
 
c351b1e
480220c
c351b1e
 
 
 
 
 
 
 
 
480220c
c351b1e
 
 
 
 
 
 
 
 
480220c
 
 
 
 
c351b1e
480220c
 
 
 
 
c351b1e
480220c
 
 
 
 
 
c351b1e
 
 
480220c
 
 
 
 
 
 
 
 
c351b1e
480220c
c351b1e
480220c
c351b1e
480220c
c351b1e
 
 
480220c
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import logging
import pathlib

import gradio as gr
import numpy as np
import pandas as pd
from gt4sd.properties.proteins import PROTEIN_PROPERTY_PREDICTOR_FACTORY

from utils import draw_grid_predict

# Module-level logger; the NullHandler keeps the logger silent unless the
# hosting application configures logging itself.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


# Property names (lower-case factory keys) for which `main` forwards the
# `amide` argument to the predictor configuration.
AMIDE_FNS = ["protein_weight", "charge", "charge_density", "isoelectric_point"]
# Property names (lower-case factory keys) for which `main` forwards the
# `ph` argument to the predictor configuration.
PH_FNS = ["charge", "charge_density", "isoelectric_point"]


def main(property: str, seq: str, seq_file: str, amide: bool, ph: float):
    """Predict one protein property for a single sequence or a file of sequences.

    Args:
        property: Name of the property to predict. It is lower-cased and used
            as the key into ``PROTEIN_PROPERTY_PREDICTOR_FACTORY``.
        seq: A single amino-acid sequence. ``None``, an empty string, or a
            whitespace-only string all count as "not provided".
        seq_file: Uploaded file with one sequence per line. Either a path
            string or an object exposing the path as ``.name``; ``None``
            counts as "not provided".
        amide: Forwarded to the predictor configuration only when the
            property is listed in ``AMIDE_FNS``.
        ph: Forwarded to the predictor configuration only when the property
            is listed in ``PH_FNS``.

    Returns:
        The result of ``draw_grid_predict`` for the sequences and their
        predicted values (rounded to two decimals).

    Raises:
        KeyError: If ``property`` is not a key of the predictor factory.
        ValueError: If both ``seq`` and ``seq_file`` are provided, or if
            neither is.
    """
    prop_name = property.lower()
    algo, config = PROTEIN_PROPERTY_PREDICTOR_FACTORY[prop_name]

    # Pass hyperparameters if applicable
    kwargs = {}
    if prop_name in AMIDE_FNS:
        kwargs["amide"] = amide
    if prop_name in PH_FNS:
        kwargs["ph"] = ph
    model = algo(config(**kwargs))

    # An untouched text field may arrive as "" rather than None, so decide
    # on truthiness of the stripped text instead of an identity check.
    if isinstance(seq, str):
        seq = seq.strip()
    has_seq = bool(seq)
    has_file = seq_file is not None

    # Read and parse data
    if has_seq and has_file:
        raise ValueError("Pass either seq or seq_file, not both.")
    if not has_seq and not has_file:
        # Previously this fell through and crashed on an unbound `seqs`.
        raise ValueError("Pass either seq or seq_file.")
    if has_seq:
        seqs = [seq]
    else:
        # Accept both a file-like wrapper (path in `.name`) and a plain path.
        file_path = getattr(seq_file, "name", seq_file)
        seqs = pd.read_csv(file_path, header=None, sep="\t")[0].tolist()

    props = np.array(list(map(model, seqs))).round(2)

    # Expand to 2D array if needed (one column per property)
    if props.ndim == 1:
        props = np.expand_dims(props, -1)

    return draw_grid_predict(seqs, props, property_names=[property], domain="Proteins")


if __name__ == "__main__":
    # Script entry point: build the Gradio interface around `main` and launch it.

    # Preparation (retrieve all available algorithms): factory keys in
    # reverse order, capitalised for display in the dropdown.
    properties = [
        name.capitalize()
        for name in list(PROTEIN_PROPERTY_PREDICTOR_FACTORY.keys())[::-1]
    ]

    # Load metadata
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    # Each example row follows the order of `inputs` below:
    # property, single sequence, sequence file, amide, pH.
    examples = [
        ["Aliphaticity", None, metadata_root.joinpath("examples.smi"), False, 7],
        ["Isoelectric_point", "KFLIYQMECSTMIFGL", None, False, 7],
        ["Charge", "KFLIYQMECSTMIFGL", None, True, 12],
    ]

    # Read the markdown with an explicit encoding so the result does not
    # depend on the platform's default locale.
    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(
        encoding="utf-8"
    )

    demo = gr.Interface(
        fn=main,
        title="Protein properties",
        inputs=[
            gr.Dropdown(properties, label="Property", value="Instability"),
            gr.Textbox(
                label="Single Protein sequence", placeholder="KFLIYQMECSTMIFGL", lines=1
            ),
            gr.File(file_types=[".smi"], label="One AAS per line"),
            gr.Radio(choices=[True, False], label="Amide", value=True),
            # NOTE(review): `description="Blub"` looks like a leftover
            # placeholder; the documented help-text parameter for Slider is
            # presumably `info` -- confirm against the installed Gradio version.
            gr.Slider(minimum=0, maximum=14, value=7, label="pH", description="Blub"),
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples,
    )
    demo.launch(debug=True, show_error=True)