# protein_properties
#
# Running
#
# File size: 2,784 Bytes

import logging
import pathlib

import gradio as gr
import numpy as np
import pandas as pd
from gt4sd.properties.proteins import PROTEIN_PROPERTY_PREDICTOR_FACTORY

from utils import draw_grid_predict

# Module-level logger with a no-op handler attached.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


# Lower-case property names for which `main` forwards the `amide` flag to the
# predictor configuration.
AMIDE_FNS = [
    "protein_weight",
    "charge",
    "charge_density",
    "isoelectric_point",
]
# Lower-case property names for which `main` forwards the `ph` value to the
# predictor configuration.
PH_FNS = [
    "charge",
    "charge_density",
    "isoelectric_point",
]


def _read_sequences(seq, seq_file):
    """Return the list of sequences selected by exactly one of the two inputs.

    Args:
        seq: A single sequence typed by the user; ``None``, empty or
            whitespace-only means "not provided".
        seq_file: ``None``, a path, or an object exposing the path as
            ``.name``. The file is read as tab-separated text without a
            header and the first column is used, one sequence per row.

    Raises:
        ValueError: If both inputs are provided, or neither is.
    """
    seq = (seq or "").strip()
    has_seq = seq != ""
    has_file = seq_file is not None

    if has_seq and has_file:
        raise ValueError("Pass either seq or seq_file, not both.")
    if not has_seq and not has_file:
        raise ValueError("Pass either seq or seq_file, at least one is required.")
    if has_seq:
        return [seq]

    # Depending on the gradio version/config the upload arrives either as a
    # tempfile-like wrapper (path in `.name`) or directly as a path string.
    path = getattr(seq_file, "name", seq_file)
    return pd.read_csv(path, header=None, sep="\t")[0].tolist()


def main(property: str, seq: str, seq_file: str, amide: bool, ph: float):
    """Predict one protein property for a single sequence or a file of sequences.

    Args:
        property: Property name; it is lower-cased to look up the predictor in
            ``PROTEIN_PROPERTY_PREDICTOR_FACTORY`` and passed unchanged as the
            column name to ``draw_grid_predict``.
        seq: A single sequence (mutually exclusive with ``seq_file``).
        seq_file: Uploaded file with one sequence per line (mutually exclusive
            with ``seq``); a path or an object with a ``.name`` path.
        amide: Forwarded to the predictor config only for properties listed in
            ``AMIDE_FNS``.
        ph: Forwarded to the predictor config only for properties listed in
            ``PH_FNS``.

    Returns:
        Whatever ``draw_grid_predict`` returns for the sequences and their
        predictions rounded to two decimals (rendered by the ``gr.HTML``
        output of the interface).

    Raises:
        ValueError: If both ``seq`` and ``seq_file`` are given, or neither.
    """
    # Validate and load the input first so that bad requests fail with a clear
    # message before any predictor is instantiated.
    seqs = _read_sequences(seq, seq_file)

    prop_name = property.lower()
    algo, config = PROTEIN_PROPERTY_PREDICTOR_FACTORY[prop_name]

    # Pass hyperparameters only to the predictors that accept them
    kwargs = {}
    if prop_name in AMIDE_FNS:
        kwargs["amide"] = amide
    if prop_name in PH_FNS:
        kwargs["ph"] = ph
    model = algo(config(**kwargs))

    props = np.array([model(sequence) for sequence in seqs]).round(2)

    # One value per sequence yields a 1D array; add a trailing axis so there
    # is one column per property.
    if props.ndim == 1:
        props = np.expand_dims(props, -1)

    return draw_grid_predict(seqs, props, property_names=[property], domain="Proteins")


if __name__ == "__main__":
    # Dropdown choices: all registered predictor names in reversed key order,
    # each with its first letter capitalised.
    properties = [
        name.capitalize()
        for name in reversed(list(PROTEIN_PROPERTY_PREDICTOR_FACTORY.keys()))
    ]

    # Static assets (markdown texts and the example file) live next to this
    # script in `model_cards/`.
    metadata_root = pathlib.Path(__file__).parent / "model_cards"
    example_file = str(metadata_root / "examples.smi")

    # Each row follows the order of the interface inputs:
    # property, sequence, file, amide, pH.
    examples = [
        ["Aliphaticity", "", example_file, False, 7],
        ["Isoelectric_point", "KFLIYQMECSTMIFGL", None, False, 7],
        ["Charge", "KFLIYQMECSTMIFGL", None, True, 12],
    ]

    article = (metadata_root / "article.md").read_text()
    description = (metadata_root / "description.md").read_text()

    # Input widgets, created in the same order as the parameters of `main`.
    property_input = gr.Dropdown(properties, label="Property", value="Instability")
    sequence_input = gr.Textbox(
        label="Single Protein sequence", placeholder="KFLIYQMECSTMIFGL", lines=1
    )
    file_input = gr.File(file_types=[".smi"], label="One AAS per line")
    amide_input = gr.Radio(choices=[True, False], label="Amide", value=True)
    ph_input = gr.Slider(minimum=0, maximum=14, value=7, label="pH")
    html_output = gr.HTML(label="Output")

    demo = gr.Interface(
        fn=main,
        title="Protein properties",
        inputs=[property_input, sequence_input, file_input, amide_input, ph_input],
        outputs=html_output,
        article=article,
        description=description,
        examples=examples,
    )
    demo.launch(debug=True, show_error=True)