Spaces:
Sleeping
Sleeping
File size: 2,784 Bytes
480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e 453d7ec c351b1e 453d7ec c351b1e 480220c c351b1e 480220c c351b1e 480220c 321305d c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e ef5346f 480220c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import logging
import pathlib
import gradio as gr
import numpy as np
import pandas as pd
from gt4sd.properties.proteins import PROTEIN_PROPERTY_PREDICTOR_FACTORY
from utils import draw_grid_predict
# Module logger; the NullHandler means this module attaches no output handler
# of its own.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Properties whose predictor configuration is given the `amide` argument.
AMIDE_FNS = [
    "protein_weight",
    "charge",
    "charge_density",
    "isoelectric_point",
]

# Properties whose predictor configuration is given the `ph` argument.
PH_FNS = [
    "charge",
    "charge_density",
    "isoelectric_point",
]
def main(property: str, seq: str, seq_file: str, amide: bool, ph: float):
    """Predict one protein property for a single sequence or a file of sequences.

    Exactly one of ``seq`` and ``seq_file`` must be provided.

    Args:
        property: Name of the property. It is lower-cased and looked up in
            ``PROTEIN_PROPERTY_PREDICTOR_FACTORY``.
        seq: A single protein sequence, or an empty string when ``seq_file``
            is used instead.
        seq_file: Tab-separated file without a header row; the first column
            holds the sequences. May be a path (``str`` / ``pathlib`` path) or
            an object exposing the path as ``.name``
            (NOTE(review): presumably the Gradio upload wrapper - confirm).
            ``None`` when ``seq`` is used instead.
        amide: Passed to the predictor configuration for properties listed in
            ``AMIDE_FNS``; ignored otherwise.
        ph: Passed to the predictor configuration for properties listed in
            ``PH_FNS``; ignored otherwise.

    Returns:
        The result of ``draw_grid_predict`` for the sequences and their
        predicted values (rounded to two decimals).

    Raises:
        ValueError: If both ``seq`` and ``seq_file`` are given, or neither is.
    """
    # Validate the inputs before building the model so bad requests fail fast.
    has_seq = bool(seq)
    has_file = seq_file is not None
    if has_seq and has_file:
        raise ValueError("Pass either seq or seq_file, not both.")
    if not has_seq and not has_file:
        # Previously this fell through and crashed with an UnboundLocalError.
        raise ValueError("Pass either seq or seq_file.")

    prop_name = property.lower()
    algo, config = PROTEIN_PROPERTY_PREDICTOR_FACTORY[prop_name]

    # Pass hyperparameters if applicable
    kwargs = {}
    if prop_name in AMIDE_FNS:
        kwargs["amide"] = amide
    if prop_name in PH_FNS:
        kwargs["ph"] = ph
    model = algo(config(**kwargs))

    # Read and parse data
    if has_seq:
        seqs = [seq]
    else:
        # Accept a plain path as well as an object carrying its path in
        # `.name`. Path objects are checked explicitly because their own
        # `.name` attribute is only the basename.
        if isinstance(seq_file, (str, pathlib.PurePath)):
            path = seq_file
        else:
            path = seq_file.name
        seqs = pd.read_csv(path, header=None, sep="\t")[0].tolist()

    props = np.array(list(map(model, seqs))).round(2)

    # Expand to 2D array if needed (one column per property)
    if props.ndim == 1:
        props = np.expand_dims(props, -1)

    return draw_grid_predict(seqs, props, property_names=[property], domain="Proteins")
if __name__ == "__main__":
    # Dropdown choices: every key of the predictor factory, in reverse order,
    # capitalised for display.
    factory_keys = list(PROTEIN_PROPERTY_PREDICTOR_FACTORY.keys())
    properties = [key.capitalize() for key in reversed(factory_keys)]

    # The UI texts and the example file live in `model_cards/` next to this
    # script.
    metadata_root = pathlib.Path(__file__).parent / "model_cards"
    example_file = str(metadata_root / "examples.smi")

    # Each example row is: property, sequence, sequence file, amide, pH.
    examples = [
        ["Aliphaticity", "", example_file, False, 7],
        ["Isoelectric_point", "KFLIYQMECSTMIFGL", None, False, 7],
        ["Charge", "KFLIYQMECSTMIFGL", None, True, 12],
    ]

    article = (metadata_root / "article.md").read_text()
    description = (metadata_root / "description.md").read_text()

    # Input components, in the same order as the parameters of `main`.
    property_input = gr.Dropdown(properties, label="Property", value="Instability")
    sequence_input = gr.Textbox(
        label="Single Protein sequence",
        placeholder="KFLIYQMECSTMIFGL",
        lines=1,
    )
    file_input = gr.File(file_types=[".smi"], label="One AAS per line")
    amide_input = gr.Radio(choices=[True, False], label="Amide", value=True)
    ph_input = gr.Slider(minimum=0, maximum=14, value=7, label="pH")

    demo = gr.Interface(
        fn=main,
        title="Protein properties",
        inputs=[property_input, sequence_input, file_input, amide_input, ph_input],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples,
    )
    demo.launch(debug=True, show_error=True)
|