Spaces:
Sleeping
Sleeping
File size: 2,815 Bytes
480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c c351b1e 480220c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import logging
import pathlib
import gradio as gr
import numpy as np
import pandas as pd
from gt4sd.properties.proteins import PROTEIN_PROPERTY_PREDICTOR_FACTORY
from utils import draw_grid_predict
# Module-level logger. A NullHandler is attached so this module does not
# produce log output by itself; the application decides how to configure logging.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Property names for which ``main`` forwards the ``amide`` flag to the
# predictor configuration.
AMIDE_FNS = [
    "protein_weight",
    "charge",
    "charge_density",
    "isoelectric_point",
]

# Property names for which ``main`` forwards the ``ph`` value to the
# predictor configuration.
PH_FNS = [
    "charge",
    "charge_density",
    "isoelectric_point",
]
def main(property: str, seq: str, seq_file: str, amide: bool, ph: float):
    """Predict one protein property for a single sequence or a file of sequences.

    Args:
        property: Name of the property to predict. It is lower-cased and looked
            up in ``PROTEIN_PROPERTY_PREDICTOR_FACTORY``; the original spelling
            is used as the column name in the rendered output.
        seq: A single protein sequence. ``None`` or a blank string counts as
            "not provided" (a text field left empty may arrive as ``""``).
            Surrounding whitespace is stripped.
        seq_file: Source of sequences, one per line (read as a header-less,
            tab-separated table whose first column holds the sequences).
            Either a path (``str`` / ``pathlib`` path) or an object exposing
            the path through a ``.name`` attribute. ``None`` if not provided.
        amide: Forwarded as ``amide`` to the predictor configuration when the
            property is listed in ``AMIDE_FNS``.
        ph: Forwarded as ``ph`` to the predictor configuration when the
            property is listed in ``PH_FNS``.

    Returns:
        The result of ``draw_grid_predict`` for the sequences and their
        predicted values (rounded to two decimals).

    Raises:
        ValueError: If both ``seq`` and ``seq_file`` are given, or neither is.
        KeyError: If ``property`` is not a key of the predictor factory.
    """
    # Validate the inputs first so a malformed request fails with a clear
    # message before any model is instantiated.
    sequence = seq.strip() if isinstance(seq, str) else seq
    has_seq = bool(sequence)
    has_file = seq_file is not None
    if has_seq and has_file:
        raise ValueError("Pass either seq or seq_file, not both.")
    if not has_seq and not has_file:
        raise ValueError("Pass either seq or seq_file.")

    prop_name = property.lower()
    algo, config = PROTEIN_PROPERTY_PREDICTOR_FACTORY[prop_name]

    # Pass hyperparameters only to the predictors that are listed for them.
    kwargs = {}
    if prop_name in AMIDE_FNS:
        kwargs["amide"] = amide
    if prop_name in PH_FNS:
        kwargs["ph"] = ph
    model = algo(config(**kwargs))

    # Read and parse data
    if has_seq:
        seqs = [sequence]
    else:
        # Accept a plain path as well as an upload wrapper whose ``.name``
        # holds the path (``Path.name`` would only be the basename).
        if isinstance(seq_file, (str, pathlib.PurePath)):
            path = seq_file
        else:
            path = seq_file.name
        seqs = pd.read_csv(path, header=None, sep="\t")[0].tolist()

    props = np.array([model(s) for s in seqs]).round(2)

    # Expand to a 2D (n_sequences, 1) array if the predictor returned scalars.
    if props.ndim == 1:
        props = np.expand_dims(props, -1)

    return draw_grid_predict(seqs, props, property_names=[property], domain="Proteins")
if __name__ == "__main__":
    # Preparation: offer every available predictor in the dropdown, in reverse
    # factory order and capitalised for display (``main`` lower-cases the
    # selected name again before the lookup).
    properties = [
        name.capitalize()
        for name in reversed(list(PROTEIN_PROPERTY_PREDICTOR_FACTORY))
    ]

    # Load metadata shipped next to this script.
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    # Each example row follows the order of ``inputs`` below:
    # property, single sequence, sequence file, amide flag, pH.
    examples = [
        ["Aliphaticity", None, metadata_root.joinpath("examples.smi"), False, 7],
        ["Isoelectric_point", "KFLIYQMECSTMIFGL", None, False, 7],
        ["Charge", "KFLIYQMECSTMIFGL", None, True, 12],
    ]

    # Read the page texts with an explicit encoding so the result does not
    # depend on the platform default.
    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(
        encoding="utf-8"
    )

    demo = gr.Interface(
        fn=main,
        title="Protein properties",
        inputs=[
            gr.Dropdown(properties, label="Property", value="Instability"),
            gr.Textbox(
                label="Single Protein sequence",
                placeholder="KFLIYQMECSTMIFGL",
                lines=1,
            ),
            gr.File(file_types=[".smi"], label="One AAS per line"),
            gr.Radio(choices=[True, False], label="Amide", value=True),
            # No ``description`` keyword here: it is not a documented
            # ``gr.Slider`` parameter and the previous value was a placeholder.
            gr.Slider(minimum=0, maximum=14, value=7, label="pH"),
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples,
    )
    demo.launch(debug=True, show_error=True)
|