"""Gradio demo that predicts protein properties with GT4SD predictors."""

import logging
import pathlib

import gradio as gr
import numpy as np
import pandas as pd
from gt4sd.properties.proteins import PROTEIN_PROPERTY_PREDICTOR_FACTORY

from utils import draw_grid_predict

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Properties whose predictor configuration receives the `amide` flag.
AMIDE_FNS = ["protein_weight", "charge", "charge_density", "isoelectric_point"]
# Properties whose predictor configuration receives the `ph` value.
PH_FNS = ["charge", "charge_density", "isoelectric_point"]


def _read_sequences(seq, seq_file):
    """Return the input sequences from either the textbox or the uploaded file.

    Args:
        seq: A single sequence typed by the user, or ``None``/blank if unused.
        seq_file: Uploaded file with one sequence per line. May be a path
            (``str`` / ``pathlib`` path) or an object exposing the path as
            ``.name``; ``None`` if no file was provided.

    Returns:
        A list of sequence strings.

    Raises:
        ValueError: If both inputs are given, or if neither is given.
    """
    # A cleared Gradio textbox is delivered as an empty string rather than
    # None, so blank text has to count as "no sequence given"; otherwise a
    # file upload would always collide with the empty textbox.
    single = seq.strip() if isinstance(seq, str) else seq
    has_seq = bool(single)
    has_file = seq_file is not None

    if has_seq and has_file:
        raise ValueError("Pass either a sequence or seq_file, not both.")
    if has_seq:
        return [single]
    if has_file:
        # Path-like values are used directly; anything else is expected to be
        # a file wrapper carrying its location in `.name`. (A pathlib path
        # also has `.name`, but that is only the basename, hence the check.)
        if isinstance(seq_file, (str, pathlib.PurePath)):
            path = seq_file
        else:
            path = seq_file.name
        # dtype=str / keep_default_na=False keep pandas from turning valid
        # sequences such as "NA" (Asn-Ala) into NaN.
        table = pd.read_csv(
            path, header=None, sep="\t", dtype=str, keep_default_na=False
        )
        return table[0].tolist()
    raise ValueError("Pass either a sequence or seq_file.")


def main(property: str, seq: str, seq_file: str, amide: bool, ph: float):
    """Predict one property for the given protein sequence(s).

    Args:
        property: Name of the property; lower-cased before it is looked up in
            ``PROTEIN_PROPERTY_PREDICTOR_FACTORY``.
        seq: A single protein sequence (blank or ``None`` if a file is used).
        seq_file: File with one sequence per line (``None`` if ``seq`` is used).
        amide: Forwarded to the predictor config for properties in ``AMIDE_FNS``.
        ph: Forwarded to the predictor config for properties in ``PH_FNS``.

    Returns:
        Whatever ``draw_grid_predict`` returns for the sequences and their
        predicted values (rounded to two decimals).

    Raises:
        KeyError: If ``property`` is not a known predictor name.
        ValueError: If both or neither of ``seq`` and ``seq_file`` are given.
    """
    prop_name = property.lower()
    algo, config = PROTEIN_PROPERTY_PREDICTOR_FACTORY[prop_name]

    # Pass hyperparameters only to the predictors that accept them.
    kwargs = {}
    if prop_name in AMIDE_FNS:
        kwargs["amide"] = amide
    if prop_name in PH_FNS:
        kwargs["ph"] = ph
    model = algo(config(**kwargs))

    seqs = _read_sequences(seq, seq_file)

    props = np.array([model(s) for s in seqs]).round(2)

    # One value per sequence gives a 1-D array; add a trailing axis so the
    # result is always (n_sequences, n_properties).
    if props.ndim == 1:
        props = np.expand_dims(props, -1)

    return draw_grid_predict(seqs, props, property_names=[property], domain="Proteins")


if __name__ == "__main__":

    # Preparation (retrieve all available algorithms), shown in reverse
    # registry order with a capitalised display name.
    properties = [
        name.capitalize() for name in list(PROTEIN_PROPERTY_PREDICTOR_FACTORY)[::-1]
    ]

    # Load metadata
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    # Each example row follows the order of `inputs` below:
    # property, single sequence, sequence file, amide, pH.
    examples = [
        ["Aliphaticity", None, metadata_root.joinpath("examples.smi"), False, 7],
        ["Isoelectric_point", "KFLIYQMECSTMIFGL", None, False, 7],
        ["Charge", "KFLIYQMECSTMIFGL", None, True, 12],
    ]

    article = metadata_root.joinpath("article.md").read_text()
    description = metadata_root.joinpath("description.md").read_text()

    demo = gr.Interface(
        fn=main,
        title="Protein properties",
        inputs=[
            gr.Dropdown(properties, label="Property", value="Instability"),
            gr.Textbox(
                label="Single Protein sequence", placeholder="KFLIYQMECSTMIFGL", lines=1
            ),
            gr.File(file_types=[".smi"], label="One AAS per line"),
            gr.Radio(choices=[True, False], label="Amide", value=True),
            # NOTE(review): `description` does not look like a documented
            # Slider argument and "Blub" reads like a placeholder; confirm
            # the installed Gradio version accepts it.
            gr.Slider(minimum=0, maximum=14, value=7, label="pH", description="Blub"),
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples,
    )
    demo.launch(debug=True, show_error=True)