import logging
import pathlib
from typing import List

import gradio as gr
import numpy as np
import pandas as pd
from gt4sd.algorithms.conditional_generation.paccmann_rl import (
    PaccMannRL,
    PaccMannRLOmicBasedGenerator,
    PaccMannRLProteinBasedGenerator,
)
from gt4sd.algorithms.generation.paccmann_vae import PaccMannVAE, PaccMannVAEGenerator
from gt4sd.algorithms.registry import ApplicationsRegistry
from utils import draw_grid_generate

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# NOTE(review): this constant is not referenced anywhere in the code visible
# here, and the interface below is titled "PaccMannRL" -- confirm whether the
# value is intentional before relying on it.
TITLE = "MoLeR"

# Number of values a gene expression (omics) target must contain.
OMICS_PROFILE_SIZE = 2128


def run_inference(
    algorithm_version: str,
    inference_type: str,
    protein_target: str,
    omics_target: str,
    temperature: float,
    length: float,
    number_of_samples: int,
):
    """Generate molecules and render them with ``draw_grid_generate``.

    Args:
        algorithm_version: Identifier such as ``"Protein_v0"``. The substring
            ``"Protein"`` or ``"Omic"`` selects the conditional generator, and
            the part after the last underscore is passed on as the version.
        inference_type: ``"Unbiased"`` (PaccMannVAE, no target) or
            ``"Conditional"`` (PaccMannRL with a protein or omics target).
        protein_target: Target used when a protein-based generator is chosen.
        omics_target: Space-separated numeric values; exactly
            ``OMICS_PROFILE_SIZE`` of them are required when an omic-based
            generator is chosen.
        temperature: Forwarded to the generator as ``temperature``.
        length: Forwarded to the generator as ``generated_length``.
        number_of_samples: How many samples to draw from the model.

    Returns:
        The result of ``draw_grid_generate(samples=..., n_cols=5)``.

    Raises:
        ValueError: If the inference type or algorithm version is unknown, or
            if the omics target is not parseable or has the wrong length.
    """
    if inference_type == "Unbiased":
        algorithm_class = PaccMannVAEGenerator
        model_class = PaccMannVAE
        target = None
    elif inference_type == "Conditional":
        if "Protein" in algorithm_version:
            algorithm_class = PaccMannRLProteinBasedGenerator
            target = protein_target
        elif "Omic" in algorithm_version:
            algorithm_class = PaccMannRLOmicBasedGenerator
            try:
                test_target = [float(x) for x in omics_target.split(" ")]
            except (AttributeError, ValueError) as err:
                raise ValueError(
                    f"Expected {OMICS_PROFILE_SIZE} space-separated omics values, "
                    f"got {omics_target}"
                ) from err
            if len(test_target) != OMICS_PROFILE_SIZE:
                # Report on the parsed values: ``target`` is not bound yet at
                # this point, so referencing it here would raise
                # UnboundLocalError instead of the intended ValueError.
                raise ValueError(
                    f"Expected {OMICS_PROFILE_SIZE} omics values, "
                    f"got {len(test_target)}: {test_target}"
                )
            # The parsed list is only used for validation; the raw string is
            # what gets handed to the model.
            target = omics_target
        else:
            raise ValueError(f"Unknown algorithm version {algorithm_version}")
        model_class = PaccMannRL
    else:
        raise ValueError(f"Unknown inference type {inference_type}")

    config = algorithm_class(
        algorithm_version.split("_")[-1],
        temperature=temperature,
        generated_length=length,
    )
    model = model_class(config, target=target)
    samples = list(model.sample(number_of_samples))

    return draw_grid_generate(samples=samples, n_cols=5)


if __name__ == "__main__":

    # Preparation (retrieve all available algorithms)
    all_algos = ApplicationsRegistry.list_available()
    # Build "<kind>_<version>" labels from the PaccMannRL entries by stripping
    # the "PaccMannRL" prefix and the "Based..." suffix of the application name.
    algos = [
        x["algorithm_application"].split("Based")[0].split("PaccMannRL")[-1]
        + "_"
        + x["algorithm_version"]
        for x in all_algos
        if "PaccMannRL" in x["algorithm_name"]
    ]

    # Load metadata
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    examples = pd.read_csv(metadata_root.joinpath("examples.csv"), header=None).fillna(
        ""
    )

    # Read the markdown with an explicit encoding so the result does not
    # depend on the platform's default locale.
    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(encoding="utf-8")

    demo = gr.Interface(
        fn=run_inference,
        title="PaccMannRL",
        inputs=[
            gr.Dropdown(algos, label="Algorithm version", value="Protein_v0"),
            gr.Radio(
                choices=["Conditional", "Unbiased"],
                label="Inference type",
                value="Conditional",
            ),
            gr.Textbox(
                label="Protein target",
                placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
                lines=1,
            ),
            gr.Textbox(
                label="Gene expression target",
                # Random values, used only as a visual hint of the expected format.
                placeholder=" ".join(
                    map(str, np.round(np.random.rand(OMICS_PROFILE_SIZE), 2))
                ),
                lines=1,
            ),
            gr.Slider(minimum=0.5, maximum=2, value=1, label="Decoding temperature"),
            gr.Slider(
                minimum=5,
                maximum=400,
                value=100,
                label="Maximal sequence length",
                step=1,
            ),
            gr.Slider(
                minimum=1, maximum=50, value=10, label="Number of samples", step=1
            ),
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples.values.tolist(),
    )
    demo.launch(debug=True, show_error=True)