paccmann_rl / app.py
jannisborn's picture
update
780dd35 unverified
import logging
import pathlib
from typing import List
import gradio as gr
import numpy as np
import pandas as pd
from gt4sd.algorithms.conditional_generation.paccmann_rl import (
PaccMannRL,
PaccMannRLOmicBasedGenerator,
PaccMannRLProteinBasedGenerator,
)
from gt4sd.algorithms.generation.paccmann_vae import PaccMannVAE, PaccMannVAEGenerator
from gt4sd.algorithms.registry import ApplicationsRegistry
from utils import draw_grid_generate
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
def run_inference(
algorithm_version: str,
inference_type: str,
protein_target: str,
omics_target: str,
temperature: float,
length: float,
number_of_samples: int,
):
if inference_type == "Unbiased":
algorithm_class = PaccMannVAEGenerator
model_class = PaccMannVAE
target = None
elif inference_type == "Conditional":
if "Protein" in algorithm_version:
algorithm_class = PaccMannRLProteinBasedGenerator
target = protein_target
elif "Omic" in algorithm_version:
algorithm_class = PaccMannRLOmicBasedGenerator
try:
test_target = [float(x) for x in omics_target.split(" ")]
except Exception:
raise ValueError(
f"Expected 2128 space-separated omics values, got {omics_target}"
)
if len(test_target) != 2128:
raise ValueError(
f"Expected 2128 omics values, got {len(target)}: {target}"
)
target = f"[{omics_target.replace(' ', ',')}]"
else:
raise ValueError(f"Unknown algorithm version {algorithm_version}")
model_class = PaccMannRL
else:
raise ValueError(f"Unknown inference type {inference_type}")
config = algorithm_class(
algorithm_version.split("_")[-1],
temperature=temperature,
generated_length=length,
)
model = model_class(config, target=target)
samples = list(model.sample(number_of_samples))
return draw_grid_generate(samples=samples, n_cols=5)
if __name__ == "__main__":
# Preparation (retrieve all available algorithms)
all_algos = ApplicationsRegistry.list_available()
algos = [
x["algorithm_application"].split("Based")[0].split("PaccMannRL")[-1]
+ "_"
+ x["algorithm_version"]
for x in list(filter(lambda x: "PaccMannRL" in x["algorithm_name"], all_algos))
]
# Load metadata
metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
examples = pd.read_csv(metadata_root.joinpath("examples.csv"), header=None).fillna(
""
)
with open(metadata_root.joinpath("article.md"), "r") as f:
article = f.read()
with open(metadata_root.joinpath("description.md"), "r") as f:
description = f.read()
demo = gr.Interface(
fn=run_inference,
title="PaccMannRL",
inputs=[
gr.Dropdown(algos, label="Algorithm version", value="Protein_v0"),
gr.Radio(
choices=["Conditional", "Unbiased"],
label="Inference type",
value="Conditional",
),
gr.Textbox(
label="Protein target",
placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
lines=1,
),
gr.Textbox(
label="Gene expression target",
placeholder=f"{' '.join(map(str, np.round(np.random.rand(2128), 2)))}",
lines=1,
),
gr.Slider(minimum=0.5, maximum=2, value=1, label="Decoding temperature"),
gr.Slider(
minimum=5,
maximum=400,
value=100,
label="Maximal sequence length",
step=1,
),
gr.Slider(
minimum=1, maximum=50, value=10, label="Number of samples", step=1
),
],
outputs=gr.HTML(label="Output"),
article=article,
description=description,
examples=examples.values.tolist(),
)
demo.launch(debug=True, show_error=True)