Spaces:
Running
Running
File size: 6,285 Bytes
02970c0 db541e4 02970c0 4d5beeb f73076c 4d5beeb 02970c0 2ec65d5 8505e9d db541e4 4d5beeb c72f5fe 7f5c48e 8505e9d 7f5c48e 8505e9d 7f5c48e 4d5beeb 7f5c48e affd796 2ec65d5 2466cb5 7f5c48e 2466cb5 7f5c48e 2466cb5 7f5c48e 2466cb5 7f5c48e 2466cb5 7f5c48e 2466cb5 02970c0 4d5beeb 1082445 8505e9d 4d5beeb 7f5c48e 4d5beeb 2466cb5 4d5beeb 7f5c48e d92a3e6 4d5beeb affd796 2f14da2 f73076c 4d5beeb 2f14da2 affd796 4d5beeb db541e4 affd796 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import re
from pathlib import Path
import gradio as gr
from evodiff.pretrained import OA_DM_38M, D3PM_UNIFORM_38M, MSA_OA_DM_MAXSUB
from evodiff.generate import generate_oaardm, generate_d3pm
from evodiff.generate_msa import generate_query_oadm_msa_simple
import py3Dmol
from colabfold.download import download_alphafold_params
from colabfold.batch import run
def a3m_file(file):
    """Return the fixed A3M filename used by this app.

    The ``file`` argument is accepted but never inspected; the result is
    always the same literal path.
    """
    a3m_path = "tmp.a3m"
    return a3m_path
def predict_protein(sequence):
    """Run a ColabFold prediction for ``sequence`` and return the PDB path.

    Args:
        sequence: The query sequence handed to ColabFold as the single entry
            of ``queries``.

    Returns:
        The path (as a string, relative to the working directory) at which
        the rank-1 unrelaxed model is expected to be written.
    """
    # One job name drives both the result directory and the output file name,
    # and one model type drives both the weights download and the run, so the
    # returned path cannot drift out of sync with the run configuration.
    job_name = "evodiff_protein"
    model_type = "alphafold2_ptm"
    data_dir = Path(".")

    # NOTE(review): called on every prediction; presumably a no-op once the
    # weights are already present in data_dir - confirm against ColabFold.
    download_alphafold_params(model_type, data_dir)

    # The return value of run() is not needed: the prediction is picked up
    # from the PDB file written into result_dir.
    run(
        queries=[(job_name, sequence, None)],
        result_dir=job_name,
        use_templates=False,
        num_relax=0,
        msa_mode="mmseqs2_uniref_env",
        model_type=model_type,
        num_models=1,
        num_recycles=1,
        model_order=[1],
        is_complex=False,
        data_dir=data_dir,
        keep_existing_results=False,
        rank_by="auto",
        stop_at_score=100.0,
        zip_results=False,
        user_agent="colabfold/google-colab-main",
    )

    # File name for model 1 / seed 0, matching num_models=1 and
    # model_order=[1] above.
    return (
        f"{job_name}/{job_name}_unrelaxed_rank_001_"
        f"{model_type}_model_1_seed_000.pdb"
    )
def display_pdb(path_to_pdb):
    """Render a PDB file as an embeddable py3Dmol viewer.

    Adapted from
    https://huggingface.co/spaces/merle/PROTEIN_GENERATOR/blob/main/app.py

    Args:
        path_to_pdb: Path of the PDB file to read.

    Returns:
        An ``<iframe>`` HTML string whose ``srcdoc`` attribute carries a small
        HTML page containing the viewer.
    """
    # Context manager so the file handle is closed promptly.
    with open(path_to_pdb, "r") as pdb_file:
        pdb = pdb_file.read()

    view = py3Dmol.view(width=500, height=500)
    view.addModel(pdb, "pdb")
    # Cartoon coloured by the B-factor column ('b') on a roygb gradient.
    # NOTE(review): the gradient range is 0-1; if the B-factor column holds
    # values on a 0-100 scale every residue will saturate - confirm.
    view.setStyle(
        {'model': -1},
        {"cartoon": {'colorscheme': {'prop': 'b', 'gradient': 'roygb', 'min': 0, 'max': 1}}},
    )
    view.zoomTo()

    # The srcdoc attribute below is delimited by single quotes, so the viewer
    # markup must not contain any: swap them for double quotes.
    output = view._make_html().replace("'", '"')

    # Wrap the viewer in a centred page (the opening tag is <center>, paired
    # with the closing </center>).
    x = f"""<!DOCTYPE html><html><center> {output} </center></html>"""  # do not use ' in this input
    return f"""<iframe height="500px" width="100%" name="result" allow="midi; geolocation; microphone; camera;
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
def make_uncond_seq(seq_len, model_type, pred_structure=False):
    """Generate a sequence unconditionally and optionally predict its structure.

    Args:
        seq_len: Requested sequence length; coerced to ``int`` before use.
        model_type: Either ``"EvoDiff-Seq-OADM 38M"`` or
            ``"EvoDiff-D3PM-Uniform 38M"``.
        pred_structure: When true, also run ``predict_protein`` and
            ``display_pdb`` on the generated sequence. Defaults to ``False``
            so the function can be called with only the first two arguments.

    Returns:
        A ``(generated_sequence, structure_html)`` pair. ``structure_html`` is
        ``None`` when no structure was requested, so the result always has one
        value per output component of the Gradio interface.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    seq_len = int(seq_len)

    if model_type == "EvoDiff-Seq-OADM 38M":
        model, _collater, tokenizer, _scheme = OA_DM_38M()
        _tokenized_sample, generated_sequence = generate_oaardm(
            model, tokenizer, seq_len, batch_size=1, device='cpu'
        )
    elif model_type == "EvoDiff-D3PM-Uniform 38M":
        checkpoint = D3PM_UNIFORM_38M(return_all=True)
        model, _collater, tokenizer, _scheme, timestep, Q_bar, Q = checkpoint
        _tokenized_sample, generated_sequence = generate_d3pm(
            model, tokenizer, Q, Q_bar, timestep, seq_len, batch_size=1, device='cpu'
        )
    else:
        # Fail with a clear message instead of an unbound-local error later.
        raise ValueError(f"Unsupported model type: {model_type!r}")

    if not pred_structure:
        return generated_sequence, None

    path_to_pdb = predict_protein(generated_sequence)
    return generated_sequence, display_pdb(path_to_pdb)
def make_cond_seq(seq_len, msa_file, model_type, pred_structure=False):
    """Generate a sequence conditioned on an MSA and optionally predict its structure.

    Args:
        seq_len: Requested sequence length; coerced to ``int`` before use
            because a slider without an integer step can deliver a float.
        msa_file: The uploaded A3M alignment, either a path string or an
            object exposing the path as ``.name``.
        model_type: Must be ``"EvoDiff-MSA"``.
        pred_structure: When true, also run ``predict_protein`` and
            ``display_pdb`` on the generated sequence.

    Returns:
        A ``(generated_sequence, structure_html)`` pair. ``structure_html`` is
        ``None`` when no structure was requested, so the result always has one
        value per output component of the Gradio interface.

    Raises:
        ValueError: If ``model_type`` is unsupported or no MSA file was given.
    """
    # Validate inputs before loading the checkpoint.
    if model_type != "EvoDiff-MSA":
        raise ValueError(f"Unsupported model type: {model_type!r}")
    if msa_file is None:
        raise ValueError("An MSA (.a3m) file is required for conditional generation.")

    # Accept both a plain path and a file-like wrapper carrying `.name`.
    msa_path = getattr(msa_file, "name", msa_file)
    seq_len = int(seq_len)

    model, _collater, tokenizer, _scheme = MSA_OA_DM_MAXSUB()
    _tokenized_sample, generated_sequence = generate_query_oadm_msa_simple(
        msa_path,
        model,
        tokenizer,
        n_sequences=64,
        seq_length=seq_len,
        device='cpu',
        selection_type='random',
    )

    if not pred_structure:
        return generated_sequence, None

    path_to_pdb = predict_protein(generated_sequence)
    return generated_sequence, display_pdb(path_to_pdb)
# Unconditional generation tab. One input component per parameter of
# make_uncond_seq (seq_len, model_type, pred_structure) and one output
# component per returned value (sequence text, structure HTML).
usg_app = gr.Interface(
    fn=make_uncond_seq,
    inputs=[
        gr.Slider(10, 100, step=1, label = "Sequence Length"),
        gr.Dropdown(["EvoDiff-Seq-OADM 38M", "EvoDiff-D3PM-Uniform 38M"], value="EvoDiff-Seq-OADM 38M", type="value", label = "Model"),
        # Supplies make_uncond_seq's third argument; mirrors the conditional tab.
        gr.Checkbox(value=False, label = "Predict Structure?")
    ],
    outputs=[
        "text",
        gr.HTML()
    ],
    title = "Unconditional sequence generation",
    description="Generate a sequence with `EvoDiff-Seq-OADM 38M` (smaller/faster) or `EvoDiff-D3PM-Uniform 38M` (larger/slower) models."
)
# Conditional (MSA-guided) generation tab. One input component per parameter
# of make_cond_seq (seq_len, msa_file, model_type, pred_structure).
csg_app = gr.Interface(
    fn=make_cond_seq,
    inputs=[
        # step=1 keeps the length an integer, matching the unconditional tab;
        # without an explicit step the slider can emit fractional values.
        gr.Slider(10, 100, step=1, label = "Sequence Length"),
        gr.File(file_types=["a3m"], label = "MSA File"),
        gr.Dropdown(["EvoDiff-MSA"], value="EvoDiff-MSA", type="value", label = "Model"),
        gr.Checkbox(value=False, label = "Predict Structure?")
    ],
    outputs=[
        "text",
        gr.HTML()
    ],
    # examples=[["https://github.com/microsoft/evodiff/raw/main/examples/example_files/bfd_uniclust_hits.a3m"]],
    title = "Conditional sequence generation",
    description="Evolutionary guided sequence generation with the `EvoDiff-MSA` model."
)
# Top-level page: a header row with project credits, then a row holding the
# two generation interfaces as tabs.
with gr.Blocks() as edapp:
    with gr.Row():
        # The Markdown text is kept flush-left so no line can be read as an
        # indented code block.
        gr.Markdown(
            """
# EvoDiff
## Generation of protein sequences and evolutionary alignments via discrete diffusion models
Created By: Microsoft Research [Sarah Alamdari, Nitya Thakkar, Rianne van den Berg, Alex X. Lu, Nicolo Fusi, Ava P. Amini, and Kevin K. Yang]
Spaces App By: [Colby T. Ford](https://github.com/colbyford)
"""
        )
    with gr.Row():
        gr.TabbedInterface([usg_app, csg_app], ["Unconditional sequence generation", "Conditional generation"])
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    edapp.launch()