Spaces:
Sleeping
Sleeping
File size: 7,852 Bytes
7b093ca 5cce9a4 7b093ca 242f3e4 7b093ca 5cce9a4 31ae121 7b093ca 8ee7dbf 3687063 137a7d5 3687063 242f3e4 4c6688c 3f2e816 242f3e4 0fba50f 3f2e816 242f3e4 3f2e816 7b093ca 5cce9a4 7b093ca 242f3e4 4c6688c 242f3e4 4c6688c 242f3e4 3687063 8ee7dbf 3687063 242f3e4 0fba50f 3687063 8ee7dbf 0fba50f 3f2e816 242f3e4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
import gradio as gr
from transformers import AutoTokenizer, EsmForProteinFolding
from transformers.models.esm.openfold_utils.protein import to_pdb, Protein as OFProtein
from transformers.models.esm.openfold_utils.feats import atom14_to_atom37
import torch
from logging import getLogger
logger = getLogger(__name__)
def convert_outputs_to_pdb(outputs):
final_atom_positions = atom14_to_atom37(outputs["positions"][-1], outputs)
outputs = {k: v.to("cpu").numpy() for k, v in outputs.items()}
final_atom_positions = final_atom_positions.cpu().numpy()
final_atom_mask = outputs["atom37_atom_exists"]
pdbs = []
for i in range(outputs["aatype"].shape[0]):
aa = outputs["aatype"][i]
pred_pos = final_atom_positions[i]
mask = final_atom_mask[i]
resid = outputs["residue_index"][i] + 1
pred = OFProtein(
aatype=aa,
atom_positions=pred_pos,
atom_mask=mask,
residue_index=resid,
b_factors=outputs["plddt"][i],
chain_index=outputs["chain_index"][i] if "chain_index" in outputs else None,
)
pdbs.append(to_pdb(pred))
return pdbs[0]
def fold_prot_locally(sequence):
logger.info("Folding: " + sequence)
tokenized_input = tokenizer([sequence], return_tensors="pt", add_special_tokens=False)['input_ids'].cuda()
with torch.no_grad():
output = model(tokenized_input)
pdb = convert_outputs_to_pdb(output)
return pdb
def get_esm2_embeddings(sequence):
logger.info("Getting embeddings for: " + sequence)
tokenized_input = tokenizer([sequence], return_tensors="pt", add_special_tokens=False)['input_ids'].cuda()
with torch.no_grad():
aa = tokenized_input
L = aa.shape[1]
device = tokenized_input.device
attention_mask = torch.ones_like(aa, device=device)
# === ESM ===
esmaa = model.af2_idx_to_esm_idx(aa, attention_mask)
esm_s = model.compute_language_model_representations(esmaa)
return {"res": esm_s.cpu().tolist()}
def get_esmfold_embeddings(sequence):
logger.info("Getting embeddings for: " + sequence)
tokenized_input = tokenizer([sequence], return_tensors="pt", add_special_tokens=False)['input_ids'].cuda()
with torch.no_grad():
output = model(tokenized_input)
return {"res": output["s_s"].cpu().tolist()}
def suggest(option):
if option == "Plastic degradation protein":
suggestion = "MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEAELAAATAEQ"
elif option == "Antifreeze protein":
suggestion = "QCTGGADCTSCTGACTGCGNCPNAVTCTNSQHCVKANTCTGSTDCNTAQTCTNSKDCFEANTCTDSTNCYKATACTNSSGCPGH"
elif option == "AI Generated protein":
suggestion = "MSGMKKLYEYTVTTLDEFLEKLKEFILNTSKDKIYKLTITNPKLIKDIGKAIAKAAEIADVDPKEIEEMIKAVEENELTKLVITIEQTDDKYVIKVELENEDGLVHSFEIYFKNKEEMEKFLELLEKLISKLSGS"
elif option == "7-bladed propeller fold":
suggestion = "VKLAGNSSLCPINGWAVYSKDNSIRIGSKGDVFVIREPFISCSHLECRTFFLTQGALLNDKHSNGTVKDRSPHRTLMSCPVGEAPSPYNSRFESVAWSASACHDGTSWLTIGISGPDNGAVAVLKYNGIITDTIKSWRNNILRTQESECACVNGSCFTVMTDGPSNGQASYKIFKMEKGKVVKSVELDAPNYHYEECSCYPNAGEITCVCRDNWHGSNRPWVSFNQNLEYQIGYICSGVFGDNPRPNDGTGSCGPVSSNGAYGVKGFSFKYGNGVWIGRTKSTNSRSGFEMIWDPNGWTETDSSFSVKQDIVAITDWSGYSGSFVQHPELTGLDCIRPCFWVELIRGRPKESTIWTSGSSISFCGVNSDTVGWSWPDGAELPFTIDK"
else:
suggestion = ""
return suggestion
def molecule(mol):
x = (
"""<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
<style>
body{
font-family:sans-serif
}
.mol-container {
width: 100%;
height: 600px;
position: relative;
}
.mol-container select{
background-image:None;
}
</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js" integrity="sha512-STof4xm1wgkfm7heWqFJVn58Hm3EtS31XFaagaa8VMReCXAkQnJZ+jEy8PCC/iT18dFy95WcExNHFTqLyp72eQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
</head>
<body>
<div id="container" class="mol-container"></div>
<script>
let pdb = `"""
+ mol
+ """`
$(document).ready(function () {
let element = $("#container");
let config = { backgroundColor: "white" };
let viewer = $3Dmol.createViewer(element, config);
viewer.addModel(pdb, "pdb");
viewer.getModel(0).setStyle({}, { cartoon: { colorscheme:"whiteCarbon" } });
viewer.zoomTo();
viewer.render();
viewer.zoom(0.8, 2000);
})
</script>
</body></html>"""
)
return f"""<iframe style="width: 100%; height: 600px" name="result" allow="midi; geolocation; microphone; camera;
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
sample_code = """
from gradio_client import Client
client = Client("https://wwydmanski-esmfold.hf.space/")
def fold_huggingface(sequence, fname=None):
result = client.predict(
sequence, # str in 'sequence' Textbox component
api_name="/pdb")
if fname is None:
with tempfile.NamedTemporaryFile("w", delete=False, suffix=".pdb", prefix="esmfold_") as fp:
fp.write(result)
fp.flush()
return fp.name
else:
with open(fname, "w") as fp:
fp.write(result)
fp.flush()
return fname
pdb_fname = fold_huggingface("MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN")
"""
tokenizer = AutoTokenizer.from_pretrained("facebook/esmfold_v1")
model = EsmForProteinFolding.from_pretrained("facebook/esmfold_v1", low_cpu_mem_usage=True).cuda()
model.esm = model.esm.half()
torch.backends.cuda.matmul.allow_tf32 = True
with gr.Blocks() as demo:
gr.Markdown("# ESMFold")
with gr.Row():
with gr.Column():
inp = gr.Textbox(lines=1, label="Sequence")
name = gr.Dropdown(label="Choose a Sample Protein", value="Plastic degradation protein", choices=["Antifreeze protein", "Plastic degradation protein", "AI Generated protein", "7-bladed propeller fold", "custom"])
btn = gr.Button("🔬 Predict Structure ")
with gr.Row():
with gr.Column():
gr.Markdown("## Sample code")
gr.Code(sample_code, label="Sample usage", language="python", interactive=False)
with gr.Row():
gr.Markdown("## Output")
with gr.Row():
with gr.Column():
out = gr.Code(label="Output", interactive=False)
with gr.Column():
out_mol = gr.HTML(label="3D Structure")
with gr.Row(visible=False):
with gr.Column():
gr.Markdown("## Embeddings")
embs = gr.JSON(label="Embeddings")
name.change(fn=suggest, inputs=name, outputs=inp)
btn.click(fold_prot_locally, inputs=[inp], outputs=[out], api_name="pdb")
btn.click(get_esmfold_embeddings, inputs=[inp], outputs=[embs], api_name="embeddings")
btn.click(get_esm2_embeddings, inputs=[inp], outputs=[embs], api_name="esm2_embeddings")
out.change(fn=molecule, inputs=[out], outputs=[out_mol], api_name="3d_fold")
demo.launch() |