Spaces:
Sleeping
Sleeping
File size: 6,894 Bytes
bebad14 dffaf30 bebad14 5e24610 c1225dc 5e24610 d35b73e bebad14 28cb117 5e24610 40a2fdf c1225dc ad31760 c1225dc 40a2fdf d35b73e 4a15a26 d35b73e 4a15a26 d35b73e 5b72455 d35b73e 48ff744 bebad14 5e24610 bebad14 c1225dc 5e24610 48ff744 40a2fdf d35b73e 3f86c33 d35b73e 244f1ce d35b73e c1225dc d35b73e 5e24610 d35b73e 5e24610 4853a01 dadbe2e d35b73e 5e24610 bebad14 dadbe2e bebad14 f624b87 bebad14 f354223 bebad14 c0df2f3 43105fe f354223 bebad14 28cb117 bebad14 44470f9 28cb117 48ff744 28cb117 063629c a8e9ffd 28cb117 5e24610 28cb117 296aa30 4853a01 c0df2f3 4853a01 28cb117 4853a01 28cb117 dadbe2e bebad14 48ff744 dffaf30 bebad14 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 |
import time
import gradio as gr
from gradio_molecule3d import Molecule3D
import numpy as np
from scipy.optimize import differential_evolution, NonlinearConstraint
from biotite.structure.io.pdb import PDBFile
from rdkit import Chem
from rdkit.Chem import AllChem
from biotite.structure import AtomArrayStack
def generate_input_conformer(
    ligand_smiles: str,
    addHs: bool = False,
    minimize_maxIters: int = -1,
) -> Chem.Mol:
    """Build an RDKit molecule from SMILES and embed a 3D conformer into it.

    Explicit hydrogens are always added before embedding and are kept on the
    returned molecule.

    Args:
        ligand_smiles: SMILES string describing the ligand.
        addHs: Accepted for interface compatibility; the function body does
            not read it (hydrogens are added regardless of its value).
        minimize_maxIters: When positive, the embedded conformer is minimized
            with MMFF using this many iterations, and once more with the same
            budget if the first pass reports that more iterations are needed.
            Minimization is only run when the first embedding attempt succeeds.

    Returns:
        The molecule with explicit hydrogens. The outcome of the fallback
        embedding is not checked, so the molecule may carry no conformer if
        both attempts fail.

    Raises:
        ValueError: If ``ligand_smiles`` cannot be parsed by RDKit.
    """
    _mol = Chem.MolFromSmiles(ligand_smiles)
    if _mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a readable message instead of inside AddHs.
        raise ValueError(f"Could not parse ligand SMILES: {ligand_smiles!r}")

    # need to add Hs to generate sensible conformers
    _mol = Chem.AddHs(_mol)

    # Settings shared by both embedding attempts (fixed seed for reproducibility).
    embed_kwargs = dict(useRandomCoords=True, maxAttempts=100, randomSeed=42)

    # try embedding molecule using ETKDGv2 (default)
    confid = AllChem.EmbedMolecule(_mol, useBasicKnowledge=True, **embed_kwargs)

    if confid == -1:
        # this means EmbedMolecule failed
        # try less optimal approach
        AllChem.EmbedMolecule(_mol, useBasicKnowledge=False, **embed_kwargs)
        return _mol

    if minimize_maxIters > 0:
        # molecule successfully embedded - minimize
        # MMFFOptimizeMolecule returns:
        #    0 if the optimization converged,
        #   -1 if the forcefield could not be set up,
        #    1 if more iterations are required.
        status = AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
        if status == 1:
            # extend optimization to double the steps (extends by the same amount)
            AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)

    return _mol
def optimize_coordinate(points, bound_buffer=15, dmin=6.02):
    """Search for one coordinate that sits close to ``points`` without touching them.

    Differential evolution minimizes the summed distance from the candidate
    coordinate to every row of ``points``, subject to the nearest point being
    between ``dmin`` and 8 (the upper limit is fixed in the code) away.

    Args:
        points: Array of 3D coordinates, one point per row.
        bound_buffer: Half-width of the cubic search box centred on the mean
            of ``points``.
        dmin: Smallest allowed distance between the result and any point.

    Returns:
        Tuple ``(x, fun)``: the optimized coordinate and the objective value
        (sum of distances) reported by the optimizer.
    """
    centroid = np.average(points, axis=0)
    lower = centroid - [bound_buffer] * 3
    upper = centroid + [bound_buffer] * 3
    search_box = list(zip(lower, upper))

    def distances_to(candidate):
        # Euclidean distance from the candidate to every point.
        return np.linalg.norm(points - candidate, axis=1)

    def nearest_distance(candidate):
        return np.min(distances_to(candidate))

    def total_distance(candidate):
        return np.sum(distances_to(candidate))

    # Keep the closest point within [dmin, 8] of the candidate.
    keep_away = NonlinearConstraint(nearest_distance, dmin, 8)

    solution = differential_evolution(
        total_distance, search_box, constraints=keep_away
    )
    return solution.x, solution.fun
def optimize_decoy_coordinate(points, bound_buffer=15, dmin=6.02, decoy_min=4.0, decoy_max=4.98):
    """Search for a decoy coordinate that is too close to exactly one point.

    Differential evolution maximizes the summed distance from the candidate
    to every row of ``points`` (by minimizing its negative) under two
    constraints: exactly one point lies closer than ``dmin``, and the nearest
    point lies between ``decoy_min`` and ``decoy_max`` away.

    Args:
        points: Array of 3D coordinates, one point per row.
        bound_buffer: Half-width of the cubic search box centred on the mean
            of ``points``.
        dmin: Distance threshold below which a point counts as "close".
        decoy_min: Lower limit for the distance to the nearest point.
        decoy_max: Upper limit for the distance to the nearest point.

    Returns:
        Tuple ``(x, fun)``: the optimized coordinate and the objective value
        (negative sum of distances) reported by the optimizer.
    """
    centroid = np.average(points, axis=0)
    lower = centroid - [bound_buffer] * 3
    upper = centroid + [bound_buffer] * 3
    search_box = list(zip(lower, upper))

    def distances_to(candidate):
        # Euclidean distance from the candidate to every point.
        return np.linalg.norm(points - candidate, axis=1)

    def count_within_dmin(candidate):
        return np.sum(distances_to(candidate) < dmin)

    def nearest_distance(candidate):
        return np.min(distances_to(candidate))

    def negative_total_distance(candidate):
        return -np.sum(distances_to(candidate))

    # Exactly one point may be closer than dmin ...
    single_contact = NonlinearConstraint(count_within_dmin, 1, 1)
    # ... and that closest point must fall inside [decoy_min, decoy_max].
    contact_range = NonlinearConstraint(nearest_distance, decoy_min, decoy_max)

    solution = differential_evolution(
        negative_total_distance,
        search_box,
        constraints=(single_contact, contact_range),
    )
    return solution.x, solution.fun
def add_decoy_atom(structure, decoy_pos):
    """Return ``structure`` with one extra decoy atom placed at ``decoy_pos``.

    The decoy is a single-atom, single-model ``AtomArrayStack`` annotated as
    a carbon named "C" in residue "GLY" on chain "q"; it is joined to
    ``structure`` with the ``+`` operator.

    Args:
        structure: Structure to extend (presumably an ``AtomArrayStack`` with
            one model, as the decoy stack has depth 1 -- verify against caller).
        decoy_pos: Coordinate of the decoy atom; multiplied into an all-ones
            array shaped like the decoy's coordinates.

    Returns:
        The result of ``structure + decoy``.
    """
    decoy_atom = AtomArrayStack(depth=1, length=1)

    # Annotations identifying the decoy.
    decoy_atom.atom_name = ["C"]
    decoy_atom.element = ["C"]
    decoy_atom.res_name = ["GLY"]
    decoy_atom.chain_id = ["q"]

    # Fill the decoy's coordinate array with the requested position.
    unit_coords = np.ones_like(decoy_atom.coord)
    decoy_atom.coord = unit_coords * decoy_pos

    return structure + decoy_atom
def set_protein_to_new_coord_plus_decoy_atom(input_pdb_file, new_coord, decoy_coord, output_file):
    """Collapse a protein onto one coordinate, add a decoy atom, and save as PDB.

    Args:
        input_pdb_file: PDB input handed to ``PDBFile.read``.
        new_coord: Coordinate assigned to every atom of the loaded structure.
        decoy_coord: Coordinate of the decoy atom appended afterwards.
        output_file: Destination handed to ``PDBFile.write``.
    """
    protein = PDBFile.read(input_pdb_file).get_structure()

    # Overwrite every atom position with the same target coordinate.
    target = np.array(new_coord)
    protein.coord = np.ones_like(protein.coord) * target

    # add decoy
    protein_with_decoy = add_decoy_atom(protein, decoy_coord)

    pdb_out = PDBFile()
    pdb_out.set_structure(protein_with_decoy)
    pdb_out.write(output_file)
def predict(input_sequence, input_ligand, input_msa, input_protein):
    """Run the placeholder "docking" pipeline behind the Gradio button.

    A ligand conformer is generated from the SMILES and written to
    ``test_docking_pose.sdf``. All protein atoms are moved to one optimized
    coordinate near the ligand, a decoy atom is added, and the result is
    written to ``test_out.pdb``. Both files use fixed names in the working
    directory, so overlapping calls overwrite each other's output.

    Args:
        input_sequence: Protein sequence from the UI; not used here.
        input_ligand: Ligand SMILES string.
        input_msa: MSA file from the UI; not used here.
        input_protein: Protein PDB input, forwarded to ``PDBFile.read``
            (presumably a file path supplied by ``gr.File`` -- verify).

    Returns:
        Tuple ``([pdb_path, sdf_path], metrics, run_time)`` where ``metrics``
        is a dict of floats and ``run_time`` is the elapsed time in seconds.
    """
    start_time = time.time()
    ligand_file = "test_docking_pose.sdf"
    output_file = "test_out.pdb"

    # Do inference here
    mol = generate_input_conformer(input_ligand, minimize_maxIters=500)

    # Close the writer explicitly so the SDF is flushed to disk and the file
    # handle is released even if writing fails.
    molwriter = Chem.SDWriter(ligand_file)
    try:
        molwriter.write(mol)
    finally:
        molwriter.close()

    # get only non hydrogen atoms
    heavy_atom_mask = [at.GetAtomicNum() != 1 for at in mol.GetAtoms()]
    mol_coords = mol.GetConformer().GetPositions()[heavy_atom_mask]

    # get opt coords
    new_coord, min_dist_sum = optimize_coordinate(mol_coords)
    # get mindist to protein
    min_dist = np.min(np.linalg.norm(mol_coords - new_coord, axis=1))

    # decoy coord
    decoy_coord, _ = optimize_decoy_coordinate(mol_coords)
    decoy_min_dist = np.min(np.linalg.norm(mol_coords - decoy_coord, axis=1))

    # save protein
    set_protein_to_new_coord_plus_decoy_atom(input_protein, new_coord, decoy_coord, output_file)

    # return an output pdb file with the protein and ligand with resname LIG or UNK.
    # also return any metrics you want to log, metrics will not be used for evaluation but might be useful for users
    # Cast NumPy scalars to plain floats so the metrics are ordinary JSON values.
    metrics = {
        "min_dist": float(min_dist),
        "min_dist_sum": float(min_dist_sum),
        "decoy_min_dist": float(decoy_min_dist),
    }

    run_time = time.time() - start_time
    return [output_file, ligand_file], metrics, run_time
# Display settings passed to the Molecule3D viewer: model 0 is drawn as grey
# spheres and model 1 as green sticks.
reps = [
    {"model": 0, "style": "sphere", "color": "grayCarbon"},
    {"model": 1, "style": "stick", "color": "greenCarbon"},
]

with gr.Blocks() as app:
    # Page header.
    gr.Markdown("# Template for inference")
    gr.Markdown("Title, description, and other information about the model")

    # First row: text inputs.
    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")

    # Second row: file inputs.
    with gr.Row():
        input_msa = gr.File(label="Input Protein MSA (A3M)")
        input_protein = gr.File(label="Input protein monomer")

    # define any options here
    # for automated inference the default options are used
    # slider_option = gr.Slider(0,10, label="Slider Option")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

    btn = gr.Button("Run Inference")

    # The same four components feed both the example loader and the click handler.
    inference_inputs = [input_sequence, input_ligand, input_msa, input_protein]

    # One pre-filled example; values are listed in the order of inference_inputs.
    example_row = [
        "",
        "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
        "empty_file.a3m",
        "test_input.pdb",
    ]
    gr.Examples([example_row], inference_inputs)

    # Output widgets, in the order predict() returns its values.
    out = Molecule3D(reps=reps)
    metrics = gr.JSON(label="Metrics")
    run_time = gr.Textbox(label="Runtime")

    btn.click(predict, inputs=inference_inputs, outputs=[out, metrics, run_time])

app.launch()
|