Spaces:
Sleeping
Sleeping
Update inference_app.py
Browse files- inference_app.py +46 -12
inference_app.py
CHANGED
@@ -10,6 +10,7 @@ from scipy.optimize import differential_evolution, NonlinearConstraint
|
|
10 |
from biotite.structure.io.pdb import PDBFile
|
11 |
from rdkit import Chem
|
12 |
from rdkit.Chem import AllChem
|
|
|
13 |
|
14 |
|
15 |
def generate_input_conformer(
|
@@ -53,14 +54,6 @@ def generate_input_conformer(
|
|
53 |
return _mol
|
54 |
|
55 |
|
56 |
-
def set_protein_to_new_coord(input_pdb_file, new_coord, output_file):
    """Write a copy of a PDB structure with every atom moved to one point.

    Args:
        input_pdb_file: PDB file to read the structure from.
        new_coord: Coordinate assigned to every atom; it is broadcast
            against the structure's coordinate array.
        output_file: Destination the modified structure is written to.
    """
    atoms = PDBFile.read(input_pdb_file).get_structure()
    target = np.array(new_coord)
    # Broadcasting a ones-array keeps the original coordinate array shape
    # while collapsing all atoms onto ``target``.
    atoms.coord = np.ones_like(atoms.coord) * target

    pdb_out = PDBFile()
    pdb_out.set_structure(atoms)
    pdb_out.write(output_file)
|
62 |
-
|
63 |
-
|
64 |
def optimize_coordinate(points, bound_buffer=15, dmin=6.05):
|
65 |
|
66 |
bounds = list(
|
@@ -83,6 +76,44 @@ def optimize_coordinate(points, bound_buffer=15, dmin=6.05):
|
|
83 |
return result.x, result.fun
|
84 |
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
def predict(input_sequence, input_ligand, input_msa, input_protein):
|
87 |
start_time = time.time()
|
88 |
|
@@ -93,18 +124,21 @@ def predict(input_sequence, input_ligand, input_msa, input_protein):
|
|
93 |
molwriter.write(mol)
|
94 |
|
95 |
mol_coords = mol.GetConformer().GetPositions()
|
96 |
-
#
|
97 |
-
# new_coord = np.mean(mol_coords, axis=0) + [3.5, 3.5, 3.5]
|
98 |
new_coord, min_dist_sum = optimize_coordinate(mol_coords)
|
99 |
# get mindist to protein
|
100 |
min_dist = np.min(np.linalg.norm(mol_coords - new_coord, axis=1))
|
|
|
|
|
|
|
101 |
|
|
|
102 |
output_file = "test_out.pdb"
|
103 |
-
|
104 |
|
105 |
# return an output pdb file with the protein and ligand with resname LIG or UNK.
|
106 |
# also return any metrics you want to log, metrics will not be used for evaluation but might be useful for users
|
107 |
-
metrics = {"min_dist": min_dist, "min_dist_sum": min_dist_sum}
|
108 |
|
109 |
end_time = time.time()
|
110 |
run_time = end_time - start_time
|
|
|
10 |
from biotite.structure.io.pdb import PDBFile
|
11 |
from rdkit import Chem
|
12 |
from rdkit.Chem import AllChem
|
13 |
+
from biotite.structure import AtomArrayStack
|
14 |
|
15 |
|
16 |
def generate_input_conformer(
|
|
|
54 |
return _mol
|
55 |
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
def optimize_coordinate(points, bound_buffer=15, dmin=6.05):
|
58 |
|
59 |
bounds = list(
|
|
|
76 |
return result.x, result.fun
|
77 |
|
78 |
|
79 |
+
def optimize_decoy_coordinate(points, bound_buffer=15, dmin=6.05, decoy_min=4.0, decoy_max=4.99):
    """Search for a decoy position close to exactly one of the given points.

    Differential evolution is run inside a box centred on the mean of
    ``points`` that extends ``bound_buffer`` in each of the three axes.
    The search maximises the summed distance from the candidate to all
    points, subject to two constraints:

    * exactly one point is closer than ``dmin`` to the candidate;
    * the distance to the nearest point lies in ``[decoy_min, decoy_max]``.

    Args:
        points: Point coordinates; must support ``points - x`` with a
            length-3 candidate ``x`` and a norm over ``axis=1``.
        bound_buffer: Half-width of the search box along each axis.
        dmin: Distance threshold used for the "exactly one point" count.
        decoy_min: Lower limit for the nearest-point distance.
        decoy_max: Upper limit for the nearest-point distance.

    Returns:
        Tuple ``(x, fun)`` taken from the optimizer result: the best
        coordinate found and the objective value there (the negated sum
        of distances).
    """
    centre = np.average(points, axis=0)
    margin = [bound_buffer] * 3
    lower = centre - margin
    upper = centre + margin
    bounds = list(zip(lower, upper))

    def distances(x):
        return np.linalg.norm(points - x, axis=1)

    def count_within_dmin(x):
        # Number of points closer than dmin to the candidate.
        return np.sum(distances(x) < dmin)

    def nearest_distance(x):
        return np.min(distances(x))

    def objective(x):
        # Negated so that minimisation pushes the candidate away from the points.
        return - np.sum(distances(x))

    constraints = (
        NonlinearConstraint(count_within_dmin, 1, 1),
        NonlinearConstraint(nearest_distance, decoy_min, decoy_max),
    )
    result = differential_evolution(objective, bounds, constraints=constraints)
    return result.x, result.fun
|
95 |
+
|
96 |
+
|
97 |
+
def add_decoy_atom(structure, decoy_pos):
    """Append a single decoy carbon atom to a structure.

    The decoy is a one-atom ``AtomArrayStack`` annotated as atom ``C``
    (element ``C``) of residue ``GLY`` in chain ``q``, placed at
    ``decoy_pos`` in every model.

    Args:
        structure: Structure to extend.
            NOTE(review): assumed to be an ``AtomArrayStack`` (as returned
            by ``PDBFile.get_structure()`` without a model argument) --
            confirm against callers.
        decoy_pos: The three Cartesian coordinates of the decoy atom.

    Returns:
        The result of ``structure + decoy``.

    Raises:
        ValueError: If ``decoy_pos`` cannot be read as exactly three numbers.
    """
    position = np.asarray(decoy_pos, dtype=float)
    if position.shape != (3,):
        raise ValueError(
            f"decoy_pos must hold exactly 3 coordinates, got shape {position.shape}"
        )
    # Give the decoy as many models as the structure so the two stacks
    # have matching depth when they are joined.
    decoy = AtomArrayStack(depth=structure.stack_depth(), length=1)
    # The original referenced an undefined name here; the decoy's own
    # coordinate array provides the shape to broadcast against.
    decoy.coord = np.ones_like(decoy.coord) * position
    decoy.chain_id = ["q"]
    decoy.element = ["C"]
    decoy.atom_name = ["C"]
    decoy.res_name = ["GLY"]
    return structure + decoy
|
105 |
+
|
106 |
+
|
107 |
+
def set_protein_to_new_coord_plus_decoy_atom(input_pdb_file, new_coord, decoy_coord, output_file):
    """Write a PDB with all protein atoms at one point plus a decoy atom.

    Args:
        input_pdb_file: PDB file to read the structure from.
        new_coord: Coordinate assigned to every atom of the structure; it
            is broadcast against the structure's coordinate array.
        decoy_coord: Coordinate passed to ``add_decoy_atom`` for the decoy.
        output_file: Destination the resulting structure is written to.
    """
    structure = PDBFile.read(input_pdb_file).get_structure()
    # Collapse every atom onto ``new_coord`` while keeping the array shape.
    structure.coord = np.ones_like(structure.coord) * np.array(new_coord)
    # Add the decoy; the original passed an undefined name instead of the
    # ``decoy_coord`` parameter.
    structure = add_decoy_atom(structure, decoy_coord)
    file = PDBFile()
    file.set_structure(structure)
    file.write(output_file)
|
115 |
+
|
116 |
+
|
117 |
def predict(input_sequence, input_ligand, input_msa, input_protein):
|
118 |
start_time = time.time()
|
119 |
|
|
|
124 |
molwriter.write(mol)
|
125 |
|
126 |
mol_coords = mol.GetConformer().GetPositions()
|
127 |
+
# get opt coords
|
|
|
128 |
new_coord, min_dist_sum = optimize_coordinate(mol_coords)
|
129 |
# get mindist to protein
|
130 |
min_dist = np.min(np.linalg.norm(mol_coords - new_coord, axis=1))
|
131 |
+
# decoy coord
|
132 |
+
decoy_coord = optimize_decoy_coordinate(mol_coords)
|
133 |
+
decoy_min_dist = np.min(np.linalg.norm(mol_coords - decoy_coord, axis=1))
|
134 |
|
135 |
+
# save protein
|
136 |
output_file = "test_out.pdb"
|
137 |
+
set_protein_to_new_coord_plus_decoy_atom(input_protein, new_coord, decoy_coord, output_file)
|
138 |
|
139 |
# return an output pdb file with the protein and ligand with resname LIG or UNK.
|
140 |
# also return any metrics you want to log, metrics will not be used for evaluation but might be useful for users
|
141 |
+
metrics = {"min_dist": min_dist, "min_dist_sum": min_dist_sum, "decoy_min_dist": decoy_min_dist}
|
142 |
|
143 |
end_time = time.time()
|
144 |
run_time = end_time - start_time
|