OleinikovasV committed on
Commit
d35b73e
·
verified ·
1 Parent(s): 3f86c33

Update inference_app.py

Browse files
Files changed (1) hide show
  1. inference_app.py +46 -12
inference_app.py CHANGED
@@ -10,6 +10,7 @@ from scipy.optimize import differential_evolution, NonlinearConstraint
10
  from biotite.structure.io.pdb import PDBFile
11
  from rdkit import Chem
12
  from rdkit.Chem import AllChem
 
13
 
14
 
15
  def generate_input_conformer(
@@ -53,14 +54,6 @@ def generate_input_conformer(
53
  return _mol
54
 
55
 
56
def set_protein_to_new_coord(input_pdb_file, new_coord, output_file):
    """Write a copy of a PDB structure with every atom moved to ``new_coord``.

    Args:
        input_pdb_file: PDB file to read the structure from.
        new_coord: Coordinate assigned to every atom of the structure.
        output_file: Destination the modified structure is written to.
    """
    atoms = PDBFile.read(input_pdb_file).get_structure()
    # Broadcast the single target coordinate over the full coordinate array.
    target = np.array(new_coord)
    atoms.coord = np.ones_like(atoms.coord) * target
    out_pdb = PDBFile()
    out_pdb.set_structure(atoms)
    out_pdb.write(output_file)
62
-
63
-
64
  def optimize_coordinate(points, bound_buffer=15, dmin=6.05):
65
 
66
  bounds = list(
@@ -83,6 +76,44 @@ def optimize_coordinate(points, bound_buffer=15, dmin=6.05):
83
  return result.x, result.fun
84
 
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  def predict(input_sequence, input_ligand, input_msa, input_protein):
87
  start_time = time.time()
88
 
@@ -93,18 +124,21 @@ def predict(input_sequence, input_ligand, input_msa, input_protein):
93
  molwriter.write(mol)
94
 
95
  mol_coords = mol.GetConformer().GetPositions()
96
- # new_coord = [0, 0, 0]
97
- # new_coord = np.mean(mol_coords, axis=0) + [3.5, 3.5, 3.5]
98
  new_coord, min_dist_sum = optimize_coordinate(mol_coords)
99
  # get mindist to protein
100
  min_dist = np.min(np.linalg.norm(mol_coords - new_coord, axis=1))
 
 
 
101
 
 
102
  output_file = "test_out.pdb"
103
- set_protein_to_new_coord(input_protein, new_coord, output_file)
104
 
105
  # return an output pdb file with the protein and ligand with resname LIG or UNK.
106
  # also return any metrics you want to log, metrics will not be used for evaluation but might be useful for users
107
- metrics = {"min_dist": min_dist, "min_dist_sum": min_dist_sum}
108
 
109
  end_time = time.time()
110
  run_time = end_time - start_time
 
10
  from biotite.structure.io.pdb import PDBFile
11
  from rdkit import Chem
12
  from rdkit.Chem import AllChem
13
+ from biotite.structure import AtomArrayStack
14
 
15
 
16
  def generate_input_conformer(
 
54
  return _mol
55
 
56
 
 
 
 
 
 
 
 
 
57
  def optimize_coordinate(points, bound_buffer=15, dmin=6.05):
58
 
59
  bounds = list(
 
76
  return result.x, result.fun
77
 
78
 
79
def optimize_decoy_coordinate(points, bound_buffer=15, dmin=6.05, decoy_min=4.0, decoy_max=4.99):
    """Search for a decoy coordinate near ``points`` via differential evolution.

    The search box is centred on the average of ``points`` and extends
    ``bound_buffer`` along each of three axes. Two constraints are imposed on
    a candidate ``x``: exactly one point lies closer than ``dmin``, and the
    smallest distance to any point lies within ``[decoy_min, decoy_max]``.
    The objective is the negated sum of distances from ``x`` to all points,
    so the optimizer pushes ``x`` away from the points overall.

    Args:
        points: Array of coordinates (three columns are expected, matching
            the three-axis search box).
        bound_buffer: Half-width of the search box along each axis.
        dmin: Distance threshold used to count "close" points.
        decoy_min: Lower limit for the distance to the nearest point.
        decoy_max: Upper limit for the distance to the nearest point.

    Returns:
        Tuple ``(x, fun)``: the coordinate found by the optimizer and the
        objective value at that coordinate.
    """
    centre = np.average(points, axis=0)
    half_width = [bound_buffer] * 3
    bounds = list(zip(centre - half_width, centre + half_width))

    def distances(x):
        return np.linalg.norm(points - x, axis=1)

    def close_count(x):
        # Number of points nearer than dmin to the candidate.
        return np.sum(distances(x) < dmin)

    def nearest(x):
        return np.min(distances(x))

    def objective(x):
        # Negated because differential_evolution minimizes.
        return -np.sum(distances(x))

    exactly_one_close = NonlinearConstraint(close_count, 1, 1)
    nearest_in_range = NonlinearConstraint(nearest, decoy_min, decoy_max)
    result = differential_evolution(
        objective,
        bounds,
        constraints=(exactly_one_close, nearest_in_range),
    )
    return result.x, result.fun
95
+
96
+
97
def add_decoy_atom(structure, decoy_pos):
    """Return ``structure`` with a single decoy atom appended at ``decoy_pos``.

    The decoy is a one-atom stack annotated as a carbon named ``C`` in a
    ``GLY`` residue on chain ``q``.

    Args:
        structure: Atom array stack to extend (e.g. the result of
            ``PDBFile.get_structure()``).
        decoy_pos: Coordinate of the decoy atom.

    Returns:
        A new stack holding the atoms of ``structure`` followed by the decoy.
    """
    # Match the model count of the input so the two stacks can be concatenated.
    decoy = AtomArrayStack(depth=structure.stack_depth(), length=1)
    # Broadcast the decoy position over every model of the one-atom stack.
    decoy.coord = np.ones_like(decoy.coord) * np.asarray(decoy_pos)
    decoy.chain_id = ["q"]
    decoy.element = ["C"]
    decoy.atom_name = ["C"]
    decoy.res_name = ["GLY"]
    return structure + decoy
105
+
106
+
107
def set_protein_to_new_coord_plus_decoy_atom(input_pdb_file, new_coord, decoy_coord, output_file):
    """Write a PDB with all atoms moved to ``new_coord`` plus one decoy atom.

    Args:
        input_pdb_file: PDB file to read the structure from.
        new_coord: Coordinate assigned to every atom of the input structure.
        decoy_coord: Coordinate of the extra decoy atom that is appended.
        output_file: Destination the resulting structure is written to.
    """
    structure = PDBFile.read(input_pdb_file).get_structure()
    # Collapse every atom of the input structure onto the single new coordinate.
    structure.coord = np.ones_like(structure.coord) * np.array(new_coord)
    # Append the decoy atom at its own coordinate.
    structure = add_decoy_atom(structure, decoy_coord)
    out_pdb = PDBFile()
    out_pdb.set_structure(structure)
    out_pdb.write(output_file)
115
+
116
+
117
  def predict(input_sequence, input_ligand, input_msa, input_protein):
118
  start_time = time.time()
119
 
 
124
  molwriter.write(mol)
125
 
126
  mol_coords = mol.GetConformer().GetPositions()
127
+ # get opt coords
 
128
  new_coord, min_dist_sum = optimize_coordinate(mol_coords)
129
  # get mindist to protein
130
  min_dist = np.min(np.linalg.norm(mol_coords - new_coord, axis=1))
131
+ # decoy coord
132
+ decoy_coord = optimize_decoy_coordinate(mol_coords)
133
+ decoy_min_dist = np.min(np.linalg.norm(mol_coords - decoy_coord, axis=1))
134
 
135
+ # save protein
136
  output_file = "test_out.pdb"
137
+ set_protein_to_new_coord_plus_decoy_atom(input_protein, new_coord, decoy_coord, output_file)
138
 
139
  # return an output pdb file with the protein and ligand with resname LIG or UNK.
140
  # also return any metrics you want to log, metrics will not be used for evaluation but might be useful for users
141
+ metrics = {"min_dist": min_dist, "min_dist_sum": min_dist_sum, "decoy_min_dist": decoy_min_dist}
142
 
143
  end_time = time.time()
144
  run_time = end_time - start_time