OleinikovasV committed on
Commit
5e24610
·
verified ·
1 Parent(s): 7009d49

Update inference_app.py

Browse files
Files changed (1) hide show
  1. inference_app.py +71 -5
inference_app.py CHANGED
@@ -5,15 +5,80 @@ import gradio as gr
5
 
6
  from gradio_molecule3d import Molecule3D
7
 
 
 
 
 
8
 
9
 
 
 
 
 
 
10
 
11
- def predict (input_sequence, input_ligand,input_msa, input_protein):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  start_time = time.time()
 
13
  # Do inference here
 
 
 
 
 
 
 
 
 
 
14
  # return an output pdb file with the protein and ligand with resname LIG or UNK.
15
  # also return any metrics you want to log, metrics will not be used for evaluation but might be useful for users
16
- metrics = {"mean_plddt": 80, "binding_affinity": -2}
 
 
17
  end_time = time.time()
18
  run_time = end_time - start_time
19
  return ["test_out.pdb", "test_docking_pose.sdf"], metrics, run_time
@@ -43,12 +108,13 @@ with gr.Blocks() as app:
43
  gr.Examples(
44
  [
45
  [
46
- "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL:SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL",
47
  "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
 
48
  "test_out.pdb"
49
  ],
50
  ],
51
- [input_sequence, input_ligand, input_protein],
52
  )
53
  reps = [
54
  {
@@ -68,6 +134,6 @@ with gr.Blocks() as app:
68
  metrics = gr.JSON(label="Metrics")
69
  run_time = gr.Textbox(label="Runtime")
70
 
71
- btn.click(predict, inputs=[input_sequence, input_ligand, input_msa, input_protein], outputs=[out,metrics, run_time])
72
 
73
  app.launch()
 
5
 
6
  from gradio_molecule3d import Molecule3D
7
 
8
+ import numpy as np
9
+ from biotite.structure.io.pdb import PDBFile
10
+ from rdkit import Chem
11
+ from rdkit.Chem import AllChem
12
 
13
 
14
def generate_input_conformer(
    ligand_smiles: str,
    addHs: bool = False,
    minimize_maxIters: int = -1,
) -> Chem.Mol:
    """Build an RDKit molecule with one embedded 3D conformer from a SMILES string.

    Args:
        ligand_smiles: SMILES string of the ligand.
        addHs: Currently unused; kept for interface compatibility. Explicit
            hydrogens are always added because they are needed to generate
            sensible conformers.
        minimize_maxIters: Maximum MMFF iterations per optimization pass.
            Values <= 0 skip minimization.

    Returns:
        The molecule with explicit hydrogens. If both embedding attempts
        fail, the molecule is returned without any conformer.

    Raises:
        ValueError: If ``ligand_smiles`` is empty or cannot be parsed.
    """
    if not ligand_smiles:
        raise ValueError("No ligand SMILES provided")

    _mol = Chem.MolFromSmiles(ligand_smiles)
    if _mol is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError(f"Could not parse ligand SMILES: {ligand_smiles!r}")

    # need to add Hs to generate sensible conformers
    _mol = Chem.AddHs(_mol)

    # First try embedding with basic chemical knowledge enabled; if that
    # fails (-1), retry with the less optimal setting that disables it.
    confid = -1
    for use_basic_knowledge in (True, False):
        confid = AllChem.EmbedMolecule(
            _mol,
            useRandomCoords=True,
            useBasicKnowledge=use_basic_knowledge,
            maxAttempts=100,
            randomSeed=42,
        )
        if confid != -1:
            break

    # Minimize whichever embedding succeeded (including the fallback one).
    if confid != -1 and minimize_maxIters > 0:
        # MMFFOptimizeMolecule returns:
        #    0 if the optimization converged,
        #   -1 if the forcefield could not be set up,
        #    1 if more iterations are required.
        status = AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
        if status == 1:
            # extend optimization to double the steps (extends by the same amount)
            AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)

    return _mol
53
+
54
+
55
def set_protein_to_new_coord(input_pdb_file, new_coord, output_file):
    """Write a copy of a PDB structure with every atom moved to one point.

    Args:
        input_pdb_file: Path (or file object) of the PDB file to read.
        new_coord: Sequence of three floats (x, y, z) assigned to every atom.
        output_file: Path (or file object) the modified PDB is written to.

    Raises:
        ValueError: If ``new_coord`` cannot be broadcast onto the
            structure's coordinate array (i.e. it is not a 3-vector).
    """
    structure = PDBFile.read(input_pdb_file).get_structure()

    # Assign in place so the point is broadcast over the existing coordinate
    # array whatever its layout: (atoms, 3) for a single model or
    # (models, atoms, 3) for a stack. Rebuilding the array from
    # len(structure.coord) would count models instead of atoms for a stack.
    structure.coord[...] = np.asarray(new_coord)

    out_pdb = PDBFile()
    out_pdb.set_structure(structure)
    out_pdb.write(output_file)
61
+
62
+
63
def predict(input_sequence, input_ligand, input_msa, input_protein):
    """Generate a ligand conformer and collapse the protein onto its centroid.

    Args:
        input_sequence: Protein sequence from the UI (not used here).
        input_ligand: Ligand SMILES string.
        input_msa: MSA input from the UI (not used here).
        input_protein: Path of the input protein PDB file.

    Returns:
        A tuple ``(files, metrics, run_time)`` where ``files`` is
        ``["test_out.pdb", "test_docking_pose.sdf"]``, ``metrics`` is a dict
        (currently empty) and ``run_time`` is the elapsed time in seconds.
    """
    start_time = time.time()

    # Do inference here
    mol = generate_input_conformer(input_ligand, minimize_maxIters=100)
    with Chem.SDWriter("test_docking_pose.sdf") as writer:
        writer.write(mol)
    mol_coords = mol.GetConformer().GetPositions()

    # Average over atoms (axis=0) to get the (x, y, z) centroid of the
    # ligand; axis=1 would instead yield one value per atom.
    new_coord = np.mean(mol_coords, axis=0)
    output_file = "test_out.pdb"
    set_protein_to_new_coord(input_protein, new_coord, output_file)

    # return an output pdb file with the protein and ligand with resname LIG or UNK.
    # also return any metrics you want to log, metrics will not be used for evaluation but might be useful for users
    # (e.g. {"mean_plddt": 80, "binding_affinity": -2}); none are computed here.
    metrics = {}

    end_time = time.time()
    run_time = end_time - start_time
    return ["test_out.pdb", "test_docking_pose.sdf"], metrics, run_time
 
108
  gr.Examples(
109
  [
110
  [
111
+ None,
112
  "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
113
+ None,
114
  "test_out.pdb"
115
  ],
116
  ],
117
+ [input_sequence, input_ligand, input_msa, input_protein],
118
  )
119
  reps = [
120
  {
 
134
  metrics = gr.JSON(label="Metrics")
135
  run_time = gr.Textbox(label="Runtime")
136
 
137
# predict takes four positional arguments, so all four input components
# (including input_msa) must be wired in, in the same order as its signature.
btn.click(predict, inputs=[input_sequence, input_ligand, input_msa, input_protein], outputs=[out, metrics, run_time])
138
 
139
  app.launch()