File size: 6,894 Bytes
bebad14
 
 
dffaf30
 
bebad14
 
5e24610
c1225dc
5e24610
 
 
d35b73e
bebad14
28cb117
5e24610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40a2fdf
c1225dc
 
 
 
 
 
 
ad31760
c1225dc
 
 
 
 
 
 
 
40a2fdf
d35b73e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a15a26
d35b73e
 
 
 
4a15a26
d35b73e
 
 
 
 
 
5b72455
d35b73e
 
 
 
 
48ff744
bebad14
5e24610
bebad14
c1225dc
5e24610
48ff744
 
 
40a2fdf
 
 
d35b73e
3f86c33
 
 
d35b73e
244f1ce
d35b73e
c1225dc
d35b73e
5e24610
d35b73e
5e24610
4853a01
dadbe2e
d35b73e
5e24610
bebad14
 
dadbe2e
bebad14
f624b87
bebad14
 
 
 
 
f354223
bebad14
c0df2f3
43105fe
f354223
 
bebad14
 
 
28cb117
bebad14
 
 
 
44470f9
28cb117
 
 
 
48ff744
28cb117
063629c
a8e9ffd
28cb117
 
5e24610
28cb117
 
 
 
296aa30
 
4853a01
 
c0df2f3
4853a01
 
28cb117
4853a01
28cb117
 
 
dadbe2e
bebad14
 
48ff744
dffaf30
bebad14
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197

import time

import gradio as gr

from gradio_molecule3d import Molecule3D

import numpy as np
from scipy.optimize import differential_evolution, NonlinearConstraint
from biotite.structure.io.pdb import PDBFile
from rdkit import Chem
from rdkit.Chem import AllChem
from biotite.structure import AtomArrayStack


def generate_input_conformer(
    ligand_smiles: str,
    addHs: bool = False,
    minimize_maxIters: int = -1,
) -> Chem.Mol:
    """Build an RDKit molecule with an embedded 3D conformer from a SMILES string.

    Explicit hydrogens are always added before embedding. Embedding is tried
    first with ``useBasicKnowledge=True`` and, if that fails, retried with
    ``useBasicKnowledge=False``. When ``minimize_maxIters > 0`` the embedded
    conformer is MMFF-minimised regardless of which attempt produced it.

    Args:
        ligand_smiles: SMILES string of the ligand.
        addHs: Currently unused; kept for interface compatibility. Hydrogens
            are added unconditionally.
        minimize_maxIters: Maximum MMFF iterations per optimisation call.
            Values ``<= 0`` disable minimisation.

    Returns:
        The molecule with explicit hydrogens. If both embedding attempts fail
        the molecule is returned without a conformer.

    Raises:
        ValueError: If ``ligand_smiles`` cannot be parsed by RDKit.
    """
    _mol = Chem.MolFromSmiles(ligand_smiles)
    if _mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a clear message instead of inside AddHs.
        raise ValueError(f"Could not parse ligand SMILES: {ligand_smiles!r}")
    # need to add Hs to generate sensible conformers
    _mol = Chem.AddHs(_mol)

    embed_options = dict(useRandomCoords=True, maxAttempts=100, randomSeed=42)

    # first attempt: embedding with basic-knowledge terms enabled
    confid = AllChem.EmbedMolecule(_mol, useBasicKnowledge=True, **embed_options)
    if confid == -1:
        # EmbedMolecule failed - try the less optimal approach
        confid = AllChem.EmbedMolecule(_mol, useBasicKnowledge=False, **embed_options)

    if confid != -1 and minimize_maxIters > 0:
        # molecule successfully embedded - minimize
        status = AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
        # 0 if the optimization converged,
        # -1 if the forcefield could not be set up,
        # 1 if more iterations are required.
        if status == 1:
            # extend optimization to double the steps (extends by the same amount)
            AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
    return _mol


def optimize_coordinate(points, bound_buffer=15, dmin=6.02, dmax=8, seed=None):
    """Find a coordinate close to all ``points`` while keeping a minimum distance.

    Differential evolution minimises the summed Euclidean distance from the
    candidate coordinate to every point, subject to the distance to the
    *nearest* point lying within ``[dmin, dmax]``.

    Args:
        points: Array-like of shape ``(n_points, n_dims)``.
        bound_buffer: Half-width of the search box, centred on the centroid
            of ``points``, along every axis.
        dmin: Lower bound on the distance to the nearest point.
        dmax: Upper bound on the distance to the nearest point.
        seed: Optional seed forwarded to ``differential_evolution`` for
            reproducible results; ``None`` keeps the run stochastic.

    Returns:
        Tuple ``(x, fun)``: the optimised coordinate and the objective value
        (sum of distances) reported by the optimiser.

    Raises:
        ValueError: If ``points`` is not a non-empty 2-D array.
    """
    points = np.asarray(points, dtype=float)
    if points.ndim != 2 or points.shape[0] == 0:
        raise ValueError("points must be a non-empty (n_points, n_dims) array")

    # Search box: centroid +/- bound_buffer on each axis (centroid computed once)
    centroid = points.mean(axis=0)
    bounds = list(zip(centroid - bound_buffer, centroid + bound_buffer))

    def distances(x):
        return np.linalg.norm(points - x, axis=1)

    # Constraint: nearest point must be between dmin and dmax away
    con = NonlinearConstraint(lambda x: np.min(distances(x)), dmin, dmax)

    # Objective: minimize the summed distance to all points
    def objective(x):
        return np.sum(distances(x))

    # Perform differential evolution to find the optimal coordinate
    result = differential_evolution(objective, bounds, constraints=con, seed=seed)
    return result.x, result.fun


def optimize_decoy_coordinate(points, bound_buffer=15, dmin=6.02, decoy_min=4.0, decoy_max=4.98, seed=None):
    """Find a decoy coordinate that is near exactly one point and far from the rest.

    Differential evolution maximises the summed Euclidean distance from the
    candidate coordinate to every point, subject to two constraints:

    * exactly one point lies closer than ``dmin``;
    * the distance to the nearest point lies within
      ``[decoy_min, decoy_max]``.

    Args:
        points: Array-like of shape ``(n_points, n_dims)``.
        bound_buffer: Half-width of the search box, centred on the centroid
            of ``points``, along every axis.
        dmin: Distance threshold below which a point counts as "close".
        decoy_min: Lower bound on the distance to the nearest point.
        decoy_max: Upper bound on the distance to the nearest point.
        seed: Optional seed forwarded to ``differential_evolution`` for
            reproducible results; ``None`` keeps the run stochastic.

    Returns:
        Tuple ``(x, fun)``: the optimised coordinate and the objective value
        reported by the optimiser (the *negated* sum of distances).

    Raises:
        ValueError: If ``points`` is not a non-empty 2-D array.
    """
    points = np.asarray(points, dtype=float)
    if points.ndim != 2 or points.shape[0] == 0:
        raise ValueError("points must be a non-empty (n_points, n_dims) array")

    # Search box: centroid +/- bound_buffer on each axis (centroid computed once)
    centroid = points.mean(axis=0)
    bounds = list(zip(centroid - bound_buffer, centroid + bound_buffer))

    def distances(x):
        return np.linalg.norm(points - x, axis=1)

    # Constraint 1: exactly one point is closer than dmin
    con1 = NonlinearConstraint(lambda x: np.sum(distances(x) < dmin), 1, 1)
    # Constraint 2: that nearest point sits within [decoy_min, decoy_max]
    con2 = NonlinearConstraint(lambda x: np.min(distances(x)), decoy_min, decoy_max)

    # Objective: maximize the summed distance (negated because DE minimizes)
    def objective(x):
        return - np.sum(distances(x))

    # Perform differential evolution to find the optimal coordinate
    result = differential_evolution(objective, bounds, constraints=(con1, con2), seed=seed)
    return result.x, result.fun


def add_decoy_atom(structure, decoy_pos):
    """Return ``structure`` with a single decoy atom appended.

    A one-atom, one-model ``AtomArrayStack`` is created, placed at
    ``decoy_pos`` and annotated as atom ``C`` (element ``C``) of residue
    ``GLY`` in chain ``q``. The result is ``structure + decoy``; the input
    structure is not modified by this function.
    """
    decoy_stack = AtomArrayStack(length=1, depth=1)

    # Broadcast the requested position over the stack's coordinate array
    coord_template = decoy_stack.coord
    decoy_stack.coord = np.ones_like(coord_template) * decoy_pos

    # Annotations are assigned in the same order as attribute assignments would be
    decoy_annotations = {
        "chain_id": ["q"],
        "element": ["C"],
        "atom_name": ["C"],
        "res_name": ["GLY"],
    }
    for annotation_name, annotation_values in decoy_annotations.items():
        setattr(decoy_stack, annotation_name, annotation_values)

    return structure + decoy_stack


def set_protein_to_new_coord_plus_decoy_atom(input_pdb_file, new_coord, decoy_coord, output_file):
    """Write a PDB in which every protein atom sits at ``new_coord`` plus a decoy atom.

    The structure is read from ``input_pdb_file``, all of its coordinates are
    overwritten with ``new_coord``, a decoy atom at ``decoy_coord`` is
    appended via ``add_decoy_atom`` and the result is written to
    ``output_file``.
    """
    protein = PDBFile.read(input_pdb_file).get_structure()

    # Collapse every atom onto the single target coordinate
    target = np.array(new_coord)
    protein.coord = np.ones_like(protein.coord) * target

    # add decoy
    with_decoy = add_decoy_atom(protein, decoy_coord)

    pdb_out = PDBFile()
    pdb_out.set_structure(with_decoy)
    pdb_out.write(output_file)

    
def predict(input_sequence, input_ligand, input_msa, input_protein):
    """Run the inference pipeline for one protein/ligand pair.

    A ligand conformer is generated from the SMILES and written to
    ``test_docking_pose.sdf``. A coordinate and a decoy coordinate are then
    optimised against the ligand's heavy atoms, and the input protein is
    written to ``test_out.pdb`` with all atoms moved to the optimised
    coordinate plus one decoy atom.

    Args:
        input_sequence: Protein sequence input (not used by this function).
        input_ligand: Ligand SMILES string.
        input_msa: MSA input (not used by this function).
        input_protein: Protein PDB input, passed through to ``PDBFile.read``.

    Returns:
        Tuple ``(files, metrics, run_time)`` where ``files`` is
        ``[pdb_path, sdf_path]``, ``metrics`` is a dict with keys
        ``min_dist``, ``min_dist_sum`` and ``decoy_min_dist``, and
        ``run_time`` is the elapsed wall-clock time in seconds.
    """
    start_time = time.time()

    ligand_file = "test_docking_pose.sdf"
    output_file = "test_out.pdb"

    # Do inference here
    mol = generate_input_conformer(input_ligand, minimize_maxIters=500)

    # Close the writer explicitly so the SDF is fully flushed to disk before
    # its path is handed back to the caller.
    molwriter = Chem.SDWriter(ligand_file)
    try:
        molwriter.write(mol)
    finally:
        molwriter.close()

    # get only non hydrogen atoms
    heavy_atom_mask = [at.GetAtomicNum() != 1 for at in mol.GetAtoms()]
    mol_coords = mol.GetConformer().GetPositions()[heavy_atom_mask]
    # get opt coords
    new_coord, min_dist_sum = optimize_coordinate(mol_coords)
    # get mindist to protein
    min_dist = np.min(np.linalg.norm(mol_coords - new_coord, axis=1))
    # decoy coord
    decoy_coord, _ = optimize_decoy_coordinate(mol_coords)
    decoy_min_dist = np.min(np.linalg.norm(mol_coords - decoy_coord, axis=1))

    # save protein
    set_protein_to_new_coord_plus_decoy_atom(input_protein, new_coord, decoy_coord, output_file)

    # return an output pdb file with the protein and ligand with resname LIG or UNK.
    # also return any metrics you want to log, metrics will not be used for evaluation but might be useful for users
    metrics = {"min_dist": min_dist, "min_dist_sum": min_dist_sum, "decoy_min_dist": decoy_min_dist}

    run_time = time.time() - start_time
    return [output_file, ligand_file], metrics, run_time

# Gradio UI definition. Components are created inside the Blocks context in
# the order they appear below, so statement order here is significant.
with gr.Blocks() as app:

    gr.Markdown("# Template for inference")

    gr.Markdown("Title, description, and other information about the model")   
    # First row: free-text inputs (protein sequence and ligand SMILES)
    with gr.Row():
        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
    # Second row: file inputs (MSA and protein structure)
    with gr.Row():
        input_msa = gr.File(label="Input Protein MSA (A3M)")
        input_protein = gr.File(label="Input protein monomer")
        
    
    # define any options here

    # for automated inference the default options are used
    # slider_option = gr.Slider(0,10, label="Slider Option")
    # checkbox_option = gr.Checkbox(label="Checkbox Option")
    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

    btn = gr.Button("Run Inference")

    # One example row; values are listed in the same order as the input
    # components given in the second argument.
    gr.Examples(
        [
            [
                "",
                "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
                "empty_file.a3m",
                "test_input.pdb"
            ],
        ],
        [input_sequence, input_ligand, input_msa, input_protein],
    )
    # Viewer representations: model 0 as gray-carbon spheres, model 1 as
    # green-carbon sticks.
    # NOTE(review): presumably the model indices follow the order of the files
    # returned by predict (PDB first, SDF second) - confirm against the
    # gradio_molecule3d documentation.
    reps =    [
    {
      "model": 0,
      "style": "sphere",
      "color": "grayCarbon",
    },
        {
      "model": 1,
      "style": "stick",
      "color": "greenCarbon",
    }
        
  ]
    
    # Output components, in the order predict returns its three values
    out = Molecule3D(reps=reps)
    metrics = gr.JSON(label="Metrics")
    run_time = gr.Textbox(label="Runtime")

    # Wire the button: the four inputs are passed to predict positionally and
    # its return values are routed to the three output components.
    btn.click(predict, inputs=[input_sequence, input_ligand, input_msa, input_protein], outputs=[out, metrics, run_time])

# Start the Gradio server at import/run time (module-level side effect).
app.launch()