|
from __future__ import annotations |
|
from pathlib import Path |
|
import time |
|
from biotite.application.autodock import VinaApp |
|
|
|
import gradio as gr |
|
|
|
from gradio_molecule3d import Molecule3D |
|
from gradio_molecule2d import molecule2d |
|
import numpy as np |
|
from rdkit import Chem |
|
from rdkit.Chem import AllChem |
|
import pandas as pd |
|
from biotite.structure import centroid, from_template |
|
from biotite.structure.io import load_structure |
|
from biotite.structure.io.mol import MOLFile, SDFile |
|
from biotite.structure.io.pdb import PDBFile |
|
|
|
from plinder.eval.docking.write_scores import evaluate |
|
|
|
|
|
EVAL_METRICS = ["system", "LDDT-PLI", "LDDT-LP", "BISY-RMSD"] |
|
|
|
|
|
def vina( |
|
ligand, receptor, pocket_center, output_folder: Path, size=10, max_num_poses=5 |
|
): |
|
app = VinaApp( |
|
ligand, |
|
receptor, |
|
center=pocket_center, |
|
size=[size, size, size], |
|
) |
|
app.set_max_number_of_models(max_num_poses) |
|
app.start() |
|
app.join() |
|
docked_ligand = from_template(ligand, app.get_ligand_coord()) |
|
docked_ligand = docked_ligand[..., ~np.isnan(docked_ligand.coord[0]).any(axis=-1)] |
|
output_files = [] |
|
for i in range(max_num_poses): |
|
sdf_file = MOLFile() |
|
sdf_file.set_structure(docked_ligand[i]) |
|
output_file = output_folder / f"docked_ligand_{i}.sdf" |
|
sdf_file.write(output_file) |
|
output_files.append(output_file) |
|
return output_files |
|
|
|
|
|
def predict( |
|
input_sequence: str, |
|
input_ligand: str, |
|
input_msa: gr.File | None = None, |
|
input_protein: gr.File | None = None, |
|
max_num_poses: int = 1, |
|
): |
|
""" |
|
Main prediction function that calls ligsite and smina |
|
Parameters |
|
---------- |
|
input_sequence: str |
|
monomer sequence |
|
input_ligand: str |
|
ligand as SMILES string |
|
input_msa: gradio.File | None |
|
Gradio file object to MSA a3m file |
|
input_protein: gradio.File | None |
|
Gradio file object to monomer protein structure as CIF file |
|
max_num_poses: int |
|
Number of poses to generate |
|
Returns |
|
------- |
|
output_structures: tuple |
|
(output_protein, output_ligand_sdf) |
|
run_time: float |
|
run time of the program |
|
""" |
|
start_time = time.time() |
|
|
|
if input_protein is None: |
|
raise gr.Error("need input_protein") |
|
print(input_protein) |
|
ligand_file = Path(input_protein).parent / "ligand.sdf" |
|
print(ligand_file) |
|
conformer = Chem.AddHs(Chem.MolFromSmiles(input_ligand)) |
|
AllChem.EmbedMolecule(conformer) |
|
AllChem.MMFFOptimizeMolecule(conformer) |
|
Chem.SDWriter(ligand_file).write(conformer) |
|
ligand = SDFile.read(ligand_file).record.get_structure() |
|
receptor = load_structure(input_protein, include_bonds=True) |
|
docking_poses = vina( |
|
ligand, |
|
receptor, |
|
centroid(receptor), |
|
Path(input_protein).parent, |
|
max_num_poses=max_num_poses, |
|
) |
|
end_time = time.time() |
|
run_time = end_time - start_time |
|
pdb_file = PDBFile() |
|
pdb_file.set_structure(receptor) |
|
output_pdb = Path(input_protein).parent / "receptor.pdb" |
|
pdb_file.write(output_pdb) |
|
return [str(output_pdb), str(docking_poses[0])], run_time |
|
|
|
|
|
def get_metrics( |
|
system_id: str, |
|
receptor_file: Path, |
|
ligand_file: Path, |
|
flexible: bool = True, |
|
posebusters: bool = True, |
|
) -> tuple[pd.DataFrame, float]: |
|
start_time = time.time() |
|
metrics = pd.DataFrame( |
|
[ |
|
evaluate( |
|
model_system_id=system_id, |
|
reference_system_id=system_id, |
|
receptor_file=receptor_file, |
|
ligand_file_list=[Path(ligand_file)], |
|
flexible=flexible, |
|
posebusters=posebusters, |
|
posebusters_full=False, |
|
).get("LIG_0", {}) |
|
] |
|
) |
|
if posebusters: |
|
metrics["posebusters"] = metrics[ |
|
[col for col in metrics.columns if col.startswith("posebusters_")] |
|
].sum(axis=1) |
|
metrics["posebusters_valid"] = metrics[ |
|
[col for col in metrics.columns if col.startswith("posebusters_")] |
|
].sum(axis=1) == 20 |
|
columns = ["reference", "lddt_pli_ave", "lddt_lp_ave", "bisy_rmsd_ave"] |
|
if flexible: |
|
columns.extend(["lddt", "bb_lddt"]) |
|
if posebusters: |
|
columns.extend([col for col in metrics.columns if col.startswith("posebusters")]) |
|
|
|
metrics = metrics[columns].copy() |
|
mapping = { |
|
"lddt_pli_ave": "LDDT-PLI", |
|
"lddt_lp_ave": "LDDT-LP", |
|
"bisy_rmsd_ave": "BISY-RMSD", |
|
"reference": "system", |
|
} |
|
if flexible: |
|
mapping["lddt"] = "LDDT" |
|
mapping["bb_lddt"] = "Backbone LDDT" |
|
if posebusters: |
|
mapping["posebusters"] = "PoseBusters #checks" |
|
mapping["posebusters_valid"] = "PoseBusters valid" |
|
metrics.rename( |
|
columns=mapping, |
|
inplace=True, |
|
) |
|
end_time = time.time() |
|
run_time = end_time - start_time |
|
return metrics, run_time |
|
|
|
|
|
with gr.Blocks() as app: |
|
with gr.Tab("🧬 PINDER evaluation template"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
input_system_id_pinder = gr.Textbox(label="PINDER system ID") |
|
input_receptor_file_pinder = gr.File(label="Receptor file") |
|
input_ligand_file_pinder = gr.File(label="Ligand file") |
|
methodname_pinder = gr.Textbox(label="Name of your method in the format mlsb/spacename") |
|
store_pinder = gr.Checkbox(label="Store on huggingface for leaderboard", value=False) |
|
eval_btn_pinder = gr.Button("Run Evaluation") |
|
|
|
|
|
|
|
|
|
with gr.Tab("⚖️ PLINDER evaluation template"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
input_system_id = gr.Textbox(label="PLINDER system ID") |
|
input_receptor_file = gr.File(label="Receptor file (CIF)") |
|
input_ligand_file = gr.File(label="Ligand file (SDF)") |
|
flexible = gr.Checkbox(label="Flexible docking", value=True) |
|
posebusters = gr.Checkbox(label="PoseBusters", value=True) |
|
methodname = gr.Textbox(label="Name of your method in the format mlsb/spacename") |
|
store = gr.Checkbox(label="Store on huggingface for leaderboard", value=False) |
|
|
|
eval_btn = gr.Button("Run Evaluation") |
|
gr.Examples( |
|
[ |
|
[ |
|
"4neh__1__1.B__1.H", |
|
"input_protein_test.cif", |
|
"input_ligand_test.sdf", |
|
True, |
|
True, |
|
], |
|
], |
|
[input_system_id, input_receptor_file, input_ligand_file, flexible, posebusters, methodname, store], |
|
) |
|
eval_run_time = gr.Textbox(label="Evaluation runtime") |
|
metric_table = gr.DataFrame( |
|
pd.DataFrame([], columns=EVAL_METRICS), label="Evaluation metrics" |
|
) |
|
|
|
metric_table_pinder = gr.DataFrame( |
|
pd.DataFrame([], columns=EVAL_METRICS_PINDER), label="Evaluation metrics" |
|
) |
|
|
|
eval_btn.click( |
|
get_metrics, |
|
inputs=[input_system_id, input_receptor_file, input_ligand_file, flexible, posebusters], |
|
outputs=[metric_table, eval_run_time], |
|
) |
|
eval_btn_pinder.click( |
|
get_metrics_pinder, |
|
inputs=[input_system_id_pinder, input_receptor_file_pinder, input_ligand_file_pinder, methodname_pinder, store_pinder], |
|
outputs=[metric_table_pinder, eval_run_time], |
|
) |
|
|
|
app.launch() |