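"""Gradio app for the MLSB 2024 leaderboard: evaluates PINDER (protein-protein)
and PLINDER (protein-ligand) predictions and optionally uploads the results to
the Hugging Face eval queue."""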
from __future__ import annotations

import os
import tempfile
import time
from pathlib import Path

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi

# NOTE: the import path for BiotiteDockQ is assumed from the pinder package
# layout; adjust if your installed version differs.
from pinder.eval.dockq.biotite_dockq import BiotiteDockQ
from plinder.eval.docking.write_scores import evaluate

# Column order for the metric tables rendered in the UI.
EVAL_METRICS = ["system", "LDDT-PLI", "LDDT-LP", "BISY-RMSD"]
EVAL_METRICS_PINDER = ["system", "L_rms", "I_rms", "F_nat", "DOCKQ", "CAPRI_class"]

TOKEN = os.environ.get("HF_TOKEN")
OWNER = "MLSB"

REPO_ID = f"{OWNER}/leaderboard2024"
QUEUE_REPO = f"{OWNER}/requests"
RESULTS_REPO = f"{OWNER}/results"

CACHE_PATH = os.getenv("HF_HOME", ".")

EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")

API = HfApi(token=TOKEN)


def get_metrics(
    system_id: str,
    receptor_file: Path,
    ligand_file: Path,
    flexible: bool = True,
    posebusters: bool = True,
    methodname: str = "",
    store: bool = True,
) -> tuple[pd.DataFrame, float]:
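    """Score a PLINDER prediction against its reference system.

    Runs plinder's `evaluate` on the uploaded receptor/ligand pair, renames the
    raw metric columns to the leaderboard names, and optionally uploads the
    metrics and input files to the eval queue repo. Returns the metric table
    (as a visible gr.DataFrame) and the wall-clock runtime in seconds.
    """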
    start_time = time.time()
    # gr.File may hand back a filepath string or a tempfile-like object with a
    # .name attribute; normalise both inputs to Path up front.
    receptor_file = Path(getattr(receptor_file, "name", receptor_file))
    ligand_file = Path(getattr(ligand_file, "name", ligand_file))
    metrics = pd.DataFrame(
        [
            evaluate(
                model_system_id=system_id,
                reference_system_id=system_id,
                receptor_file=receptor_file,
                ligand_file_list=[ligand_file],
                flexible=flexible,
                posebusters=posebusters,
                posebusters_full=False,
            ).get("LIG_0", {})
        ]
    )
    if posebusters:
        check_cols = [col for col in metrics.columns if col.startswith("posebusters_")]
        metrics["posebusters"] = metrics[check_cols].sum(axis=1)
        # PoseBusters runs 20 individual checks; a pose is valid only if all pass.
        metrics["posebusters_valid"] = metrics["posebusters"] == 20
    columns = ["reference", "lddt_pli_ave", "lddt_lp_ave", "bisy_rmsd_ave"]
    if flexible:
        columns.extend(["lddt", "bb_lddt"])
    if posebusters:
        columns.extend([col for col in metrics.columns if col.startswith("posebusters")])

    metrics = metrics[columns].copy()
    mapping = {
        "lddt_pli_ave": "LDDT-PLI",
        "lddt_lp_ave": "LDDT-LP",
        "bisy_rmsd_ave": "BISY-RMSD",
        "reference": "system",
    }
    if flexible:
        mapping["lddt"] = "LDDT"
        mapping["bb_lddt"] = "Backbone LDDT"
    if posebusters:
        mapping["posebusters"] = "PoseBusters #checks"
        mapping["posebusters_valid"] = "PoseBusters valid"
    metrics.rename(columns=mapping, inplace=True)
    if store:
        # NOTE: the subfolder name is an assumption; the original snippet left
        # `dataset` undefined here.
        dataset = "plinder"
        with tempfile.NamedTemporaryFile(suffix=".csv") as temp:
            metrics.to_csv(temp.name)
            API.upload_file(
                path_or_fileobj=temp.name,
                path_in_repo=f"{dataset}/{methodname}/{system_id}/metrics.csv",
                repo_id=QUEUE_REPO,
                repo_type="dataset",
                commit_message=f"Add {methodname} to eval queue",
            )
        API.upload_file(
            path_or_fileobj=str(receptor_file),
            path_in_repo=f"{dataset}/{methodname}/{system_id}/{receptor_file.name}",
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {methodname} to eval queue",
        )
        API.upload_file(
            path_or_fileobj=str(ligand_file),
            path_in_repo=f"{dataset}/{methodname}/{system_id}/{ligand_file.name}",
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {methodname} to eval queue",
        )
    end_time = time.time()
    run_time = end_time - start_time
    return gr.DataFrame(metrics, visible=True), run_time


def get_metrics_pinder(
    system_id: str,
    complex_file: Path,
    methodname: str = "",
    store: bool = True,
) -> tuple[pd.DataFrame, float]:
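    """Score a predicted PINDER complex against the bundled ground truth.

    Computes DockQ-style metrics with pinder's BiotiteDockQ; on failure, falls
    back to a worst-case row so the table still renders. Returns the metric
    table (as a visible gr.DataFrame) and the wall-clock runtime in seconds.
    """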
    start_time = time.time()
    # gr.File may hand back a filepath string or a tempfile-like object with a
    # .name attribute; normalise to Path up front.
    complex_file = Path(getattr(complex_file, "name", complex_file))

    # Reference structures are expected to ship alongside the app.
    native = Path(f"./ground_truth/{system_id}.pdb")

    try:
        bdq = BiotiteDockQ(native, complex_file, parallel_io=False)
        metrics = bdq.calculate()
        metrics = metrics[["system", "LRMS", "iRMS", "Fnat", "DockQ", "CAPRI"]].copy()
        metrics.rename(
            columns={
                "LRMS": "L_rms",
                "iRMS": "I_rms",
                "Fnat": "F_nat",
                "DockQ": "DOCKQ",
                "CAPRI": "CAPRI_class",
            },
            inplace=True,
        )
    except Exception as e:
        # Worst-case placeholder row so a failed prediction still shows up.
        failed_metrics = {"L_rms": 100.0, "I_rms": 100.0, "F_nat": 0.0, "DOCKQ": 0.0, "CAPRI_class": "Incorrect"}
        metrics = pd.DataFrame([failed_metrics])
        metrics["system"] = native.stem
        # gr.Warning surfaces the message without aborting the request (a
        # raised gr.Error would stop before the fallback row is returned).
        gr.Warning(f"Failed to evaluate prediction [{complex_file}]:\n{e}")
    if store:
        # NOTE: the subfolder name is an assumption; the original snippet left
        # `dataset` undefined here.
        dataset = "pinder"
        with tempfile.NamedTemporaryFile(suffix=".csv") as temp:
            metrics.to_csv(temp.name)
            API.upload_file(
                path_or_fileobj=temp.name,
                path_in_repo=f"{dataset}/{methodname}/{system_id}/metrics.csv",
                repo_id=QUEUE_REPO,
                repo_type="dataset",
                commit_message=f"Add {methodname} to eval queue",
            )
        API.upload_file(
            path_or_fileobj=str(complex_file),
            path_in_repo=f"{dataset}/{methodname}/{system_id}/{complex_file.name}",
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {methodname} to eval queue",
        )
    end_time = time.time()
    run_time = end_time - start_time
    return gr.DataFrame(metrics, visible=True), run_time


with gr.Blocks() as app:
    with gr.Tab("🧬 PINDER evaluation template"):
        with gr.Row():
            with gr.Column():
                input_system_id_pinder = gr.Textbox(label="PINDER system ID")
                # This input is the full predicted complex, not just a receptor.
                input_complex_pinder = gr.File(label="Predicted complex file")
                methodname_pinder = gr.Textbox(label="Name of your method in the format mlsb/spacename")
                store_pinder = gr.Checkbox(label="Store on Hugging Face for leaderboard", value=False)
                gr.Examples(
                    [
                        [
                            "4neh__1__1.B__1.H",
                            "input_protein_test.cif",
                            "mlsb/test",
                            False,
                        ],
                    ],
                    [input_system_id_pinder, input_complex_pinder, methodname_pinder, store_pinder],
                )
                eval_btn_pinder = gr.Button("Run Evaluation")

    with gr.Tab("⚖️ PLINDER evaluation template"):
        with gr.Row():
            with gr.Column():
                input_system_id = gr.Textbox(label="PLINDER system ID")
                input_receptor_file = gr.File(label="Receptor file (CIF)")
                input_ligand_file = gr.File(label="Ligand file (SDF)")
                flexible = gr.Checkbox(label="Flexible docking", value=True)
                posebusters = gr.Checkbox(label="PoseBusters", value=True)
                methodname = gr.Textbox(label="Name of your method in the format mlsb/spacename")
                store = gr.Checkbox(label="Store on Hugging Face for leaderboard", value=False)
                gr.Examples(
                    [
                        [
                            "4neh__1__1.B__1.H",
                            "input_protein_test.cif",
                            "input_ligand_test.sdf",
                            True,
                            True,
                            "mlsb/test",
                            False,
                        ],
                    ],
                    [input_system_id, input_receptor_file, input_ligand_file, flexible, posebusters, methodname, store],
                )
                eval_btn = gr.Button("Run Evaluation")

    eval_run_time = gr.Textbox(label="Evaluation runtime")
    metric_table = gr.DataFrame(
        pd.DataFrame([], columns=EVAL_METRICS), label="Evaluation metrics", visible=False
    )
    metric_table_pinder = gr.DataFrame(
        pd.DataFrame([], columns=EVAL_METRICS_PINDER), label="Evaluation metrics", visible=False
    )

    eval_btn.click(
        get_metrics,
        inputs=[input_system_id, input_receptor_file, input_ligand_file, flexible, posebusters, methodname, store],
        outputs=[metric_table, eval_run_time],
    )
    eval_btn_pinder.click(
        get_metrics_pinder,
        inputs=[input_system_id_pinder, input_complex_pinder, methodname_pinder, store_pinder],
        outputs=[metric_table_pinder, eval_run_time],
    )

app.launch()
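
# Run this script directly with Python; Gradio serves on http://127.0.0.1:7860
# by default. Set HF_TOKEN in the environment before enabling the `store`
# uploads to the leaderboard queue.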