add all model variations
- dockformer/config.py +9 -3
- dockformer/data/data_pipeline.py +36 -21
- dockformer/model/heads.py +29 -11
- dockformer/model/model.py +2 -1
- dockformer/utils/loss.py +23 -0
- env_consts.py +6 -1
- inference_app.py +10 -3
- resources/run_config.json +1 -1
- run_on_seq.py +2 -2
- run_pretrained_model.py +17 -10
dockformer/config.py
CHANGED
@@ -250,6 +250,9 @@ config = mlc.ConfigDict(
                 "c_s": c_s,
                 "num_bins": aux_affinity_bins,
             },
+            "affinity_cls_reg": {
+                "c_s": c_s,
+            },
             "binding_site": {
                 "c_s": c_s,
                 "c_out": 1,
@@ -302,19 +305,22 @@ config = mlc.ConfigDict(
             "min_bin": 0,
             "max_bin": 15,
             "no_bins": aux_affinity_bins,
-            "weight": 0.
+            "weight": 0.03,
         },
         "affinity1d": {
             "min_bin": 0,
             "max_bin": 15,
             "no_bins": aux_affinity_bins,
-            "weight": 0.
+            "weight": 0.03,
         },
         "affinity_cls": {
             "min_bin": 0,
             "max_bin": 15,
             "no_bins": aux_affinity_bins,
-            "weight": 0.
+            "weight": 0.03,
         },
+        "affinity_cls_reg": {
+            "weight": 0.03,
+        },
         "fape_backbone": {
             "clamp_distance": 10.0,
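The three existing affinity heads move from weight 0. to 0.03, and the new affinity_cls_reg head joins at the same weight. A minimal sketch of how per-head "weight" entries in an mlc.ConfigDict are typically folded into a total loss; the helper and the dummy loss values below are illustrative, not the repo's actual AlphaFoldLoss:

# Illustrative sketch only: per-head "weight" entries gate auxiliary losses
# in a weighted sum; a 0. weight silences a head entirely.
import ml_collections as mlc
import torch

loss_cfg = mlc.ConfigDict({
    "affinity2d": {"weight": 0.03},
    "affinity1d": {"weight": 0.03},
    "affinity_cls": {"weight": 0.03},
    "affinity_cls_reg": {"weight": 0.03},
})

def weighted_total(losses):
    # losses: head name -> scalar tensor
    return sum(loss_cfg[name].weight * val for name, val in losses.items())

total = weighted_total({name: torch.tensor(1.0) for name in loss_cfg.keys()})
print(float(total))  # 0.12 = 4 heads * 0.03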
dockformer/data/data_pipeline.py
CHANGED
@@ -101,6 +101,18 @@ def _apply_protein_probablistic_transforms(tensors: FeatureTensorDict, cfg: mlc.
     return tensors
 
 
+def get_psuedo_beta(pdb_path: str) -> torch.Tensor:
+    """Get pseudo beta positions for a protein."""
+    with open(pdb_path, 'r') as f:
+        pdb_str = f.read()
+    protein_object = protein.from_pdb_string(pdb_str)
+    pdb_feats = make_protein_features(protein_object, "")
+    tensor_feats = _np_filter_and_to_tensor_dict(pdb_feats, ["aatype", "all_atom_positions", "all_atom_mask"])
+    pdb_feats = _apply_protein_transforms(tensor_feats)
+
+    return pdb_feats["pseudo_beta"]
+
+
 class DataPipeline:
     """Assembles input features."""
     def __init__(self, config: mlc.ConfigDict, mode: str):
@@ -200,37 +212,40 @@ class DataPipeline:
                 raise ValueError(f"Unknown key in sdf list features {k}")
         return joined_ligand_feats
 
+    @staticmethod
+    def _get_gt_positions(ref_ligand_path: str, gt_ligand_path: str):
+        ref_ligand = Chem.MolFromMolFile(ref_ligand_path)
+        gt_ligand = Chem.MolFromMolFile(gt_ligand_path)
+        gt_original_positions = gt_ligand.GetConformer(0).GetPositions()
+        gt_positions = [gt_original_positions[idx] for idx in gt_ligand.GetSubstructMatch(ref_ligand)]
+
+        if len(gt_positions) == 0:
+            from rdkit.Chem import rdFMCS
+            mcs_result = rdFMCS.FindMCS([ref_ligand, gt_ligand])
+            if mcs_result.canceled:
+                print("MCS search canceled, Error!!!! Can't map ref ligand to gt ligand")
+                gt_positions = gt_original_positions
+            else:
+                mcs_mol = Chem.MolFromSmarts(mcs_result.smartsString)
+                ref_match = ref_ligand.GetSubstructMatch(mcs_mol)
+                gt_match = gt_ligand.GetSubstructMatch(mcs_mol)
+                ref_to_gt_atom = {ref_idx: gt_idx for ref_idx, gt_idx in zip(ref_match, gt_match)}
+                gt_positions = [gt_original_positions[ref_to_gt_atom[i]] for i in sorted(list(ref_to_gt_atom.keys()))]
+
+        return gt_positions
+
     def get_matching_positions_list(self, ref_path_list: List[str], gt_path_list: List[str]):
         joined_gt_positions = []
 
         for ref_ligand_path, gt_ligand_path in zip(ref_path_list, gt_path_list):
-            ref_ligand = Chem.MolFromMolFile(ref_ligand_path)
-            gt_ligand = Chem.MolFromMolFile(gt_ligand_path)
-
-            gt_original_positions = gt_ligand.GetConformer(0).GetPositions()
-
-            gt_positions = [gt_original_positions[idx] for idx in gt_ligand.GetSubstructMatch(ref_ligand)]
+            gt_positions = self.get_matching_positions(ref_ligand_path, gt_ligand_path)
 
             joined_gt_positions.extend(gt_positions)
 
         return torch.tensor(np.array(joined_gt_positions)).float()
 
     def get_matching_positions(self, ref_ligand_path: str, gt_ligand_path: str):
-        ref_ligand = Chem.MolFromMolFile(ref_ligand_path)
-        gt_ligand = Chem.MolFromMolFile(gt_ligand_path)
-
-        gt_original_positions = gt_ligand.GetConformer(0).GetPositions()
-
-        gt_positions = [gt_original_positions[idx] for idx in gt_ligand.GetSubstructMatch(ref_ligand)]
-
-        # ref_positions = ref_ligand.GetConformer(0).GetPositions()
-        # for i in range(len(ref_positions)):
-        #     for j in range(i + 1, len(ref_positions)):
-        #         dist_ref = np.linalg.norm(ref_positions[i] - ref_positions[j])
-        #         dist_gt = np.linalg.norm(gt_positions[i] - gt_positions[j])
-        #         dist_gt = np.linalg.norm(gt_original_positions[i] - gt_original_positions[j])
-        #         if abs(dist_ref - dist_gt) > 1.0:
-        #             print(f"Distance mismatch {i} {j} {dist_ref} {dist_gt}")
+        gt_positions = self._get_gt_positions(ref_ligand_path, gt_ligand_path)
 
         return torch.tensor(np.array(gt_positions)).float()
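The new _get_gt_positions fallback handles reference/ground-truth pairs where a direct substructure match fails by mapping atoms through the maximum common substructure. A self-contained sketch of that RDKit pattern, using two illustrative SMILES in place of the SDF files:

# Sketch of the MCS-based atom mapping used in the fallback above.
# The molecules are illustrative stand-ins for the ref/gt SDF files.
from rdkit import Chem
from rdkit.Chem import rdFMCS

ref_ligand = Chem.MolFromSmiles("c1ccccc1O")  # phenol
gt_ligand = Chem.MolFromSmiles("c1ccccc1N")   # aniline: direct match fails

assert not gt_ligand.GetSubstructMatch(ref_ligand)  # empty tuple -> fallback

mcs_result = rdFMCS.FindMCS([ref_ligand, gt_ligand])
mcs_mol = Chem.MolFromSmarts(mcs_result.smartsString)

# Both matches list atom indices in MCS-atom order, so zipping them pairs
# each ref atom with its gt counterpart.
ref_match = ref_ligand.GetSubstructMatch(mcs_mol)
gt_match = gt_ligand.GetSubstructMatch(mcs_mol)
ref_to_gt_atom = dict(zip(ref_match, gt_match))
print(ref_to_gt_atom)  # ring-atom mapping; the O/N substituents drop out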
dockformer/model/heads.py
CHANGED
@@ -50,6 +50,10 @@ class AuxiliaryHeads(nn.Module):
             **config["affinity_cls"],
         )
 
+        self.affinity_cls_reg = AffinityClsTokenPredictorRegression(
+            **config["affinity_cls_reg"],
+        )
+
         self.binding_site = BindingSitePredictor(
             **config["binding_site"],
         )
@@ -60,7 +64,7 @@ class AuxiliaryHeads(nn.Module):
 
         self.config = config
 
-    def forward(self, outputs, inter_mask, affinity_mask):
+    def forward(self, outputs, inter_mask, affinity_mask, ligand_mask):
         aux_out = {}
         lddt_logits = self.plddt(outputs["sm"]["single"])
         aux_out["lddt_logits"] = lddt_logits
@@ -75,10 +79,12 @@ class AuxiliaryHeads(nn.Module):
 
         aux_out["affinity_2d_logits"] = self.affinity_2d(outputs["pair"], aux_out["inter_contact_logits"], inter_mask)
 
-        aux_out["affinity_1d_logits"] = self.affinity_1d(outputs["single"])
+        aux_out["affinity_1d_logits"] = self.affinity_1d(outputs["single"], ligand_mask)
 
         aux_out["affinity_cls_logits"] = self.affinity_cls(outputs["single"], affinity_mask)
 
+        aux_out["affinity_cls_reg_logits"] = self.affinity_cls_reg(outputs["single"], affinity_mask)
+
         aux_out["binding_site_logits"] = self.binding_site(outputs["single"])
 
         return aux_out
@@ -120,18 +126,14 @@ class Affinity1DPredictor(nn.Module):
         self.c_s = c_s
 
         self.linear1 = Linear(self.c_s, self.c_s, init="final")
+        self.out = Linear(self.c_s, num_bins, init="final")
 
-
-
-    def forward(self, s):
+    def forward(self, s, ligand_mask):
         # [*, N, C_out]
-        s = self.linear1(s)
-
-        # get an average over the sequence
-        s = torch.mean(s, dim=1)
+        s = nn.functional.relu(self.linear1(s))
+        mean_of_ligand = (s * ligand_mask.unsqueeze(-1)).sum(dim=1) / ligand_mask.sum(dim=1).unsqueeze(-1)
 
-
-        return logits
+        return self.out(mean_of_ligand)
 
 
 class AffinityClsTokenPredictor(nn.Module):
@@ -146,6 +148,22 @@ class AffinityClsTokenPredictor(nn.Module):
         return self.linear(affinity_tokens)
 
 
+class AffinityClsTokenPredictorRegression(nn.Module):
+    def __init__(self, c_s, **kwargs):
+        super(AffinityClsTokenPredictorRegression, self).__init__()
+
+        self.c_s = c_s
+        self.fc1 = nn.Linear(self.c_s, self.c_s)
+        self.fc2 = nn.Linear(self.c_s, self.c_s)
+        self.out = nn.Linear(self.c_s, 1)
+
+    def forward(self, s, affinity_mask):
+        affinity_tokens = (s * affinity_mask.unsqueeze(-1)).sum(dim=1)
+        x = nn.functional.relu(self.fc1(affinity_tokens))
+        x = nn.functional.relu(self.fc2(x))
+        return self.out(x)
+
+
 class BindingSitePredictor(nn.Module):
     def __init__(self, c_s, c_out, **kwargs):
         super(BindingSitePredictor, self).__init__()
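Affinity1DPredictor now pools only over ligand tokens instead of a plain torch.mean over the whole sequence. A toy sketch of that masked mean; all shapes are illustrative:

# Toy sketch of the masked mean in Affinity1DPredictor.forward: average the
# single representation over ligand tokens only.
import torch

batch, n_tokens, c_s = 2, 7, 4
s = torch.randn(batch, n_tokens, c_s)       # [*, N, C_s] single repr
ligand_mask = torch.zeros(batch, n_tokens)  # 1.0 where the token is ligand
ligand_mask[:, 4:] = 1.0                    # here: last 3 tokens are ligand

mean_of_ligand = (s * ligand_mask.unsqueeze(-1)).sum(dim=1) / ligand_mask.sum(dim=1).unsqueeze(-1)
print(mean_of_ligand.shape)  # torch.Size([2, 4]) == [batch, c_s]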
dockformer/model/model.py
CHANGED
@@ -313,6 +313,7 @@ class AlphaFold(nn.Module):
         outputs["num_recycles"] = torch.tensor(num_recycles, device=feats["aatype"].device)
 
         # Run auxiliary heads, remove the recycling dimension batch properties
-        outputs.update(self.aux_heads(outputs, batch["inter_pair_mask"][..., 0], batch["affinity_mask"][..., 0]))
+        outputs.update(self.aux_heads(outputs, batch["inter_pair_mask"][..., 0], batch["affinity_mask"][..., 0],
+                                      batch["ligand_mask"][..., 0]))
 
         return outputs
dockformer/utils/loss.py
CHANGED
@@ -670,6 +670,25 @@ def affinity_loss(
     # print("after factor", after_factor.shape, after_factor, affinity_loss_factor.sum(), mean_val)
     return mean_val
 
+def affinity_loss_reg(
+        logits,
+        affinity,
+        affinity_loss_factor,
+        **kwargs,
+):
+    # apply mse loss
+    errors = torch.nn.functional.mse_loss(logits, affinity, reduction='none')
+
+    # print("errors dim", errors.shape, affinity_loss_factor.shape, errors)
+    after_factor = errors * affinity_loss_factor.squeeze()
+    if affinity_loss_factor.sum() > 0.1:
+        mean_val = after_factor.sum() / affinity_loss_factor.sum()
+    else:
+        # If no affinity in batch - get a very small loss. the factor also makes the loss small
+        mean_val = after_factor.sum() * 1e-3
+    # print("after factor", after_factor.shape, after_factor, affinity_loss_factor.sum(), mean_val)
+    return mean_val
+
 
 def positions_inter_distogram_loss(
     out,
@@ -1085,6 +1104,10 @@ class AlphaFoldLoss(nn.Module):
                 logits=out["affinity_cls_logits"],
                 **{**batch, **self.config.affinity_cls},
             ),
+            "affinity_cls_reg": lambda: affinity_loss_reg(
+                logits=out["affinity_cls_reg_logits"],
+                **{**batch, **self.config.affinity_cls_reg},
+            ),
             "binding_site": lambda: binding_site_loss(
                 logits=out["binding_site_logits"],
                 **{**batch, **self.config.binding_site},
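The key behavior in affinity_loss_reg is the gating: per-example squared errors are multiplied by affinity_loss_factor, so examples without a measured affinity contribute (almost) nothing. A toy sketch with illustrative numbers:

# Toy sketch of affinity_loss_reg's gating behavior.
import torch

logits = torch.tensor([[5.2], [7.9]])
affinity = torch.tensor([[5.0], [8.0]])
affinity_loss_factor = torch.tensor([[1.0], [0.0]])  # no label for example 2

errors = torch.nn.functional.mse_loss(logits, affinity, reduction="none")
after_factor = errors.squeeze() * affinity_loss_factor.squeeze()
if affinity_loss_factor.sum() > 0.1:
    mean_val = after_factor.sum() / affinity_loss_factor.sum()
else:
    mean_val = after_factor.sum() * 1e-3  # keep a tiny, well-defined loss
print(float(mean_val))  # ~0.04: only the labeled example counts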
env_consts.py
CHANGED
@@ -3,7 +3,12 @@ import os
 TEST_INPUT_DIR = None
 TEST_OUTPUT_DIR = None
 THIS_FILE_DIR = os.path.dirname(os.path.abspath(__file__))
-
+MODEL_NAME_TO_CKPT = {
+    "DockFormer-Screen": os.path.join(THIS_FILE_DIR, "resources", "dockformer_screen_102-110250.ckpt"),
+    "DockFormer-PDBBind": os.path.join(THIS_FILE_DIR, "resources", "dockformer_pdbbind_95-108500.ckpt"),
+    "DockFormer-PLINDER": os.path.join(THIS_FILE_DIR, "resources", "dockformer_plinder_132-98000.ckpt"),
+}
+
 RUN_CONFIG_PATH = os.path.join(THIS_FILE_DIR, "resources", "run_config.json")
 
 OUTPUT_PROT_PATH = os.path.join(THIS_FILE_DIR, "predicted_protein_out.pdb")
inference_app.py
CHANGED
@@ -4,16 +4,17 @@ import gradio as gr
 
 from gradio_molecule3d import Molecule3D
 from run_on_seq import run_on_sample_seqs
-from env_consts import RUN_CONFIG_PATH, OUTPUT_PROT_PATH, OUTPUT_LIG_PATH
+from env_consts import RUN_CONFIG_PATH, OUTPUT_PROT_PATH, OUTPUT_LIG_PATH, MODEL_NAME_TO_CKPT
 
 
-def predict(input_sequence, input_ligand, input_msa, input_protein):
+def predict(input_sequence, input_ligand, input_msa, input_protein, model_variation):
     start_time = time.time()
     # Do inference here
     # return an output pdb file with the protein and ligand with resname LIG or UNK.
     # also return any metrics you want to log, metrics will not be used for evaluation but might be useful for users
+    ckpt_path = MODEL_NAME_TO_CKPT[model_variation]
     metrics = run_on_sample_seqs(input_sequence, input_protein, input_ligand, OUTPUT_PROT_PATH, OUTPUT_LIG_PATH,
-                                 RUN_CONFIG_PATH)
+                                 RUN_CONFIG_PATH, ckpt_path)
     end_time = time.time()
     run_time = end_time - start_time
 
@@ -23,6 +24,12 @@ def predict(input_sequence, input_ligand, input_msa, input_protein):
 with gr.Blocks() as app:
     gr.Markdown("DockFormer")
 
+    model_variation = gr.Dropdown(
+        choices=["DockFormer-Screen", "DockFormer-PDBBind", "DockFormer-PLINDER"],
+        label="Select model variation",
+        value="DockFormer-Screen"  # Default value
+    )
+
     # gr.Markdown("Title, description, and other information about the model")
     with gr.Row():
         input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
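The dropdown's selected string reaches predict() as model_variation and is looked up in MODEL_NAME_TO_CKPT. A stripped-down sketch of that wiring; the component layout and checkpoint path below are illustrative, not the Space's full app:

# Stripped-down sketch of how a gr.Dropdown value flows into predict() as a
# plain string; the real Space wires more inputs (sequence, ligand, etc.).
import gradio as gr

MODEL_NAME_TO_CKPT = {"DockFormer-Screen": "screen.ckpt"}  # illustrative path

def predict(model_variation):
    ckpt_path = MODEL_NAME_TO_CKPT[model_variation]
    return f"would run inference with {ckpt_path}"

with gr.Blocks() as demo:
    model_variation = gr.Dropdown(
        choices=list(MODEL_NAME_TO_CKPT),
        value="DockFormer-Screen",
        label="Select model variation",
    )
    result = gr.Textbox(label="Result")
    gr.Button("Run").click(predict, inputs=[model_variation], outputs=[result])

# demo.launch()  # uncomment to serve locally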
resources/run_config.json
CHANGED
@@ -16,6 +16,6 @@
     "affinity_cls": {"weight": 0.03},
     "fape_interface": {"weight": 1.0}
   },
-  "globals": {"max_lr": 0.
+  "globals": {"max_lr": 0.0002}
 }
 }
run_on_seq.py
CHANGED
@@ -115,7 +115,7 @@ def create_embeded_molecule(ref_mol: Chem.Mol, smiles: str):
 
 
 def run_on_sample_seqs(seq_protein: str, template_protein_path: str, smiles: str, output_prot_path: str,
-                       output_lig_path: str, run_config_path: str):
+                       output_lig_path: str, run_config_path: str, ckpt_path: str):
     temp_dir = tempfile.TemporaryDirectory()
     temp_dir_path = temp_dir.name
     metrics = {}
@@ -132,7 +132,7 @@ def run_on_sample_seqs(seq_protein: str, template_protein_path: str, smiles: str
     json.dump(json_data, open(f"{tmp_json_folder}/input.json", "w"))
     tmp_output_folder = f"{temp_dir_path}/output"
 
-    run_on_folder(tmp_json_folder, tmp_output_folder, run_config_path, skip_relaxation=True,
+    run_on_folder(tmp_json_folder, tmp_output_folder, run_config_path, ckpt_path, skip_relaxation=True,
                   long_sequence_inference=False, skip_exists=False)
     predicted_protein_path = tmp_output_folder + "/predictions/input_predicted_protein.pdb"
     predicted_ligand_path = tmp_output_folder + "/predictions/input_predicted_ligand_0.sdf"
run_pretrained_model.py
CHANGED
@@ -14,7 +14,7 @@
 # limitations under the License.
 import sys
 
-from env_consts import TEST_INPUT_DIR, TEST_OUTPUT_DIR
+from env_consts import TEST_INPUT_DIR, TEST_OUTPUT_DIR
 import json
 import logging
 import numpy as np
@@ -59,7 +59,7 @@ def override_config(base_config, overriding_config):
     return base_config
 
 
-def run_on_folder(input_dir: str, output_dir: str, run_config_path: str, skip_relaxation=True,
+def run_on_folder(input_dir: str, output_dir: str, run_config_path: str, ckpt_path: str, skip_relaxation=True,
                   long_sequence_inference=False, skip_exists=False):
    config_preset = "initial_training"
    save_outputs = False
@@ -67,9 +67,7 @@ def run_on_folder(input_dir: str, output_dir: str, run_config_path: str, skip_re
 
     run_config = json.load(open(run_config_path))
 
-    ckpt_path =
-    if ckpt_path is None:
-        ckpt_path = get_latest_checkpoint(os.path.join(run_config["train_output_dir"], "checkpoint"))
+    ckpt_path = os.path.abspath(ckpt_path)
     print("Using checkpoint: ", ckpt_path)
 
     config = model_config(config_preset, long_sequence_inference=long_sequence_inference)
@@ -115,17 +113,29 @@ def run_on_folder(input_dir: str, output_dir: str, run_config_path: str, skip_re
                            dim=-1).item()
     affinity_cls = torch.sum(torch.softmax(torch.tensor(out["affinity_cls_logits"]), -1) * torch.linspace(0, 15, 32),
                              dim=-1).item()
-
+    affinity_cls_reg = torch.tensor(out["affinity_cls_reg_logits"]).item()
 
     affinity_2d_max = torch.linspace(0, 15, 32)[torch.argmax(torch.tensor(out["affinity_2d_logits"]))].item()
     affinity_1d_max = torch.linspace(0, 15, 32)[torch.argmax(torch.tensor(out["affinity_1d_logits"]))].item()
     affinity_cls_max = torch.linspace(0, 15, 32)[torch.argmax(torch.tensor(out["affinity_cls_logits"]))].item()
 
+    protein_mask = processed_feature_dict["protein_mask"][0].astype(bool)
+    ligand_mask = processed_feature_dict["ligand_mask"][0].astype(bool)
+
+    protein_length = protein_mask.sum()
+    ligand_length = ligand_mask.sum()
+    predicted_inter_contacts_logits = torch.tensor(out["inter_contact_logits"][0][:protein_length,
+                                                   protein_length:protein_length+ligand_length, :])
+
+    top_100_inter_contacts = torch.topk(predicted_inter_contacts_logits.flatten(), 100).indices
+    inter_contacts_indices = [[int(i // ligand_length), int(i % ligand_length)] for i in top_100_inter_contacts]
+
     print("Affinity: ", affinity_2d, affinity_cls, affinity_1d)
     with open(affinity_output_path, "w") as f:
         json.dump({"affinity_2d": affinity_2d, "affinity_1d": affinity_1d, "affinity_cls": affinity_cls,
                    "affinity_2d_max": affinity_2d_max, "affinity_1d_max": affinity_1d_max,
-                   "affinity_cls_max": affinity_cls_max
+                   "affinity_cls_max": affinity_cls_max, "affinity_cls_reg": affinity_cls_reg,
+                   "inter_contacts": inter_contacts_indices}, f)
 
     # binding_site = torch.sigmoid(torch.tensor(out["binding_site_logits"])) * 100
     # binding_site = binding_site[:processed_feature_dict["aatype"].shape[1]].flatten()
@@ -135,9 +145,6 @@ def run_on_folder(input_dir: str, output_dir: str, run_config_path: str, skip_re
 
     ligand_output_path = os.path.join(output_directory, f"{output_name}_ligand_{{i}}.sdf")
 
-    protein_mask = processed_feature_dict["protein_mask"][0].astype(bool)
-    ligand_mask = processed_feature_dict["ligand_mask"][0].astype(bool)
-
     save_output_structure(
         aatype=processed_feature_dict["aatype"][0][protein_mask],
         residue_index=processed_feature_dict["in_chain_residue_index"][0],
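The new "inter_contacts" entry recovers 2-D (protein residue, ligand atom) indices from a flattened top-k over the contact-logit block. A toy version of that index arithmetic, with a random matrix standing in for the real logits:

# Toy sketch of the flattened top-k -> (protein, ligand) index recovery.
import torch

protein_length, ligand_length = 4, 3
contact_logits = torch.randn(protein_length, ligand_length)

top = torch.topk(contact_logits.flatten(), k=5).indices
# flatten() is row-major, so // and % by the row width invert it exactly
pairs = [[int(i // ligand_length), int(i % ligand_length)] for i in top]
print(pairs)  # e.g. [[2, 1], [0, 0], ...]: [residue_index, ligand_atom_index]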