iitm_bisect_pinder_submission

Sleeping

App Files Files Community

Sukanyaaa committed on Nov 8, 2024

Commit

c618657

1 Parent(s): 5d7c203

Initial commit

Browse files

Files changed (4) hide show

EquiMPNN-epoch=413-val_loss=9.25-val_acc=0.00.ckpt +3 -0
inference_app.py +827 -6
lightning_logs/version_0/hparams.yaml +5 -0
requirements.txt +216 -2

EquiMPNN-epoch=413-val_loss=9.25-val_acc=0.00.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0196c51cf2e21a93906785c5ec4f3aef72d85b34908825b9c70cf29cc35d4fca
+size 556424

inference_app.py CHANGED Viewed

@@ -1,29 +1,849 @@
 import time
 import json
 import gradio as gr
 from gradio_molecule3d import Molecule3D
 def predict (input_seq_1, input_msa_1, input_protein_1, input_seq_2,input_msa_2,  input_protein_2):
     start_time = time.time()
-    # Do inference here
     # return an output pdb file with the protein and two chains A and B.
     # also return a JSON with any metrics you want to report
     metrics = {"mean_plddt": 80, "binding_affinity": 2}
     end_time = time.time()
     run_time = end_time - start_time
-    return "test_out.pdb",json.dumps(metrics), run_time
 with gr.Blocks() as app:
     gr.Markdown("# Template for inference")
-    gr.Markdown("Title, description, and other information about the model")
     with gr.Row():
         with gr.Column():
             input_seq_1 = gr.Textbox(lines=3, label="Input Protein 1 sequence (FASTA)")
@@ -94,3 +914,4 @@ with gr.Blocks() as app:
     btn.click(predict, inputs=[input_seq_1, input_msa_1, input_protein_1, input_seq_2, input_msa_2,  input_protein_2], outputs=[out, metrics, run_time])
 app.launch()

+from __future__ import annotations
 import time
 import json
 import gradio as gr
 from gradio_molecule3d import Molecule3D
+import torch
+from pinder.core import get_pinder_location
+get_pinder_location()
+from pytorch_lightning import LightningModule
+import torch
+import lightning.pytorch as pl
+import torch.nn.functional as F
+import torch.nn as nn
+import torchmetrics
+import torch.nn as nn
+import torch.nn.functional as F
+from torch_geometric.nn import MessagePassing
+from torch_geometric.nn import global_mean_pool
+from torch.nn import Sequential, Linear, BatchNorm1d, ReLU
+from torch_scatter import scatter
+from torch.nn import Module
+import pinder.core as pinder
+pinder.__version__
+from torch_geometric.loader import DataLoader
+from pinder.core.loader.dataset import get_geo_loader
+from pinder.core import download_dataset
+from pinder.core import get_index
+from pinder.core import get_metadata
+from pathlib import Path
+import pandas as pd
+from pinder.core import PinderSystem
+import torch
+from pinder.core.loader.dataset import PPIDataset
+from pinder.core.loader.geodata import NodeRepresentation
+import pickle
+from pinder.core import get_index, PinderSystem
+from torch_geometric.data import HeteroData
+import os
+from enum import Enum
+import numpy as np
+import torch
+import lightning.pytorch as pl
+from numpy.typing import NDArray
+from torch_geometric.data import HeteroData
+from pinder.core.index.system import PinderSystem
+from pinder.core.loader.structure import Structure
+from pinder.core.utils import constants as pc
+from pinder.core.utils.log import setup_logger
+from pinder.core.index.system import _align_monomers_with_mask
+from pinder.core.loader.structure import Structure
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch_geometric.nn import MessagePassing
+from torch_geometric.nn import global_mean_pool
+from torch.nn import Sequential, Linear, BatchNorm1d, ReLU
+from torch_scatter import scatter
+from torch.nn import Module
+import time
+from torch_geometric.nn import global_max_pool
+import copy
+import inspect
+import warnings
+from typing import Optional, Tuple, Union
+import torch
+from torch import Tensor
+from torch_geometric.data import Data, Dataset, HeteroData
+from torch_geometric.data.feature_store import FeatureStore
+from torch_geometric.data.graph_store import GraphStore
+from torch_geometric.loader import (
+    LinkLoader,
+    LinkNeighborLoader,
+    NeighborLoader,
+    NodeLoader,
+)
+from torch_geometric.loader.dataloader import DataLoader
+from torch_geometric.loader.utils import get_edge_label_index, get_input_nodes
+from torch_geometric.sampler import BaseSampler, NeighborSampler
+from torch_geometric.typing import InputEdges, InputNodes
+try:
+    from lightning.pytorch import LightningDataModule as PLLightningDataModule
+    no_pytorch_lightning = False
+except (ImportError, ModuleNotFoundError):
+    PLLightningDataModule = object
+    no_pytorch_lightning = True
+from lightning.pytorch.callbacks import ModelCheckpoint
+from lightning.pytorch.loggers.tensorboard import TensorBoardLogger
+from lightning.pytorch.callbacks.early_stopping import EarlyStopping
+from torch_geometric.data.lightning.datamodule import LightningDataset
+from pytorch_lightning.loggers.wandb import WandbLogger
+def get_system(system_id: str) -> PinderSystem:
+    """Construct the ``PinderSystem`` for the given system identifier."""
+    system = PinderSystem(system_id)
+    return system
+from Bio import PDB
+def extract_coordinates_from_pdb(filename):
+    """
+    Parse a PDB file and collect the position of every atom it contains.
+    Args:
+        filename: PDB input handed to ``PDB.PDBParser.get_structure``.
+    Returns:
+        list: one ``(x, y, z)`` tuple per atom, in model -> chain -> residue -> atom order.
+    """
+    pdb_reader = PDB.PDBParser(QUIET=True)
+    parsed = pdb_reader.get_structure("structure", filename)
+    # ``atom.coord`` is a numpy array; unpack it into a plain 3-tuple per atom.
+    return [
+        (atom.coord[0], atom.coord[1], atom.coord[2])
+        for model in parsed
+        for chain in model
+        for residue in chain
+        for atom in residue
+    ]
+log = setup_logger(__name__)
+# torch-cluster is optional at import time: ``torch_cluster_installed`` records whether
+# ``knn_graph`` is available so graph builders can skip or refuse edge construction.
+try:
+    from torch_cluster import knn_graph
+except ImportError:
+    # Adjacent string literals are concatenated verbatim, so each sentence carries its
+    # own trailing space to keep the logged message readable.
+    log.warning(
+        "torch-cluster is not installed! "
+        "Please install the appropriate library for your pytorch installation. "
+        "See https://github.com/rusty1s/pytorch_cluster/issues/185 for background."
+    )
+    torch_cluster_installed = False
+else:
+    torch_cluster_installed = True
+def structure2tensor(
+    atom_coordinates: NDArray[np.double] | None = None,
+    atom_types: NDArray[np.str_] | None = None,
+    element_types: NDArray[np.str_] | None = None,
+    residue_coordinates: NDArray[np.double] | None = None,
+    residue_ids: NDArray[np.int_] | None = None,
+    residue_types: NDArray[np.str_] | None = None,
+    chain_ids: NDArray[np.str_] | None = None,
+    dtype: torch.dtype = torch.float32,
+) -> dict[str, torch.Tensor]:
+    """Convert per-atom / per-residue arrays into tensors of ``dtype``.
+    Only the arguments that are not ``None`` produce an entry; each entry is stored
+    under the name of its argument. Name arrays (atom, element, residue types) are
+    mapped to numeric codes through the lookup tables in ``pc`` and returned as a
+    column of shape ``(len(names), 1)``. ``chain_ids`` becomes a 0/1 vector with 1
+    wherever the chain id equals ``"L"``.
+    """
+    def as_column(codes: list) -> torch.Tensor:
+        # One code per row, staged in float64 before the cast to ``dtype``.
+        column = np.array(codes, dtype=np.float64).reshape(-1, 1)
+        return torch.tensor(column).type(dtype)
+    tensors: dict[str, torch.Tensor] = {}
+    if atom_types is not None:
+        # Names missing from the table share one index past the largest known one.
+        unknown_atom = max(pc.ALL_ATOM_POSNS.values()) + 1
+        tensors["atom_types"] = as_column(
+            [pc.ALL_ATOM_POSNS.get(atom_name, unknown_atom) for atom_name in atom_types]
+        )
+    if element_types is not None:
+        tensors["element_types"] = as_column(
+            [pc.ELE2NUM.get(symbol, pc.ELE2NUM["other"]) for symbol in element_types]
+        )
+    if residue_types is not None:
+        unknown_residue = max(pc.AA_TO_INDEX.values()) + 1
+        tensors["residue_types"] = as_column(
+            [pc.AA_TO_INDEX.get(res_name, unknown_residue) for res_name in residue_types]
+        )
+    if atom_coordinates is not None:
+        tensors["atom_coordinates"] = torch.tensor(atom_coordinates, dtype=dtype)
+    if residue_coordinates is not None:
+        tensors["residue_coordinates"] = torch.tensor(residue_coordinates, dtype=dtype)
+    if residue_ids is not None:
+        tensors["residue_ids"] = torch.tensor(residue_ids, dtype=dtype)
+    if chain_ids is not None:
+        is_ligand = torch.zeros(len(chain_ids), dtype=dtype)
+        is_ligand[chain_ids == "L"] = 1
+        tensors["chain_ids"] = is_ligand
+    return tensors
+# NOTE: this local definition shadows the ``NodeRepresentation`` imported from
+# ``pinder.core.loader.geodata`` earlier in this file.
+class NodeRepresentation(Enum):
+    # String-valued tags for the three node granularities.
+    Surface = "surface"
+    Atom = "atom"
+    Residue = "residue"
+class PairedPDB(HeteroData):  # type: ignore
+    """Heterogeneous graph pairing an apo (input) and a holo (target) structure.
+    The ``"receptor"`` and ``"ligand"`` node stores take ``x`` (atom-type codes) and
+    ``pos`` (atom coordinates) from the apo structure, and ``y`` (atom coordinates)
+    from the holo structure.
+    """
+    @staticmethod
+    def _chain_props(
+        full: Structure, calpha: Structure
+    ) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
+        """Return ``(receptor_props, ligand_props)`` for one structure.
+        The split assumes chain ``R`` records precede all other records — TODO confirm
+        against the structures this is fed with.
+        """
+        # Atom-level and CA-level arrays have different lengths, so each gets its own
+        # split point. Slicing the CA arrays with the all-atom count would put both
+        # chains into the receptor part and leave the ligand part empty.
+        n_atoms_r = int((full.dataframe['chain_id'] == 'R').sum())
+        n_res_r = int((calpha.dataframe['chain_id'] == 'R').sum())
+        atoms, residues = full.atom_array, calpha.atom_array
+        def props(atom_part: slice, res_part: slice) -> dict[str, torch.Tensor]:
+            return structure2tensor(
+                atom_coordinates=full.coords[atom_part],
+                atom_types=atoms.atom_name[atom_part],
+                element_types=atoms.element[atom_part],
+                residue_coordinates=calpha.coords[res_part],
+                residue_types=residues.res_name[res_part],
+                residue_ids=residues.res_id[res_part],
+            )
+        return (
+            props(slice(None, n_atoms_r), slice(None, n_res_r)),
+            props(slice(n_atoms_r, None), slice(n_res_r, None)),
+        )
+    @classmethod
+    def from_tuple_system(
+        cls,
+        tupal: tuple = (Structure , Structure , Structure),
+        add_edges: bool = True,
+        k: int = 10,
+    ) -> PairedPDB:
+        """Build the graph from a tuple whose first item is the holo structure and
+        whose second item is the apo structure; any further items are ignored.
+        """
+        return cls.from_structure_pair(
+            holo=tupal[0],
+            apo=tupal[1],
+            add_edges=add_edges,
+            k=k,
+        )
+    @classmethod
+    def from_structure_pair(
+        cls,
+        holo: Structure,
+        apo: Structure,
+        add_edges: bool = True,
+        k: int = 10,
+    ) -> PairedPDB:
+        """Build the paired graph from a holo (target) and an apo (input) structure.
+        Args:
+            holo: structure providing the target coordinates stored in ``y``.
+            apo: structure providing the node features ``x`` and positions ``pos``.
+            add_edges: when True and torch-cluster is importable, attach a k-NN
+                ``edge_index`` over ``pos`` to both node stores.
+            k: number of neighbours for the k-NN graph.
+        Returns:
+            The populated ``PairedPDB`` graph.
+        """
+        graph = cls()
+        holo_calpha = holo.filter("atom_name", mask=["CA"])
+        apo_calpha = apo.filter("atom_name", mask=["CA"])
+        holo_r_props, holo_l_props = cls._chain_props(holo, holo_calpha)
+        apo_r_props, apo_l_props = cls._chain_props(apo, apo_calpha)
+        graph["ligand"].x = apo_l_props["atom_types"]
+        graph["ligand"].pos = apo_l_props["atom_coordinates"]
+        graph["receptor"].x = apo_r_props["atom_types"]
+        graph["receptor"].pos = apo_r_props["atom_coordinates"]
+        graph["ligand"].y = holo_l_props["atom_coordinates"]
+        graph["receptor"].y = holo_r_props["atom_coordinates"]
+        if add_edges and torch_cluster_installed:
+            graph["ligand"].edge_index = knn_graph(graph["ligand"].pos, k=k)
+            graph["receptor"].edge_index = knn_graph(graph["receptor"].pos, k=k)
+        return graph
+def create_graph(pdb1, pdb2, pdb3='/home/sukanya/iitm_bisect_pinder_submission/test_out.pdb', k=5):
+    """
+    Create a heterogeneous graph from two PDB files, with the ligand and receptor
+    as separate node stores carrying their coordinates and k-NN edges.
+    Args:
+        pdb1 (str): PDB file path for the ligand.
+        pdb2 (str): PDB file path for the receptor.
+        pdb3 (str | None): Optional PDB file with target coordinates. Its first
+            ``len(ligand)`` atoms become the ligand ``y``, the remainder the receptor
+            ``y``. When it is ``None`` or the path does not exist (e.g. at inference,
+            where no ground truth is available) no ``y`` is attached.
+        k (int): Number of nearest neighbors for constructing the knn graph.
+    Returns:
+        HeteroData: A PyG HeteroData object containing ligand and receptor data.
+    Raises:
+        ImportError: if torch-cluster (needed for ``knn_graph``) is not installed.
+    """
+    if not torch_cluster_installed:
+        raise ImportError("torch-cluster is required to build the k-NN graph")
+    # reshape(-1, 3) keeps a well-formed (0, 3) tensor even for a file without atoms.
+    coords1 = torch.tensor(extract_coordinates_from_pdb(pdb1), dtype=torch.float).reshape(-1, 3)
+    coords2 = torch.tensor(extract_coordinates_from_pdb(pdb2), dtype=torch.float).reshape(-1, 3)
+    data = HeteroData()
+    # Node features are a copy of the coordinates; clone() avoids the copy-construct
+    # warning that torch.tensor(existing_tensor) emits.
+    data["ligand"].x = coords1.clone()
+    data["ligand"].pos = coords1
+    data["receptor"].x = coords2.clone()
+    data["receptor"].pos = coords2
+    # Only path-like targets can be checked for existence; anything else (e.g. an open
+    # handle) is passed straight to the parser.
+    has_target = pdb3 is not None and (
+        not isinstance(pdb3, (str, os.PathLike)) or os.path.exists(pdb3)
+    )
+    if has_target:
+        coords3 = torch.tensor(extract_coordinates_from_pdb(pdb3), dtype=torch.float).reshape(-1, 3)
+        data["ligand"].y = coords3[:len(coords1)].clone()
+        data["receptor"].y = coords3[len(coords1):].clone()
+    ligand_edge_index = knn_graph(data["ligand"].pos, k=k)
+    receptor_edge_index = knn_graph(data["receptor"].pos, k=k)
+    # Store the edges both on the node stores and as (type, type) edge stores,
+    # because consumers in this file read either location.
+    data["ligand"].edge_index = ligand_edge_index
+    data["receptor"].edge_index = receptor_edge_index
+    data["ligand", "ligand"].edge_index = ligand_edge_index
+    data["receptor", "receptor"].edge_index = receptor_edge_index
+    return data
+def tensor_to_pdb(tensor, pdb_filename="test_out.pdb", chain_id="L"):
+    """
+    Write a tensor of coordinates as a PDB file, one carbon ATOM record per row.
+    Args:
+        tensor (torch.Tensor): Tensor of shape (1, N, 3) or (N, 3), where each entry is
+                               (x, y, z) coordinates for atoms.
+        pdb_filename (str): Output filename for the PDB file.
+        chain_id (str): Chain identifier for the PDB structure (one character).
+    Returns:
+        str: ``pdb_filename``, so callers can hand the written file on directly.
+    """
+    # Remove the first dimension if it is 1 (e.g., shape is (1, N, 3))
+    if tensor.dim() == 3 and tensor.size(0) == 1:
+        tensor = tensor.squeeze(0)
+    records = ["REMARK   Generated by tensor_to_pdb function\n"]
+    rows = tensor.detach().cpu().float().tolist()
+    for atom_idx, (x, y, z) in enumerate(rows, start=1):
+        # Fixed-width fields: wrap the counters so large inputs cannot push the
+        # coordinate columns out of place.
+        serial = atom_idx % 100000
+        res_seq = atom_idx % 10000
+        # Column layout follows the PDB ATOM record: chain in col 22, resSeq in 23-26,
+        # x/y/z in 31-38 / 39-46 / 47-54, element right-justified in 77-78.
+        records.append(
+            f"ATOM  {serial:5d}  C   LIG {chain_id}{res_seq:4d}    "
+            f"{x:8.3f}{y:8.3f}{z:8.3f}  1.00  0.00           C\n"
+        )
+    records.append("END\n")
+    with open(pdb_filename, 'w') as pdb_file:
+        pdb_file.writelines(records)
+    return pdb_filename
+class MPNNLayer(MessagePassing):
+    # One round of message passing over node features with edge features.
+    # NOTE(review): the parameter names of message()/aggregate()/update() (h_i, h_j,
+    # edge_attr, inputs, index, aggr_out, h) appear to be matched by name by the
+    # MessagePassing base class — confirm before renaming any of them.
+    def __init__(self, emb_dim=64, edge_dim=4, aggr='add'):
+        """Message Passing Neural Network Layer
+        Args:
+            emb_dim: (int) - hidden dimension d
+            edge_dim: (int) - edge feature dimension d_e
+            aggr: (str) - aggregation function (sum/mean/max), passed to MessagePassing
+        """
+        # Set the aggregation function
+        super().__init__(aggr=aggr)
+        self.emb_dim = emb_dim
+        self.edge_dim = edge_dim
+        # Message MLP (psi) for computing messages m_ij
+        # Implemented as a stack of Linear->BN->ReLU->Linear->BN->ReLU
+        # dims: (2d + d_e) -> d
+        self.mlp_msg = Sequential(
+            Linear(2*emb_dim + edge_dim, emb_dim), BatchNorm1d(emb_dim), ReLU(),
+            Linear(emb_dim, emb_dim), BatchNorm1d(emb_dim), ReLU()
+          )
+        # Update MLP (phi) for computing updated node features h_i^{l+1}
+        # Implemented as a stack of Linear->BN->ReLU->Linear->BN->ReLU
+        # dims: 2d -> d
+        self.mlp_upd = Sequential(
+            Linear(2*emb_dim, emb_dim), BatchNorm1d(emb_dim), ReLU(),
+            Linear(emb_dim, emb_dim), BatchNorm1d(emb_dim), ReLU()
+          )
+    def forward(self, h, edge_index, edge_attr):
+        """
+        The forward pass updates node features h via one round of message passing.
+        As our MPNNLayer class inherits from the PyG MessagePassing parent class,
+        we simply need to call the propagate() function which starts the
+        message passing procedure: message() -> aggregate() -> update().
+        The MessagePassing class handles most of the logic for the implementation.
+        To build custom GNNs, we only need to define our own message(),
+        aggregate(), and update() functions (defined subsequently).
+        Args:
+            h: (n, d) - initial node features
+            edge_index: pairs of edges (i, j); the original note gave the shape as
+                (e, 2), while PyG's usual layout is (2, e) - TODO confirm
+            edge_attr: (e, d_e) - edge features
+        Returns:
+            out: (n, d) - updated node features
+        """
+        out = self.propagate(edge_index, h=h, edge_attr=edge_attr)
+        return out
+    def message(self, h_i, h_j, edge_attr):
+        """Step (1) Message
+        The message() function constructs messages from source nodes j
+        to destination nodes i for each edge (i, j) in edge_index.
+        The arguments can be a bit tricky to understand: message() can take
+        any arguments that were initially passed to propagate. Additionally,
+        we can differentiate destination nodes and source nodes by appending
+        _i or _j to the variable name, e.g. for the node features h, we
+        can use h_i and h_j.
+        This part is critical to understand as the message() function
+        constructs messages for each edge in the graph. The indexing of the
+        original node features h (or other node variables) is handled under
+        the hood by PyG.
+        Args:
+            h_i: (e, d) - destination node features
+            h_j: (e, d) - source node features
+            edge_attr: (e, d_e) - edge features
+        Returns:
+            msg: (e, d) - messages m_ij passed through the message MLP (psi)
+        """
+        # Concatenate along the feature axis: (e, 2d + d_e), the input width of mlp_msg.
+        msg = torch.cat([h_i, h_j, edge_attr], dim=-1)
+        return self.mlp_msg(msg)
+    def aggregate(self, inputs, index):
+        """Step (2) Aggregate
+        The aggregate function aggregates the messages from neighboring nodes,
+        according to the chosen aggregation function ('sum' by default).
+        Args:
+            inputs: (e, d) - messages m_ij, one per edge
+            index: per-edge node index used to group the messages; presumably the
+                receiving node of each message - TODO confirm against PyG
+        Returns:
+            aggr_out: (n, d) - aggregated messages m_i
+        """
+        return scatter(inputs, index, dim=self.node_dim, reduce=self.aggr)
+    def update(self, aggr_out, h):
+        """
+        Step (3) Update
+        The update() function computes the final node features by combining the
+        aggregated messages with the initial node features.
+        update() takes the first argument aggr_out, the result of aggregate(),
+        as well as any optional arguments that were initially passed to
+        propagate(). E.g. in this case, we additionally pass h.
+        Args:
+            aggr_out: (n, d) - aggregated messages m_i
+            h: (n, d) - initial node features
+        Returns:
+            upd_out: (n, d) - updated node features passed through the update MLP (phi)
+        """
+        # (n, 2d): original features next to the aggregated messages.
+        upd_out = torch.cat([h, aggr_out], dim=-1)
+        return self.mlp_upd(upd_out)
+    def __repr__(self) -> str:
+        return (f'{self.__class__.__name__}(emb_dim={self.emb_dim}, aggr={self.aggr})')
+class MPNNModel(Module):
+    def __init__(self, num_layers=4, emb_dim=64, in_dim=11, edge_dim=4, out_dim=1):
+        """Graph-level predictor built from a stack of MPNN layers.
+        Args:
+            num_layers: (int) - number of message passing layers L
+            emb_dim: (int) - hidden dimension d
+            in_dim: (int) - initial node feature dimension d_n
+            edge_dim: (int) - edge feature dimension d_e
+            out_dim: (int) - output dimension of the prediction head
+        """
+        super().__init__()
+        # Input projection, d_n -> d
+        self.lin_in = Linear(in_dim, emb_dim)
+        # L message passing layers, all with sum aggregation
+        self.convs = torch.nn.ModuleList(
+            [MPNNLayer(emb_dim, edge_dim, aggr='add') for _ in range(num_layers)]
+        )
+        # Readout: mean over the nodes of each graph
+        self.pool = global_mean_pool
+        # Prediction head, d -> out_dim
+        self.lin_pred = Linear(emb_dim, out_dim)
+    def forward(self, data):
+        """
+        Args:
+            data: (PyG.Data) - batch of PyG graphs
+        Returns:
+            Flattened predictions, one block of ``out_dim`` values per graph.
+        """
+        node_emb = self.lin_in(data.x)
+        for mp_layer in self.convs:
+            # Residual connection around every message passing layer
+            node_emb = node_emb + mp_layer(node_emb, data.edge_index, data.edge_attr)
+        graph_emb = self.pool(node_emb, data.batch)
+        return self.lin_pred(graph_emb).view(-1)
+class EquivariantMPNNLayer(MessagePassing):
+    # Message passing layer that updates node features and node coordinates together.
+    # NOTE(review): the parameter names of message()/aggregate()/update() appear to be
+    # matched by name by the MessagePassing base class — confirm before renaming.
+    def __init__(self, emb_dim=64,  aggr='add'):
+        """Message Passing Neural Network Layer
+        This layer is intended to be equivariant to 3D rotations and translations.
+        Args:
+            emb_dim: (int) - hidden dimension d
+            aggr: (str) - aggregation function (sum/mean/max) used for the feature messages
+        """
+        # Set the aggregation function
+        super().__init__(aggr=aggr)
+        self.emb_dim = emb_dim
+        # Message MLP: (2d + 1) -> d, the extra input is the pairwise distance
+        self.mlp_msg =  Sequential(
+                  Linear(2 * emb_dim  + 1, emb_dim),
+                  BatchNorm1d(emb_dim),
+                  ReLU(),
+                  Linear(emb_dim, emb_dim),
+                   BatchNorm1d(emb_dim),
+                   ReLU()
+                    )
+        # Position MLP: d -> 1, one scalar weight per edge for the coordinate update
+        self.mlp_pos = Sequential(
+                 Linear(emb_dim, emb_dim),
+                 BatchNorm1d(emb_dim),
+                 ReLU(),
+                 Linear(emb_dim,1)
+        )
+        # Update MLP: 2d -> d
+        self.mlp_upd = Sequential(
+                       Linear(2*emb_dim, emb_dim), BatchNorm1d(emb_dim), ReLU(), Linear(emb_dim,emb_dim), BatchNorm1d(emb_dim), ReLU()
+        )
+        # ===========================================
+    def forward(self, h, pos, edge_index):
+        """
+        The forward pass updates node features h and coordinates pos via one round
+        of message passing.
+        Args:
+            h: (n, d) - initial node features
+            pos: initial node coordinates, presumably (n, 3) - TODO confirm
+            edge_index: pairs of edges (i, j)
+        Returns:
+            out: tuple (updated node features, updated coordinates) as produced by update()
+        """
+        # Keyword arguments are forwarded to message()/update() by name.
+        out = self.propagate(edge_index=edge_index, h=h, pos=pos)
+        return out
+        # ==========================================
+    #
+    def message(self, h_i,h_j,pos_i,pos_j):
+        """Build the per-edge feature message and the per-edge weighted offset.
+        Returns:
+            (msg, pos_diff): the feature message from mlp_msg and the coordinate
+            difference pos_i - pos_j scaled by the scalar from mlp_pos.
+        """
+        # Offset between the two endpoints and its Euclidean length, kept as a column (e, 1)
+        pos_diff = pos_i - pos_j
+        dists = torch.norm(pos_diff,dim=-1).unsqueeze(1)
+        # Concatenate both node features with the distance: (e, 2d + 1)
+        msg = torch.cat([h_i , h_j, dists], dim=-1)
+        msg = self.mlp_msg(msg)
+        pos_diff = pos_diff * self.mlp_pos(msg)  # offset scaled by one learned scalar per edge
+        return msg , pos_diff
+    #   ...
+    #
+    def aggregate(self, inputs, index):
+        """Aggregate the (message, offset) pairs produced by message().
+        Feature messages are reduced with the configured aggregation ('add' by
+        default); coordinate offsets are always reduced with the mean.
+        Args:
+            inputs: tuple (msgs, pos_diffs) of per-edge tensors
+            index: per-edge node index used to group the entries; presumably the
+                receiving node of each message - TODO confirm against PyG
+        Returns:
+            (msg_aggr, pos_aggr): per-node aggregated messages and offsets
+        """
+        msgs , pos_diffs = inputs
+        msg_aggr = scatter(msgs, index , dim = self.node_dim , reduce = self.aggr)
+        pos_aggr = scatter(pos_diffs, index, dim = self.node_dim , reduce = "mean")
+        return msg_aggr , pos_aggr
+    def update(self, aggr_out, h , pos):
+        """Combine the aggregates with the current state.
+        Features go through mlp_upd on [h, msg_aggr]; coordinates are shifted by
+        the aggregated offset.
+        """
+        msg_aggr , pos_aggr = aggr_out
+        upd_out = self.mlp_upd(torch.cat((h, msg_aggr), dim=-1))
+        upd_pos = pos + pos_aggr
+        return upd_out , upd_pos
+    def __repr__(self) -> str:
+        return (f'{self.__class__.__name__}(emb_dim={self.emb_dim}, aggr={self.aggr})')
+class FinalMPNNModel(MPNNModel):
+    def __init__(self, num_layers=4, emb_dim=64, in_dim=3,  num_heads = 2):
+        """Predict a 3x3 matrix and a translation that are applied to the ligand.
+        Receptor and ligand coordinates are embedded, passed through a shared stack
+        of equivariant message passing layers, related by cross attention, and the
+        pooled attention output is mapped to 9 + 3 numbers.
+        Args:
+            num_layers: (int) - number of message passing layers L
+            emb_dim: (int) - hidden dimension d
+            in_dim: (int) - initial node feature dimension d_n
+            num_heads: (int) - number of attention heads of the cross attention
+        """
+        super().__init__()
+        # Linear projection for initial node features, d_n -> d (replaces the parent's)
+        self.lin_in = Linear(in_dim, emb_dim)
+        # Not used by the forward pass; kept so the registered parameters and
+        # state_dict keys of this model stay unchanged.
+        self.equiv_layer = EquivariantMPNNLayer(emb_dim=emb_dim)
+        # Stack of equivariant layers (replaces the parent's MPNN layers)
+        self.convs = torch.nn.ModuleList(
+            [EquivariantMPNNLayer(emb_dim, aggr='add') for _ in range(num_layers)]
+        )
+        self.cross_attention = nn.MultiheadAttention(emb_dim, num_heads, batch_first=True)
+        self.fc_rotation = nn.Linear(emb_dim, 9)
+        self.fc_translation = nn.Linear(emb_dim, 3)
+    def naive_single(self, receptor, ligand , receptor_edge_index , ligand_edge_index):
+        """
+        Processes a single receptor-ligand pair.
+        Args:
+            receptor: receptor coordinates, used both as features and positions
+            ligand: ligand coordinates, used both as features and positions
+            receptor_edge_index: edges of the receptor graph
+            ligand_edge_index: edges of the ligand graph
+        Returns:
+            rotation_matrix: the 9 outputs of ``fc_rotation`` viewed as (-1, 3, 3).
+                It is an unconstrained matrix; nothing here orthogonalizes it.
+            translation_vector: the 3 outputs of ``fc_translation``.
+        """
+        h_receptor = self.lin_in(receptor)
+        h_ligand = self.lin_in(ligand)
+        pos_receptor, pos_ligand = receptor, ligand
+        # The same layers process both molecules; with an empty stack the projected
+        # inputs are used directly instead of failing on an unbound name.
+        for layer in self.convs:
+            h_receptor, pos_receptor = layer(h_receptor, pos_receptor, receptor_edge_index)
+            h_ligand, pos_ligand = layer(h_ligand, pos_ligand, ligand_edge_index)
+        # Receptor embeddings query the ligand embeddings.
+        attn_output, _ = self.cross_attention(h_receptor, h_ligand, h_ligand)
+        # Pool once over the first axis and feed both heads from the same vector.
+        pooled = attn_output.mean(dim=0)
+        rotation_matrix = self.fc_rotation(pooled).view(-1, 3, 3)
+        translation_vector = self.fc_translation(pooled)
+        return rotation_matrix, translation_vector
+    def forward(self, data):
+        """
+        The main forward pass of the model.
+        Args:
+            data: mapping with ``'receptor'`` and ``'ligand'`` entries, each
+                providing ``'pos'`` and ``'edge_index'``.
+        Returns:
+            The ligand positions multiplied by the predicted matrix and shifted by
+            the predicted translation.
+        """
+        receptor = data['receptor']['pos']
+        ligand = data['ligand']['pos']
+        rotation_matrix, translation_vector = self.naive_single(
+            receptor,
+            ligand,
+            data['receptor']['edge_index'],
+            data['ligand']['edge_index'],
+        )
+        return ligand @ rotation_matrix + translation_vector
+class FinalMPNNModelight(pl.LightningModule):
+    """Lightning wrapper around the equivariant docking model.
+    Predicts a 3x3 matrix and a translation from receptor/ligand coordinates and
+    applies them to the ligand positions.
+    """
+    def __init__(self, num_layers=4, emb_dim=32, in_dim=3, num_heads=1, lr=1e-4):
+        super().__init__()
+        # Record the constructor arguments with the module (they are the fields
+        # listed in the accompanying hparams.yaml).
+        self.save_hyperparameters()
+        self.lin_in = nn.Linear(in_dim, emb_dim)
+        self.convs = nn.ModuleList([EquivariantMPNNLayer(emb_dim, aggr='add') for _ in range(num_layers)])
+        self.cross_attention = nn.MultiheadAttention(emb_dim, num_heads, batch_first=True)
+        self.fc_rotation = nn.Linear(emb_dim, 9)
+        self.fc_translation = nn.Linear(emb_dim, 3)
+        self.lr = lr
+    def naive_single(self, receptor, ligand, receptor_edge_index, ligand_edge_index):
+        """Return ``(rotation_matrix, translation_vector)`` for one receptor/ligand pair.
+        The matrix is the raw output of a linear head viewed as (-1, 3, 3); it is
+        not constrained to be a rotation.
+        """
+        h_receptor = self.lin_in(receptor)
+        h_ligand = self.lin_in(ligand)
+        pos_receptor, pos_ligand = receptor, ligand
+        # The same layer stack is applied to both molecules.
+        for layer in self.convs:
+            h_receptor, pos_receptor = layer(h_receptor, pos_receptor, receptor_edge_index)
+            h_ligand, pos_ligand = layer(h_ligand, pos_ligand, ligand_edge_index)
+        # Receptor embeddings query the ligand embeddings.
+        attn_output, _ = self.cross_attention(h_receptor, h_ligand, h_ligand)
+        pooled = attn_output.mean(dim=0)
+        rotation_matrix = self.fc_rotation(pooled).view(-1, 3, 3)
+        translation_vector = self.fc_translation(pooled)
+        return rotation_matrix, translation_vector
+    def forward(self, data):
+        """Transform the ligand positions of ``data`` with the predicted matrix and shift.
+        Reads ``pos`` of the ``'receptor'`` / ``'ligand'`` node stores and
+        ``edge_index`` of the ``('receptor', 'receptor')`` / ``('ligand', 'ligand')``
+        edge stores.
+        """
+        # Follow the module's own device: choosing by CUDA availability breaks when
+        # the weights live on the CPU of a CUDA-capable machine.
+        device = self.device
+        receptor = data['receptor']['pos'].to(device)
+        ligand = data['ligand']['pos'].to(device)
+        receptor_edge_index = data['receptor', 'receptor']['edge_index'].to(device)
+        ligand_edge_index = data['ligand', 'ligand']['edge_index'].to(device)
+        rotation_matrix, translation_vector = self.naive_single(receptor, ligand, receptor_edge_index, ligand_edge_index)
+        transformed_ligand = torch.matmul(ligand ,rotation_matrix) + translation_vector
+        return transformed_ligand
+    def training_step(self, batch, batch_idx):
+        """MSE between predicted and target ligand coordinates."""
+        ligand_pred = self(batch)
+        ligand_true = batch['ligand']['y']
+        loss = F.mse_loss(ligand_pred.squeeze(0), ligand_true)
+        # NOTE(review): batch_size is hard-coded for logging — confirm it matches the loader.
+        self.log('train_loss', loss, batch_size=8)
+        return loss
+    def validation_step(self, batch, batch_idx):
+        """L1 distance between predicted and target ligand coordinates."""
+        ligand_pred = self(batch)
+        ligand_true = batch['ligand']['y']
+        loss = F.l1_loss(ligand_pred.squeeze(0), ligand_true)
+        self.log('val_loss', loss, prog_bar=True, batch_size=8)
+        return loss
+    def test_step(self, batch, batch_idx):
+        """L1 distance between predicted and target ligand coordinates."""
+        ligand_pred = self(batch)
+        ligand_true = batch['ligand']['y']
+        loss = F.l1_loss(ligand_pred.squeeze(0), ligand_true)
+        self.log('test_loss', loss, prog_bar=True, batch_size=8)
+        return loss
+    def configure_optimizers(self):
+        """Adam plus a plateau scheduler that cuts the learning rate tenfold."""
+        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
+        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
+            optimizer, mode="min", factor=0.1, patience=5
+        )
+        return {
+            "optimizer": optimizer,
+            "lr_scheduler": {
+                "scheduler": scheduler,
+                "monitor": "val_loss",  # Monitor validation loss to adjust the learning rate
+            },
+        }
+# The checkpoint is shipped next to this file, so resolve it relative to the module
+# instead of through a machine-specific absolute path.
+model_path = str(
+    Path(__file__).resolve().parent / "EquiMPNN-epoch=413-val_loss=9.25-val_acc=0.00.ckpt"
+)
+# Map the weights onto the device that is actually available on this host.
+model = FinalMPNNModelight.load_from_checkpoint(
+    model_path,
+    map_location=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
+)
+# NOTE(review): the trainer is not used by predict(); kept because it is a module-level name.
+trainer = pl.Trainer(
+   fast_dev_run=False,
+   accelerator="gpu" if torch.cuda.is_available() else "cpu",
+   precision="bf16-mixed",
+   devices=1,
+)
+model.eval()
 def predict (input_seq_1, input_msa_1, input_protein_1, input_seq_2,input_msa_2,  input_protein_2):
+    """Run the docking model on two uploaded structures.
+    ``input_protein_1`` is treated as the ligand and ``input_protein_2`` as the
+    receptor (the order ``create_graph`` expects); the sequence and MSA inputs are
+    not used. Returns the path of the written PDB, a JSON string of metrics and
+    the run time in seconds.
+    NOTE(review): the written file only contains the transformed ligand atoms,
+    and the metrics are fixed placeholder values.
+    """
     start_time = time.time()
+    if not input_protein_1 or not input_protein_2:
+        raise gr.Error("Please provide a PDB structure for both proteins.")
+    # The model's forward pass never reads the target coordinates, so the ligand
+    # file doubles as a placeholder target; no pre-existing output file is needed.
+    data = create_graph(input_protein_1, input_protein_2, input_protein_1, k=10)
+    with torch.no_grad():
+        output = model(data)
+    # Return the path explicitly instead of relying on tensor_to_pdb's return value.
+    output_pdb = "test_out.pdb"
+    tensor_to_pdb(output, pdb_filename=output_pdb)
     # return an output pdb file with the protein and two chains A and B.
     # also return a JSON with any metrics you want to report
     metrics = {"mean_plddt": 80, "binding_affinity": 2}
     end_time = time.time()
     run_time = end_time - start_time
+    return output_pdb, json.dumps(metrics), run_time
 with gr.Blocks() as app:
     gr.Markdown("# Template for inference")
+    gr.Markdown("EquiMPNN MOdel")
     with gr.Row():
         with gr.Column():
             input_seq_1 = gr.Textbox(lines=3, label="Input Protein 1 sequence (FASTA)")
     btn.click(predict, inputs=[input_seq_1, input_msa_1, input_protein_1, input_seq_2, input_msa_2,  input_protein_2], outputs=[out, metrics, run_time])
 app.launch()

lightning_logs/version_0/hparams.yaml ADDED Viewed

	@@ -0,0 +1,5 @@

+emb_dim: 32
+in_dim: 3
+lr: 0.0001
+num_heads: 1
+num_layers: 4

requirements.txt CHANGED Viewed

@@ -1,2 +1,216 @@
-gradio
-gradio_molecule3d

+absl-py==2.1.0
+aiofiles==23.2.1
+aiohappyeyeballs==2.4.3
+aiohttp==3.10.10
+aiosignal==1.3.1
+annotated-types==0.7.0
+anyio==4.6.2.post1
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asttokens==2.4.1
+async-lru==2.0.4
+async-timeout==4.0.3
+attrs==24.2.0
+babel==2.16.0
+beautifulsoup4==4.12.3
+bio==1.7.1
+biopython==1.84
+biothings-client==0.3.1
+biotite==0.41.2
+bleach==6.2.0
+cachetools==5.5.0
+certifi==2024.8.30
+cffi==1.17.1
+charset-normalizer==3.4.0
+click==8.1.7
+comm==0.2.2
+debugpy==1.8.7
+decorator==5.1.1
+defusedxml==0.7.1
+docker-pycreds==0.4.0
+exceptiongroup==1.2.2
+executing==2.1.0
+fastapi==0.115.4
+fastjsonschema==2.20.0
+fastpdb==1.3.1
+ffmpy==0.4.0
+filelock==3.16.1
+fqdn==1.5.1
+frozenlist==1.5.0
+fsspec==2024.10.0
+gcsfs==2024.10.0
+gitdb==4.0.11
+GitPython==3.1.43
+google-api-core==2.22.0
+google-auth==2.35.0
+google-auth-oauthlib==1.2.1
+google-cloud-core==2.4.1
+google-cloud-storage==2.18.2
+google-crc32c==1.6.0
+google-resumable-media==2.7.2
+googleapis-common-protos==1.65.0
+gprofiler-official==1.0.0
+gradio==5.5.0
+gradio_client==1.4.2
+gradio_molecule3d==0.0.6
+grpcio==1.67.1
+h11==0.14.0
+httpcore==1.0.6
+httpx==0.27.2
+huggingface-hub==0.26.2
+idna==3.10
+ipykernel==6.29.5
+ipython==8.29.0
+ipywidgets==8.1.5
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.4
+joblib==1.4.2
+json5==0.9.25
+jsonpointer==3.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+jupyter-events==0.10.0
+jupyter-lsp==2.2.5
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+jupyter_server==2.14.2
+jupyter_server_terminals==0.5.3
+jupyterlab==4.3.0
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.3
+jupyterlab_widgets==3.0.13
+lightning==2.4.0
+lightning-utilities==0.11.8
+Markdown==3.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mistune==3.0.2
+mpmath==1.3.0
+msgpack==1.1.0
+multidict==6.1.0
+mygene==3.2.2
+nbclient==0.10.0
+nbconvert==7.16.4
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.4.2
+notebook_shim==0.2.4
+numpy==1.26.4
+nvidia-cublas-cu12==12.4.5.8
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.2.1.3
+nvidia-curand-cu12==10.3.5.147
+nvidia-cusolver-cu12==11.6.1.9
+nvidia-cusparse-cu12==12.3.1.170
+nvidia-nccl-cu12==2.21.5
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.4.127
+oauthlib==3.2.2
+orjson==3.10.11
+overrides==7.7.0
+packaging==24.1
+pandas==2.2.3
+pandocfilters==1.5.1
+parso==0.8.4
+pexpect==4.9.0
+pillow==11.0.0
+pinder==0.4.1
+platformdirs==4.3.6
+plotly==5.24.1
+pooch==1.8.2
+prometheus_client==0.21.0
+prompt_toolkit==3.0.48
+propcache==0.2.0
+proto-plus==1.25.0
+protobuf==5.28.3
+psutil==6.1.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==18.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
+pycparser==2.22
+pydantic==2.9.2
+pydantic_core==2.23.4
+pydub==0.25.1
+pyg-lib==0.4.0+pt24cu124
+Pygments==2.18.0
+pyparsing==3.2.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-json-logger==2.0.7
+python-multipart==0.0.12
+pytorch-lightning==2.4.0
+pytz==2024.2
+PyYAML==6.0.2
+pyzmq==26.2.0
+referencing==0.35.1
+requests==2.32.3
+requests-oauthlib==2.0.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.9.4
+rootutils==1.0.7
+rpds-py==0.20.1
+rsa==4.9
+ruff==0.7.2
+safehttpx==0.1.1
+scikit-learn==1.5.2
+scipy==1.14.1
+semantic-version==2.10.0
+Send2Trash==1.8.3
+sentry-sdk==2.18.0
+setproctitle==1.3.3
+shellingham==1.5.4
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+soupsieve==2.6
+stack-data==0.6.3
+starlette==0.41.2
+sympy==1.13.1
+tabulate==0.9.0
+tenacity==9.0.0
+tensorboard==2.18.0
+tensorboard-data-server==0.7.2
+tensorboardX==2.6.2.2
+terminado==0.18.1
+threadpoolctl==3.5.0
+tinycss2==1.4.0
+tomli==2.0.2
+tomlkit==0.12.0
+torch==2.5.1
+torch-geometric==2.6.1
+torch_cluster==1.6.3+pt24cu124
+torch_scatter==2.1.2+pt24cu124
+torch_sparse==0.6.18+pt24cu124
+torch_spline_conv==1.2.2+pt24cu124
+torchmetrics==1.5.1
+torchtyping==0.1.5
+tornado==6.4.1
+tqdm==4.66.6
+traitlets==5.14.3
+triton==3.1.0
+typeguard==2.13.3
+typer==0.13.0
+types-python-dateutil==2.9.0.20241003
+typing_extensions==4.12.2
+tzdata==2024.2
+uri-template==1.3.0
+urllib3==2.2.3
+uvicorn==0.32.0
+wandb==0.18.5
+wcwidth==0.2.13
+webcolors==24.8.0
+webencodings==0.5.1
+websocket-client==1.8.0
+websockets==12.0
+Werkzeug==3.1.2
+widgetsnbextension==4.0.13
+yarl==1.17.1