# Copyright (C) 2022, Joao Rodrigues (j.p.g.l.m.rodrigues@gmail.com # Anuj Sharma (anuj.sharma80@gmail.com) # # This file is part of the Biopython distribution and governed by your # choice of the "Biopython License Agreement" or the "BSD 3-Clause License". # Please see the LICENSE file that should have been included as part of this # package. """Structural alignment using Quaternion Characteristic Polynomial (QCP). QCPSuperimposer finds the best rotation and translation to put two point sets on top of each other (minimizing the RMSD). This is eg. useful to superimpose crystal structures. QCP stands for Quaternion Characteristic Polynomial, which is used in the algorithm. Algorithm and original code described in: Theobald DL. Rapid calculation of RMSDs using a quaternion-based characteristic polynomial. Acta Crystallogr A. 2005 Jul;61(Pt 4):478-80. doi: 10.1107/S0108767305015266. Epub 2005 Jun 23. PMID: 15973002. """ import numpy as np from Bio.PDB.PDBExceptions import PDBException def qcp(coords1, coords2, natoms): """Implement the QCP code in Python. Input coordinate arrays must be centered at the origin and have shape Nx3. Variable names match (as much as possible) the C implementation. """ # Original code has coords1 be the mobile. I think it makes more sense # for it to be the reference, so I swapped here. G1 = np.trace(np.dot(coords2, coords2.T)) G2 = np.trace(np.dot(coords1, coords1.T)) A = np.dot(coords2.T, coords1) # referred to as M in the original paper. E0 = (G1 + G2) * 0.5 Sxx, Sxy, Sxz, Syx, Syy, Syz, Szx, Szy, Szz = A.flatten() Sxx2 = Sxx * Sxx Syy2 = Syy * Syy Szz2 = Szz * Szz Sxy2 = Sxy * Sxy Syz2 = Syz * Syz Sxz2 = Sxz * Sxz Syx2 = Syx * Syx Szy2 = Szy * Szy Szx2 = Szx * Szx SyzSzymSyySzz2 = 2.0 * (Syz * Szy - Syy * Szz) Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2 C2 = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2) C1 = 8.0 * ( Sxx * Syz * Szy + Syy * Szx * Sxz + Szz * Sxy * Syx - Sxx * Syy * Szz - Syz * Szx * Sxy - Szy * Syx * Sxz ) SxzpSzx = Sxz + Szx SyzpSzy = Syz + Szy SxypSyx = Sxy + Syx SyzmSzy = Syz - Szy SxzmSzx = Sxz - Szx SxymSyx = Sxy - Syx SxxpSyy = Sxx + Syy SxxmSyy = Sxx - Syy Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2 negSxzpSzx = -SxzpSzx negSxzmSzx = -SxzmSzx negSxymSyx = -SxymSyx SxxpSyy_p_Szz = SxxpSyy + Szz C0 = ( Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2 + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2) * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2) + (negSxzpSzx * (SyzmSzy) + (SxymSyx) * (SxxmSyy - Szz)) * (negSxzmSzx * (SyzpSzy) + (SxymSyx) * (SxxmSyy + Szz)) + (negSxzpSzx * (SyzpSzy) - (SxypSyx) * (SxxpSyy - Szz)) * (negSxzmSzx * (SyzmSzy) - (SxypSyx) * SxxpSyy_p_Szz) + (+(SxypSyx) * (SyzpSzy) + (SxzpSzx) * (SxxmSyy + Szz)) * (negSxymSyx * (SyzmSzy) + (SxzpSzx) * SxxpSyy_p_Szz) + (+(SxypSyx) * (SyzmSzy) + (SxzmSzx) * (SxxmSyy - Szz)) * (negSxymSyx * (SyzpSzy) + (SxzmSzx) * (SxxpSyy - Szz)) ) # Newton-Rhapson # Original paper mentions 5 iterations are sufficient (on average) # for convergence up to 10^-6 precision but original code writes 50. # I guess for robustness. nr_it = 50 mxEigenV = E0 evalprec = 1e-11 for _ in range(nr_it): oldg = mxEigenV x2 = mxEigenV * mxEigenV b = (x2 + C2) * mxEigenV a = b + C1 delta = (a * mxEigenV + C0) / (2.0 * x2 * mxEigenV + b + a) mxEigenV -= delta if abs(mxEigenV - oldg) < abs(evalprec * mxEigenV): break else: print(f"Newton-Rhapson did not converge after {nr_it} iterations") # The original code has a guard if minScore > 0 and rmsd < minScore, although # the default value of minScore is -1. For simplicity, we ignore that check. rmsd = (2.0 * abs(E0 - mxEigenV) / natoms) ** 0.5 a11 = SxxpSyy + Szz - mxEigenV a12 = SyzmSzy a13 = negSxzmSzx a14 = SxymSyx a21 = SyzmSzy a22 = SxxmSyy - Szz - mxEigenV a23 = SxypSyx a24 = SxzpSzx a31 = a13 a32 = a23 a33 = Syy - Sxx - Szz - mxEigenV a34 = SyzpSzy a41 = a14 a42 = a24 a43 = a34 a44 = Szz - SxxpSyy - mxEigenV a3344_4334 = a33 * a44 - a43 * a34 a3244_4234 = a32 * a44 - a42 * a34 a3243_4233 = a32 * a43 - a42 * a33 a3143_4133 = a31 * a43 - a41 * a33 a3144_4134 = a31 * a44 - a41 * a34 a3142_4132 = a31 * a42 - a41 * a32 q1 = a22 * a3344_4334 - a23 * a3244_4234 + a24 * a3243_4233 q2 = -a21 * a3344_4334 + a23 * a3144_4134 - a24 * a3143_4133 q3 = a21 * a3244_4234 - a22 * a3144_4134 + a24 * a3142_4132 q4 = -a21 * a3243_4233 + a22 * a3143_4133 - a23 * a3142_4132 qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4 evecprec = 1e-6 if qsqr < evecprec: q1 = a12 * a3344_4334 - a13 * a3244_4234 + a14 * a3243_4233 q2 = -a11 * a3344_4334 + a13 * a3144_4134 - a14 * a3143_4133 q3 = a11 * a3244_4234 - a12 * a3144_4134 + a14 * a3142_4132 q4 = -a11 * a3243_4233 + a12 * a3143_4133 - a13 * a3142_4132 qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4 if qsqr < evecprec: a1324_1423 = a13 * a24 - a14 * a23 a1224_1422 = a12 * a24 - a14 * a22 a1223_1322 = a12 * a23 - a13 * a22 a1124_1421 = a11 * a24 - a14 * a21 a1123_1321 = a11 * a23 - a13 * a21 a1122_1221 = a11 * a22 - a12 * a21 q1 = a42 * a1324_1423 - a43 * a1224_1422 + a44 * a1223_1322 q2 = -a41 * a1324_1423 + a43 * a1124_1421 - a44 * a1123_1321 q3 = a41 * a1224_1422 - a42 * a1124_1421 + a44 * a1122_1221 q4 = -a41 * a1223_1322 + a42 * a1123_1321 - a43 * a1122_1221 qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4 if qsqr < evecprec: q1 = a32 * a1324_1423 - a33 * a1224_1422 + a34 * a1223_1322 q2 = -a31 * a1324_1423 + a33 * a1124_1421 - a34 * a1123_1321 q3 = a31 * a1224_1422 - a32 * a1124_1421 + a34 * a1122_1221 q4 = -a31 * a1223_1322 + a32 * a1123_1321 - a33 * a1122_1221 qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4 if qsqr < evecprec: rot = np.eye(3) return rmsd, rot, [q1, q2, q3, q4] normq = qsqr**0.5 q1 /= normq q2 /= normq q3 /= normq q4 /= normq a2 = q1 * q1 x2 = q2 * q2 y2 = q3 * q3 z2 = q4 * q4 xy = q2 * q3 az = q1 * q4 zx = q4 * q2 ay = q1 * q3 yz = q3 * q4 ax = q1 * q2 rot = np.zeros((3, 3)) rot[0][0] = a2 + x2 - y2 - z2 rot[0][1] = 2 * (xy + az) rot[0][2] = 2 * (zx - ay) rot[1][0] = 2 * (xy - az) rot[1][1] = a2 - x2 + y2 - z2 rot[1][2] = 2 * (yz + ax) rot[2][0] = 2 * (zx + ay) rot[2][1] = 2 * (yz - ax) rot[2][2] = a2 - x2 - y2 + z2 return rmsd, rot, (q1, q2, q3, q4) class QCPSuperimposer: """Quaternion Characteristic Polynomial (QCP) Superimposer. QCPSuperimposer finds the best rotation and translation to put two point sets on top of each other (minimizing the RMSD). This is eg. useful to superimposing 3D structures of proteins. QCP stands for Quaternion Characteristic Polynomial, which is used in the algorithm. Reference: Douglas L Theobald (2005), "Rapid calculation of RMSDs using a quaternion-based characteristic polynomial.", Acta Crystallogr A 61(4):478-480 """ def __init__(self): """Initialize the class.""" self._reset_properties() # Private methods def _reset_properties(self): """Reset all relevant properties to None to avoid conflicts between runs.""" self.reference_coords = None self.coords = None self.transformed_coords = None self.rot = None self.tran = None self.rms = None self.init_rms = None # Public methods def set_atoms(self, fixed, moving): """Prepare alignment between two atom lists. Put (translate/rotate) the atoms in fixed on the atoms in moving, in such a way that the RMSD is minimized. :param fixed: list of (fixed) atoms :param moving: list of (moving) atoms :type fixed,moving: [L{Atom}, L{Atom},...] """ assert len(fixed) == len(moving), "Fixed and moving atom lists differ in size" # Grab coordinates in double precision fix_coord = np.array([a.get_coord() for a in fixed], dtype=np.float64) mov_coord = np.array([a.get_coord() for a in moving], dtype=np.float64) self.set(fix_coord, mov_coord) self.run() self.rms = self.get_rms() self.rotran = self.get_rotran() def apply(self, atom_list): """Apply the QCP rotation matrix/translation vector to a set of atoms.""" if self.rotran is None: raise PDBException("No transformation has been calculated yet") rot, tran = self.rotran for atom in atom_list: atom.transform(rot, tran) # Low(er) level functions def set(self, reference_coords, coords): """Set the coordinates to be superimposed. coords will be put on top of reference_coords. - reference_coords: an NxDIM array - coords: an NxDIM array DIM is the dimension of the points, N is the number of points to be superimposed. """ self._reset_properties() # store coordinates self.reference_coords = reference_coords self.coords = coords self._natoms, n_dim = coords.shape if reference_coords.shape != coords.shape: raise PDBException("Coordinates must have the same dimensions.") if n_dim != 3: raise PDBException("Coordinates must be Nx3 arrays.") def run(self): """Superimpose the coordinate sets.""" if self.coords is None or self.reference_coords is None: raise PDBException("No coordinates set.") coords = self.coords.copy() coords_ref = self.reference_coords.copy() # Center Coordinates com1 = np.mean(coords, axis=0) com2 = np.mean(coords_ref, axis=0) coords -= com1 coords_ref -= com2 (self.rms, self.rot, _) = qcp(coords_ref, coords, self._natoms) self.tran = com2 - np.dot(com1, self.rot) # Getters def get_transformed(self): """Get the transformed coordinate set.""" if self.coords is None or self.reference_coords is None: raise PDBException("No coordinates set.") if self.rot is None: raise PDBException("Nothing is superimposed yet.") self.transformed_coords = np.dot(self.coords, self.rot) + self.tran return self.transformed_coords def get_rotran(self): """Return right multiplying rotation matrix and translation vector.""" if self.rot is None: raise PDBException("Nothing is superimposed yet.") return self.rot, self.tran def get_init_rms(self): """Return the root mean square deviation of untransformed coordinates.""" if self.coords is None: raise PDBException("No coordinates set yet.") if self.init_rms is None: diff = self.coords - self.reference_coords self.init_rms = np.sqrt(np.sum(np.dot(diff, diff), axis=0) / self._natoms) return self.init_rms def get_rms(self): """Root mean square deviation of superimposed coordinates.""" if self.rms is None: raise PDBException("Nothing superimposed yet.") return self.rms