aakash0017's picture
Upload folder using huggingface_hub
b7731cd
# Copyright (C) 2022, Joao Rodrigues ([email protected])
# Anuj Sharma ([email protected])
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Structural alignment using Quaternion Characteristic Polynomial (QCP).
QCPSuperimposer finds the best rotation and translation to put
two point sets on top of each other (minimizing the RMSD). This is
useful, e.g., to superimpose crystal structures. QCP stands for
Quaternion Characteristic Polynomial, which is used in the algorithm.
Algorithm and original code described in:
Theobald DL.
Rapid calculation of RMSDs using a quaternion-based characteristic polynomial.
Acta Crystallogr A. 2005 Jul;61(Pt 4):478-80. doi: 10.1107/S0108767305015266.
Epub 2005 Jun 23. PMID: 15973002.
"""
import numpy as np
from Bio.PDB.PDBExceptions import PDBException
def qcp(coords1, coords2, natoms):
    """Implement the QCP code in Python.

    Input coordinate arrays must be centered at the origin and have
    shape Nx3. ``coords1`` is treated as the reference set and ``coords2``
    as the mobile set.

    Variable names match (as much as possible) the C implementation.

    :param coords1: reference coordinates, Nx3 array centered at the origin
    :param coords2: mobile coordinates, Nx3 array centered at the origin
    :param natoms: number of points (N), used to normalize the RMSD
    :return: tuple ``(rmsd, rot, quaternion)``. ``rot`` is a 3x3 rotation
        matrix meant to be applied by right multiplication
        (``np.dot(coords2, rot)``), and ``quaternion`` is the matching
        unit quaternion ``(q1, q2, q3, q4)`` with ``q1`` the scalar part.
        When no usable eigenvector can be extracted, the identity rotation
        and the identity quaternion ``(1.0, 0.0, 0.0, 0.0)`` are returned.
    """
    # Original code has coords1 be the mobile. I think it makes more sense
    # for it to be the reference, so I swapped here.

    # Inner product of each coordinate set with itself. This is the same
    # quantity as trace(X . X^T), computed without materialising the
    # intermediate NxN matrix.
    G1 = np.sum(coords2 * coords2)
    G2 = np.sum(coords1 * coords1)
    A = np.dot(coords2.T, coords1)  # referred to as M in the original paper.
    E0 = (G1 + G2) * 0.5

    Sxx, Sxy, Sxz, Syx, Syy, Syz, Szx, Szy, Szz = A.flatten()

    Sxx2 = Sxx * Sxx
    Syy2 = Syy * Syy
    Szz2 = Szz * Szz
    Sxy2 = Sxy * Sxy
    Syz2 = Syz * Syz
    Sxz2 = Sxz * Sxz
    Syx2 = Syx * Syx
    Szy2 = Szy * Szy
    Szx2 = Szx * Szx

    SyzSzymSyySzz2 = 2.0 * (Syz * Szy - Syy * Szz)
    Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2

    # Coefficients of the quartic characteristic polynomial
    # x^4 + C2 * x^2 + C1 * x + C0 (the cubic term is zero).
    C2 = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2)
    C1 = 8.0 * (
        Sxx * Syz * Szy
        + Syy * Szx * Sxz
        + Szz * Sxy * Syx
        - Sxx * Syy * Szz
        - Syz * Szx * Sxy
        - Szy * Syx * Sxz
    )

    SxzpSzx = Sxz + Szx
    SyzpSzy = Syz + Szy
    SxypSyx = Sxy + Syx
    SyzmSzy = Syz - Szy
    SxzmSzx = Sxz - Szx
    SxymSyx = Sxy - Syx
    SxxpSyy = Sxx + Syy
    SxxmSyy = Sxx - Syy
    Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2

    negSxzpSzx = -SxzpSzx
    negSxzmSzx = -SxzmSzx
    negSxymSyx = -SxymSyx
    SxxpSyy_p_Szz = SxxpSyy + Szz

    C0 = (
        Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2
        + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2)
        * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2)
        + (negSxzpSzx * (SyzmSzy) + (SxymSyx) * (SxxmSyy - Szz))
        * (negSxzmSzx * (SyzpSzy) + (SxymSyx) * (SxxmSyy + Szz))
        + (negSxzpSzx * (SyzpSzy) - (SxypSyx) * (SxxpSyy - Szz))
        * (negSxzmSzx * (SyzmSzy) - (SxypSyx) * SxxpSyy_p_Szz)
        + (+(SxypSyx) * (SyzpSzy) + (SxzpSzx) * (SxxmSyy + Szz))
        * (negSxymSyx * (SyzmSzy) + (SxzpSzx) * SxxpSyy_p_Szz)
        + (+(SxypSyx) * (SyzmSzy) + (SxzmSzx) * (SxxmSyy - Szz))
        * (negSxymSyx * (SyzpSzy) + (SxzmSzx) * (SxxpSyy - Szz))
    )

    # Newton-Raphson
    # Original paper mentions 5 iterations are sufficient (on average)
    # for convergence up to 10^-6 precision but original code writes 50.
    # I guess for robustness.
    nr_it = 50
    mxEigenV = E0
    evalprec = 1e-11
    for _ in range(nr_it):
        oldg = mxEigenV
        x2 = mxEigenV * mxEigenV
        b = (x2 + C2) * mxEigenV
        a = b + C1
        slope = 2.0 * x2 * mxEigenV + b + a
        if slope == 0.0:
            # Zero derivative (e.g. every coordinate is zero, as for a
            # single centered point): a Newton step would divide by zero
            # and poison every later value with NaN. Keep the estimate.
            break
        delta = (a * mxEigenV + C0) / slope
        mxEigenV -= delta
        if abs(mxEigenV - oldg) < abs(evalprec * mxEigenV):
            break
    else:
        print(f"Newton-Rhapson did not converge after {nr_it} iterations")

    # The original code has a guard if minScore > 0 and rmsd < minScore, although
    # the default value of minScore is -1. For simplicity, we ignore that check.
    rmsd = (2.0 * abs(E0 - mxEigenV) / natoms) ** 0.5

    # Entries of the symmetric 4x4 matrix (K - mxEigenV * I).
    a11 = SxxpSyy + Szz - mxEigenV
    a12 = SyzmSzy
    a13 = negSxzmSzx
    a14 = SxymSyx
    a21 = SyzmSzy
    a22 = SxxmSyy - Szz - mxEigenV
    a23 = SxypSyx
    a24 = SxzpSzx
    a31 = a13
    a32 = a23
    a33 = Syy - Sxx - Szz - mxEigenV
    a34 = SyzpSzy
    a41 = a14
    a42 = a24
    a43 = a34
    a44 = Szz - SxxpSyy - mxEigenV

    # 2x2 minors built from rows 3 and 4.
    a3344_4334 = a33 * a44 - a43 * a34
    a3244_4234 = a32 * a44 - a42 * a34
    a3243_4233 = a32 * a43 - a42 * a33
    a3143_4133 = a31 * a43 - a41 * a33
    a3144_4134 = a31 * a44 - a41 * a34
    a3142_4132 = a31 * a42 - a41 * a32

    # Candidate eigenvector from one set of cofactors; if its norm is too
    # small to normalize safely, fall back to the next set, up to four tries.
    q1 = a22 * a3344_4334 - a23 * a3244_4234 + a24 * a3243_4233
    q2 = -a21 * a3344_4334 + a23 * a3144_4134 - a24 * a3143_4133
    q3 = a21 * a3244_4234 - a22 * a3144_4134 + a24 * a3142_4132
    q4 = -a21 * a3243_4233 + a22 * a3143_4133 - a23 * a3142_4132

    qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4

    evecprec = 1e-6
    if qsqr < evecprec:
        q1 = a12 * a3344_4334 - a13 * a3244_4234 + a14 * a3243_4233
        q2 = -a11 * a3344_4334 + a13 * a3144_4134 - a14 * a3143_4133
        q3 = a11 * a3244_4234 - a12 * a3144_4134 + a14 * a3142_4132
        q4 = -a11 * a3243_4233 + a12 * a3143_4133 - a13 * a3142_4132
        qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4

        if qsqr < evecprec:
            # 2x2 minors built from rows 1 and 2.
            a1324_1423 = a13 * a24 - a14 * a23
            a1224_1422 = a12 * a24 - a14 * a22
            a1223_1322 = a12 * a23 - a13 * a22
            a1124_1421 = a11 * a24 - a14 * a21
            a1123_1321 = a11 * a23 - a13 * a21
            a1122_1221 = a11 * a22 - a12 * a21

            q1 = a42 * a1324_1423 - a43 * a1224_1422 + a44 * a1223_1322
            q2 = -a41 * a1324_1423 + a43 * a1124_1421 - a44 * a1123_1321
            q3 = a41 * a1224_1422 - a42 * a1124_1421 + a44 * a1122_1221
            q4 = -a41 * a1223_1322 + a42 * a1123_1321 - a43 * a1122_1221
            qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4

            if qsqr < evecprec:
                q1 = a32 * a1324_1423 - a33 * a1224_1422 + a34 * a1223_1322
                q2 = -a31 * a1324_1423 + a33 * a1124_1421 - a34 * a1123_1321
                q3 = a31 * a1224_1422 - a32 * a1124_1421 + a34 * a1122_1221
                q4 = -a31 * a1223_1322 + a32 * a1123_1321 - a33 * a1122_1221
                qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4

                if qsqr < evecprec:
                    # No candidate is usable: give up on a rotation and
                    # return the identity, with the quaternion that
                    # corresponds to it.
                    return rmsd, np.eye(3), (1.0, 0.0, 0.0, 0.0)

    normq = qsqr**0.5
    q1 /= normq
    q2 /= normq
    q3 /= normq
    q4 /= normq

    # Unit quaternion -> 3x3 rotation matrix.
    a2 = q1 * q1
    x2 = q2 * q2
    y2 = q3 * q3
    z2 = q4 * q4

    xy = q2 * q3
    az = q1 * q4
    zx = q4 * q2
    ay = q1 * q3
    yz = q3 * q4
    ax = q1 * q2

    rot = np.array(
        [
            [a2 + x2 - y2 - z2, 2 * (xy + az), 2 * (zx - ay)],
            [2 * (xy - az), a2 - x2 + y2 - z2, 2 * (yz + ax)],
            [2 * (zx + ay), 2 * (yz - ax), a2 - x2 - y2 + z2],
        ]
    )

    return rmsd, rot, (q1, q2, q3, q4)
class QCPSuperimposer:
    """Quaternion Characteristic Polynomial (QCP) Superimposer.

    QCPSuperimposer finds the best rotation and translation to put
    two point sets on top of each other (minimizing the RMSD). This is
    useful, e.g., for superimposing 3D structures of proteins.

    QCP stands for Quaternion Characteristic Polynomial, which is used
    in the algorithm.

    Reference:

    Douglas L Theobald (2005), "Rapid calculation of RMSDs using a
    quaternion-based characteristic polynomial.", Acta Crystallogr
    A 61(4):478-480
    """

    def __init__(self):
        """Initialize the class."""
        self._reset_properties()

    # Private methods

    def _reset_properties(self):
        """Reset all relevant properties to None to avoid conflicts between runs."""
        self.reference_coords = None
        self.coords = None
        self.transformed_coords = None
        self.rot = None
        self.tran = None
        # Must exist before any superposition so that apply() can report a
        # missing transformation instead of failing on a missing attribute.
        self.rotran = None
        self.rms = None
        self.init_rms = None
        self._natoms = None

    # Public methods

    def set_atoms(self, fixed, moving):
        """Prepare alignment between two atom lists.

        Put (translate/rotate) the atoms in moving on the atoms in
        fixed, in such a way that the RMSD is minimized.

        :param fixed: list of (fixed) atoms
        :param moving: list of (moving) atoms
        :type fixed,moving: [L{Atom}, L{Atom},...]
        :raises PDBException: if the two lists differ in length
        """
        # Explicit check rather than an assert: asserts are removed when
        # Python runs with -O.
        if len(fixed) != len(moving):
            raise PDBException("Fixed and moving atom lists differ in size")

        # Grab coordinates in double precision
        fix_coord = np.array([a.get_coord() for a in fixed], dtype=np.float64)
        mov_coord = np.array([a.get_coord() for a in moving], dtype=np.float64)

        self.set(fix_coord, mov_coord)
        # run() fills in rms, rot, tran and rotran.
        self.run()

    def apply(self, atom_list):
        """Apply the QCP rotation matrix/translation vector to a set of atoms.

        :param atom_list: iterable of objects exposing ``transform(rot, tran)``
        :raises PDBException: if no transformation has been calculated yet
        """
        if self.rotran is None:
            raise PDBException("No transformation has been calculated yet")

        rot, tran = self.rotran
        for atom in atom_list:
            atom.transform(rot, tran)

    # Low(er) level functions

    def set(self, reference_coords, coords):
        """Set the coordinates to be superimposed.

        coords will be put on top of reference_coords.

        - reference_coords: an NxDIM array
        - coords: an NxDIM array

        DIM is the dimension of the points, N is the number
        of points to be superimposed.

        :raises PDBException: if the arrays differ in shape or are not Nx3
        """
        self._reset_properties()

        # Validate before storing, so a rejected call leaves the object in
        # its freshly reset state rather than holding unusable coordinates.
        if reference_coords.shape != coords.shape:
            raise PDBException("Coordinates must have the same dimensions.")
        if coords.ndim != 2 or coords.shape[1] != 3:
            raise PDBException("Coordinates must be Nx3 arrays.")

        # store coordinates
        self.reference_coords = reference_coords
        self.coords = coords
        self._natoms = coords.shape[0]

    def run(self):
        """Superimpose the coordinate sets.

        Stores the RMSD in ``rms``, the right-multiplying rotation matrix
        in ``rot``, the translation vector in ``tran`` and the pair
        ``(rot, tran)`` in ``rotran``.

        :raises PDBException: if no coordinates have been set
        """
        if self.coords is None or self.reference_coords is None:
            raise PDBException("No coordinates set.")

        # Work on double-precision copies: the stored arrays stay untouched
        # and in-place centering also works for integer input.
        coords = np.array(self.coords, dtype=np.float64)
        coords_ref = np.array(self.reference_coords, dtype=np.float64)

        # Center Coordinates
        com1 = np.mean(coords, axis=0)
        com2 = np.mean(coords_ref, axis=0)
        coords -= com1
        coords_ref -= com2

        (self.rms, self.rot, _) = qcp(coords_ref, coords, self._natoms)
        self.tran = com2 - np.dot(com1, self.rot)
        self.rotran = (self.rot, self.tran)

    # Getters

    def get_transformed(self):
        """Get the transformed coordinate set.

        :raises PDBException: if no coordinates are set or run() was not called
        """
        if self.coords is None or self.reference_coords is None:
            raise PDBException("No coordinates set.")

        if self.rot is None:
            raise PDBException("Nothing is superimposed yet.")

        self.transformed_coords = np.dot(self.coords, self.rot) + self.tran
        return self.transformed_coords

    def get_rotran(self):
        """Return right multiplying rotation matrix and translation vector.

        :raises PDBException: if nothing has been superimposed yet
        """
        if self.rot is None:
            raise PDBException("Nothing is superimposed yet.")
        return self.rot, self.tran

    def get_init_rms(self):
        """Return the root mean square deviation of untransformed coordinates.

        The value is computed on first use and cached in ``init_rms``.

        :raises PDBException: if no coordinates are set
        """
        if self.coords is None:
            raise PDBException("No coordinates set yet.")

        if self.init_rms is None:
            diff = self.coords - self.reference_coords
            # Element-wise squares summed over every point and axis; a
            # matrix product of two Nx3 arrays is not defined for N != 3.
            self.init_rms = np.sqrt(np.sum(diff * diff) / self._natoms)
        return self.init_rms

    def get_rms(self):
        """Root mean square deviation of superimposed coordinates.

        :raises PDBException: if nothing has been superimposed yet
        """
        if self.rms is None:
            raise PDBException("Nothing superimposed yet.")
        return self.rms