File size: 11,906 Bytes
b7731cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
# Copyright (C) 2022, Joao Rodrigues ([email protected])
#                     Anuj Sharma ([email protected])
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.

"""Structural alignment using Quaternion Characteristic Polynomial (QCP).

QCPSuperimposer finds the best rotation and translation to put
two point sets on top of each other (minimizing the RMSD). This is
useful, e.g., to superimpose crystal structures. QCP stands for
Quaternion Characteristic Polynomial, which is used in the algorithm.

Algorithm and original code described in:

Theobald DL.
Rapid calculation of RMSDs using a quaternion-based characteristic polynomial.
Acta Crystallogr A. 2005 Jul;61(Pt 4):478-80. doi: 10.1107/S0108767305015266.
Epub 2005 Jun 23. PMID: 15973002.
"""


import numpy as np

from Bio.PDB.PDBExceptions import PDBException


def qcp(coords1, coords2, natoms):
    """Implement the QCP code in Python.

    Input coordinate arrays must be centered at the origin and have
    shape Nx3.

    Variable names match (as much as possible) the C implementation.

    :param coords1: reference coordinates (Nx3 array, centered at the origin).
    :param coords2: mobile coordinates (Nx3 array, centered at the origin).
    :param natoms: number of points N, used to normalise the RMSD.
    :returns: a 3-tuple ``(rmsd, rot, q)`` where ``rmsd`` is the minimal
        RMSD, ``rot`` is a 3x3 right-multiplying rotation matrix (so that
        ``np.dot(coords2, rot)`` is superimposed on ``coords1``) and ``q``
        holds the four quaternion components. If none of the four candidate
        eigenvectors has a squared norm of at least 1e-6, ``rot`` is the
        identity matrix and ``q`` is returned un-normalised, as a list.
    """
    # Original code has coords1 be the mobile. I think it makes more sense
    # for it to be the reference, so I swapped here.
    #
    # G1/G2 are the sums of squared coordinates of each set. They equal
    # trace(X . X^T), but summing the squares directly avoids building an
    # NxN matrix only to read its diagonal.
    G1 = np.sum(coords2 * coords2)
    G2 = np.sum(coords1 * coords1)
    A = np.dot(coords2.T, coords1)  # referred to as M in the original paper.
    E0 = (G1 + G2) * 0.5

    Sxx, Sxy, Sxz, Syx, Syy, Syz, Szx, Szy, Szz = A.flatten()

    Sxx2 = Sxx * Sxx
    Syy2 = Syy * Syy
    Szz2 = Szz * Szz
    Sxy2 = Sxy * Sxy
    Syz2 = Syz * Syz
    Sxz2 = Sxz * Sxz
    Syx2 = Syx * Syx
    Szy2 = Szy * Szy
    Szx2 = Szx * Szx

    SyzSzymSyySzz2 = 2.0 * (Syz * Szy - Syy * Szz)
    Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2

    # Coefficients of the quartic characteristic polynomial
    # P(x) = x^4 + C2 * x^2 + C1 * x + C0 (there is no cubic term).
    C2 = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2)
    C1 = 8.0 * (
        Sxx * Syz * Szy
        + Syy * Szx * Sxz
        + Szz * Sxy * Syx
        - Sxx * Syy * Szz
        - Syz * Szx * Sxy
        - Szy * Syx * Sxz
    )

    SxzpSzx = Sxz + Szx
    SyzpSzy = Syz + Szy
    SxypSyx = Sxy + Syx
    SyzmSzy = Syz - Szy
    SxzmSzx = Sxz - Szx
    SxymSyx = Sxy - Syx
    SxxpSyy = Sxx + Syy
    SxxmSyy = Sxx - Syy
    Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2

    negSxzpSzx = -SxzpSzx
    negSxzmSzx = -SxzmSzx
    negSxymSyx = -SxymSyx
    SxxpSyy_p_Szz = SxxpSyy + Szz

    C0 = (
        Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2
        + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2)
        * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2)
        + (negSxzpSzx * (SyzmSzy) + (SxymSyx) * (SxxmSyy - Szz))
        * (negSxzmSzx * (SyzpSzy) + (SxymSyx) * (SxxmSyy + Szz))
        + (negSxzpSzx * (SyzpSzy) - (SxypSyx) * (SxxpSyy - Szz))
        * (negSxzmSzx * (SyzmSzy) - (SxypSyx) * SxxpSyy_p_Szz)
        + (+(SxypSyx) * (SyzpSzy) + (SxzpSzx) * (SxxmSyy + Szz))
        * (negSxymSyx * (SyzmSzy) + (SxzpSzx) * SxxpSyy_p_Szz)
        + (+(SxypSyx) * (SyzmSzy) + (SxzmSzx) * (SxxmSyy - Szz))
        * (negSxymSyx * (SyzpSzy) + (SxzmSzx) * (SxxpSyy - Szz))
    )

    # Newton-Raphson
    # Original paper mentions 5 iterations are sufficient (on average)
    # for convergence up to 10^-6 precision but original code writes 50.
    # I guess for robustness.
    nr_it = 50
    mxEigenV = E0  # starting guess for the largest eigenvalue
    evalprec = 1e-11  # relative tolerance on the eigenvalue
    for _ in range(nr_it):
        oldg = mxEigenV
        x2 = mxEigenV * mxEigenV
        b = (x2 + C2) * mxEigenV
        a = b + C1
        # delta = P(x) / P'(x), both evaluated from the shared terms a and b.
        delta = (a * mxEigenV + C0) / (2.0 * x2 * mxEigenV + b + a)
        mxEigenV -= delta
        if abs(mxEigenV - oldg) < abs(evalprec * mxEigenV):
            break
    else:
        # Only reached when the loop ran out of iterations without a break.
        print(f"Newton-Rhapson did not converge after {nr_it} iterations")

    # The original code has a guard if minScore > 0 and rmsd < minScore, although
    # the default value of minScore is -1. For simplicity, we ignore that check.
    rmsd = (2.0 * abs(E0 - mxEigenV) / natoms) ** 0.5

    # Entries of the symmetric 4x4 matrix (K - mxEigenV * I).
    a11 = SxxpSyy + Szz - mxEigenV
    a12 = SyzmSzy
    a13 = negSxzmSzx
    a14 = SxymSyx
    a21 = SyzmSzy
    a22 = SxxmSyy - Szz - mxEigenV
    a23 = SxypSyx
    a24 = SxzpSzx
    a31 = a13
    a32 = a23
    a33 = Syy - Sxx - Szz - mxEigenV
    a34 = SyzpSzy
    a41 = a14
    a42 = a24
    a43 = a34
    a44 = Szz - SxxpSyy - mxEigenV
    # 2x2 minors taken from rows 3 and 4.
    a3344_4334 = a33 * a44 - a43 * a34
    a3244_4234 = a32 * a44 - a42 * a34
    a3243_4233 = a32 * a43 - a42 * a33
    a3143_4133 = a31 * a43 - a41 * a33
    a3144_4134 = a31 * a44 - a41 * a34
    a3142_4132 = a31 * a42 - a41 * a32
    # First candidate quaternion: cofactor expansion along row 2.
    q1 = a22 * a3344_4334 - a23 * a3244_4234 + a24 * a3243_4233
    q2 = -a21 * a3344_4334 + a23 * a3144_4134 - a24 * a3143_4133
    q3 = a21 * a3244_4234 - a22 * a3144_4134 + a24 * a3142_4132
    q4 = -a21 * a3243_4233 + a22 * a3143_4133 - a23 * a3142_4132

    qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4

    # If a candidate is too small to normalise, fall back to the expansion
    # along another row; after four failures give up and return the
    # identity rotation.
    evecprec = 1e-6
    if qsqr < evecprec:
        q1 = a12 * a3344_4334 - a13 * a3244_4234 + a14 * a3243_4233
        q2 = -a11 * a3344_4334 + a13 * a3144_4134 - a14 * a3143_4133
        q3 = a11 * a3244_4234 - a12 * a3144_4134 + a14 * a3142_4132
        q4 = -a11 * a3243_4233 + a12 * a3143_4133 - a13 * a3142_4132
        qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4

        if qsqr < evecprec:
            # 2x2 minors taken from rows 1 and 2.
            a1324_1423 = a13 * a24 - a14 * a23
            a1224_1422 = a12 * a24 - a14 * a22
            a1223_1322 = a12 * a23 - a13 * a22
            a1124_1421 = a11 * a24 - a14 * a21
            a1123_1321 = a11 * a23 - a13 * a21
            a1122_1221 = a11 * a22 - a12 * a21

            q1 = a42 * a1324_1423 - a43 * a1224_1422 + a44 * a1223_1322
            q2 = -a41 * a1324_1423 + a43 * a1124_1421 - a44 * a1123_1321
            q3 = a41 * a1224_1422 - a42 * a1124_1421 + a44 * a1122_1221
            q4 = -a41 * a1223_1322 + a42 * a1123_1321 - a43 * a1122_1221
            qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4

            if qsqr < evecprec:
                q1 = a32 * a1324_1423 - a33 * a1224_1422 + a34 * a1223_1322
                q2 = -a31 * a1324_1423 + a33 * a1124_1421 - a34 * a1123_1321
                q3 = a31 * a1224_1422 - a32 * a1124_1421 + a34 * a1122_1221
                q4 = -a31 * a1223_1322 + a32 * a1123_1321 - a33 * a1122_1221
                qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4

                if qsqr < evecprec:
                    rot = np.eye(3)
                    return rmsd, rot, [q1, q2, q3, q4]

    # Normalise the quaternion.
    normq = qsqr**0.5
    q1 /= normq
    q2 /= normq
    q3 /= normq
    q4 /= normq

    a2 = q1 * q1
    x2 = q2 * q2
    y2 = q3 * q3
    z2 = q4 * q4

    xy = q2 * q3
    az = q1 * q4
    zx = q4 * q2
    ay = q1 * q3
    yz = q3 * q4
    ax = q1 * q2

    # Convert the unit quaternion into a 3x3 rotation matrix.
    rot = np.zeros((3, 3))

    rot[0][0] = a2 + x2 - y2 - z2
    rot[0][1] = 2 * (xy + az)
    rot[0][2] = 2 * (zx - ay)
    rot[1][0] = 2 * (xy - az)
    rot[1][1] = a2 - x2 + y2 - z2
    rot[1][2] = 2 * (yz + ax)
    rot[2][0] = 2 * (zx + ay)
    rot[2][1] = 2 * (yz - ax)
    rot[2][2] = a2 - x2 - y2 + z2

    return rmsd, rot, (q1, q2, q3, q4)


class QCPSuperimposer:
    """Quaternion Characteristic Polynomial (QCP) Superimposer.

    QCPSuperimposer finds the best rotation and translation to put
    two point sets on top of each other (minimizing the RMSD). This is
    useful, e.g., for superimposing 3D structures of proteins.

    QCP stands for Quaternion Characteristic Polynomial, which is used
    in the algorithm.

    Typical use is either ``set_atoms(fixed, moving)`` followed by
    ``apply(atoms)``, or the lower level ``set(reference_coords, coords)``
    followed by ``run()`` and the getters.

    Reference:

    Douglas L Theobald (2005), "Rapid calculation of RMSDs using a
    quaternion-based characteristic polynomial.", Acta Crystallogr
    A 61(4):478-480
    """

    def __init__(self):
        """Initialize the class."""
        self._reset_properties()

    # Private methods

    def _reset_properties(self):
        """Reset all relevant properties to None to avoid conflicts between runs."""
        self.reference_coords = None
        self.coords = None
        self.transformed_coords = None
        self.rot = None
        self.tran = None
        # rotran must exist from the start so that apply() can test it.
        self.rotran = None
        self.rms = None
        self.init_rms = None
        self._natoms = None

    # Public methods
    def set_atoms(self, fixed, moving):
        """Prepare alignment between two atom lists.

        Compute the rotation/translation that puts the atoms in moving
        on the atoms in fixed, in such a way that the RMSD is minimized.
        The result is stored in the ``rms`` and ``rotran`` attributes.

        :param fixed: list of (fixed) atoms
        :param moving: list of (moving) atoms
        :type fixed,moving: [L{Atom}, L{Atom},...]
        :raises PDBException: if the two lists differ in length.
        """
        # Explicit raise rather than assert: asserts vanish under ``-O``.
        if len(fixed) != len(moving):
            raise PDBException("Fixed and moving atom lists differ in size")

        # Grab coordinates in double precision
        fix_coord = np.array([a.get_coord() for a in fixed], dtype=np.float64)
        mov_coord = np.array([a.get_coord() for a in moving], dtype=np.float64)

        self.set(fix_coord, mov_coord)
        # run() fills in rms, rot, tran and rotran.
        self.run()

    def apply(self, atom_list):
        """Apply the QCP rotation matrix/translation vector to a set of atoms.

        :raises PDBException: if no transformation has been calculated yet.
        """
        if self.rotran is None:
            raise PDBException("No transformation has been calculated yet")

        rot, tran = self.rotran
        for atom in atom_list:
            atom.transform(rot, tran)

    # Low(er) level functions
    def set(self, reference_coords, coords):
        """Set the coordinates to be superimposed.

        coords will be put on top of reference_coords.

        - reference_coords: an Nx3 array (or array-like)
        - coords: an Nx3 array (or array-like)

        N is the number of points to be superimposed. Any results of a
        previous run are discarded.

        :raises PDBException: if the two inputs differ in shape or are
            not Nx3.
        """
        self._reset_properties()

        # asarray is a no-op for ndarrays and lets nested lists through.
        reference_coords = np.asarray(reference_coords)
        coords = np.asarray(coords)

        # Validate before storing so a failed call leaves no partial state.
        if reference_coords.shape != coords.shape:
            raise PDBException("Coordinates must have the same dimensions.")
        if coords.ndim != 2 or coords.shape[1] != 3:
            raise PDBException("Coordinates must be Nx3 arrays.")

        # store coordinates
        self.reference_coords = reference_coords
        self.coords = coords
        self._natoms = coords.shape[0]

    def run(self):
        """Superimpose the coordinate sets.

        Stores the RMSD in ``rms``, the right-multiplying rotation matrix
        in ``rot``, the translation vector in ``tran`` and the pair
        ``(rot, tran)`` in ``rotran``.

        :raises PDBException: if no coordinates have been set.
        """
        if self.coords is None or self.reference_coords is None:
            raise PDBException("No coordinates set.")

        # Double-precision copies: the stored arrays are left untouched and
        # integer input can be centered in place.
        coords = np.array(self.coords, dtype=np.float64)
        coords_ref = np.array(self.reference_coords, dtype=np.float64)

        # Center Coordinates
        com1 = np.mean(coords, axis=0)
        com2 = np.mean(coords_ref, axis=0)

        coords -= com1
        coords_ref -= com2

        (self.rms, self.rot, _) = qcp(coords_ref, coords, self._natoms)
        # Translation that maps the rotated mobile centroid onto the
        # reference centroid.
        self.tran = com2 - np.dot(com1, self.rot)
        self.rotran = (self.rot, self.tran)

    # Getters
    def get_transformed(self):
        """Get the transformed coordinate set.

        :raises PDBException: if no coordinates are set or run() has not
            been called.
        """
        if self.coords is None or self.reference_coords is None:
            raise PDBException("No coordinates set.")

        if self.rot is None:
            raise PDBException("Nothing is superimposed yet.")

        self.transformed_coords = np.dot(self.coords, self.rot) + self.tran
        return self.transformed_coords

    def get_rotran(self):
        """Return right multiplying rotation matrix and translation vector.

        :raises PDBException: if run() has not been called.
        """
        if self.rot is None:
            raise PDBException("Nothing is superimposed yet.")
        return self.rot, self.tran

    def get_init_rms(self):
        """Return the root mean square deviation of untransformed coordinates.

        The value is computed on first use and cached until the next set().

        :raises PDBException: if no coordinates are set.
        """
        if self.coords is None:
            raise PDBException("No coordinates set yet.")

        if self.init_rms is None:
            diff = self.coords - self.reference_coords
            # Element-wise squares; np.dot(diff, diff) would attempt a
            # matrix product of two Nx3 arrays.
            self.init_rms = np.sqrt(np.sum(diff * diff) / self._natoms)
        return self.init_rms

    def get_rms(self):
        """Root mean square deviation of superimposed coordinates.

        :raises PDBException: if run() has not been called.
        """
        if self.rms is None:
            raise PDBException("Nothing superimposed yet.")
        return self.rms