Spaces:
No application file
No application file
# Copyright 2019-22 by Robert T. Miller. All rights reserved. | |
# This file is part of the Biopython distribution and governed by your | |
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
# Please see the LICENSE file that should have been included as part of this | |
# package. | |
"""Classes to support internal coordinates for protein structures. | |
Internal coordinates comprise Psi, Omega and Phi dihedral angles along the | |
protein backbone, Chi angles along the sidechains, and all 3-atom angles and | |
bond lengths defining a protein chain. These routines can compute internal | |
coordinates from atom XYZ coordinates, and compute atom XYZ coordinates from | |
internal coordinates. | |
Secondary benefits include the ability to align and compare residue | |
environments in 3D structures, support for 2D atom distance plots, converting a | |
distance plot plus chirality information to a structure, generating an OpenSCAD | |
description of a structure for 3D printing, and reading/writing structures as | |
internal coordinate data files. | |
**Usage:** | |
:: | |
from Bio.PDB.PDBParser import PDBParser | |
from Bio.PDB.Chain import Chain | |
from Bio.PDB.internal_coords import * | |
from Bio.PDB.PICIO import write_PIC, read_PIC, read_PIC_seq | |
from Bio.PDB.ic_rebuild import write_PDB, IC_duplicate, structure_rebuild_test | |
from Bio.PDB.SCADIO import write_SCAD | |
from Bio.Seq import Seq | |
from Bio.SeqRecord import SeqRecord | |
from Bio.PDB.PDBIO import PDBIO | |
import numpy as np | |
# load a structure as normal, get first chain | |
parser = PDBParser() | |
myProtein = parser.get_structure("7rsa", "pdb7rsa.ent") | |
myChain = myProtein[0]["A"] | |
# compute bond lengths, angles, dihedral angles | |
myChain.atom_to_internal_coordinates(verbose=True) | |
# check myChain makes sense (can get angles and rebuild same structure) | |
resultDict = structure_rebuild_test(myChain) | |
assert resultDict['pass'] == True | |
# get residue 1 chi2 angle | |
r1 = next(myChain.get_residues()) | |
r1chi2 = r1.internal_coord.get_angle("chi2") | |
# rotate residue 1 chi2 angle by 120 degrees (loops w/in +/-180) | |
r1.internal_coord.set_angle("chi2", r1chi2 + 120.0) | |
# update myChain XYZ coordinates with chi2 changed | |
myChain.internal_to_atom_coordinates() | |
# write new conformation with PDBIO | |
write_PDB(myProtein, "myChain.pdb") | |
# or just the ATOM records without headers: | |
io = PDBIO() | |
io.set_structure(myProtein) | |
io.save("myChain2.pdb") | |
# write chain as 'protein internal coordinates' (.pic) file | |
write_PIC(myProtein, "myChain.pic") | |
# read .pic file | |
myProtein2 = read_PIC("myChain.pic") | |
# create default structure for random sequence by reading as .pic file | |
myProtein3 = read_PIC_seq( | |
SeqRecord( | |
Seq("GAVLIMFPSTCNQYWDEHKR"), | |
id="1RND", | |
description="my random sequence", | |
) | |
) | |
myProtein3.internal_to_atom_coordinates() | |
write_PDB(myProtein3, "myRandom.pdb") | |
# access the all-dihedrals array for the chain, e.g. residue 1 chi2 angle: | |
r1chi2_obj = r1.internal_coord.pick_angle("chi2") | |
# or same thing: r1chi2_obj = r1.internal_coord.pick_angle("CA:CB:CG:CD") | |
r1chi2_key = r1chi2_obj.atomkeys | |
# r1chi2_key is tuple of AtomKeys (1_K_CA, 1_K_CB, 1_K_CG, 1_K_CD) | |
r1chi2_index = myChain.internal_coord.dihedraNdx[r1chi2_key] | |
# or same thing: r1chi2_index = r1chi2_obj.ndx | |
r1chi2_value = myChain.internal_coord.dihedraAngle[r1chi2_index] | |
# also true: r1chi2_obj == myChain.internal_coord.dihedra[r1chi2_index] | |
# access the array of all atoms for the chain, e.g. residue 1 C-beta | |
r1_cBeta_index = myChain.internal_coord.atomArrayIndex[AtomKey("1_K_CB")] | |
r1_cBeta_coords = myChain.internal_coord.atomArray[r1_cBeta_index] | |
# r1_cBeta_coords = [ x, y, z, 1.0 ] | |
# the Biopython Atom coord array is now a view into atomArray, so | |
assert r1_cBeta_coords[1] == r1["CB"].coord[1] | |
r1_cBeta_coords[1] += 1.0 # change the Y coord 1 angstrom | |
assert r1_cBeta_coords[1] == r1["CB"].coord[1] | |
# they are always the same (they share the same memory) | |
r1_cBeta_coords[1] -= 1.0 # restore | |
# create a selector to filter just the C-alpha atoms from the all atom array | |
atmNameNdx = AtomKey.fields.atm | |
atomArrayIndex = myChain.internal_coord.atomArrayIndex | |
CaSelect = [ | |
atomArrayIndex.get(k) for k in atomArrayIndex.keys() if k.akl[atmNameNdx] == "CA" | |
] | |
# now the ordered array of C-alpha atom coordinates is: | |
CA_coords = myChain.internal_coord.atomArray[CaSelect] | |
# note this uses Numpy fancy indexing, so CA_coords is a new copy | |
# create a C-alpha distance plot | |
caDistances = myChain.internal_coord.distance_plot(CaSelect) | |
# display with e.g. MatPlotLib: | |
# import matplotlib.pyplot as plt | |
# plt.imshow(caDistances, cmap="hot", interpolation="nearest") | |
# plt.show() | |
# build structure from distance plot: | |
## create the all-atom distance plot | |
distances = myChain.internal_coord.distance_plot() | |
## get the sign of the dihedral angles | |
chirality = myChain.internal_coord.dihedral_signs() | |
## get new, empty data structure : copy data structure from myChain | |
myChain2 = IC_duplicate(myChain)[0]["A"] | |
cic2 = myChain2.internal_coord | |
## clear the new atomArray and di/hedra value arrays, just for proof | |
cic2.atomArray = np.zeros((cic2.AAsiz, 4), dtype=np.float64) | |
cic2.dihedraAngle[:] = 0.0 | |
cic2.hedraAngle[:] = 0.0 | |
cic2.hedraL12[:] = 0.0 | |
cic2.hedraL23[:] = 0.0 | |
## copy just the first N-Ca-C coords so structures will superimpose: | |
cic2.copy_initNCaCs(myChain.internal_coord) | |
## copy distances to chain arrays: | |
cic2.distplot_to_dh_arrays(distances, chirality) | |
## compute angles and dihedral angles from distances: | |
cic2.distance_to_internal_coordinates() | |
## generate XYZ coordinates from internal coordinates: | |
myChain2.internal_to_atom_coordinates() | |
## confirm result atomArray matches original structure: | |
assert np.allclose(cic2.atomArray, myChain.internal_coord.atomArray) | |
# superimpose all phe-phe pairs - quick hack just to demonstrate concept | |
# for analyzing pairwise residue interactions. Generates PDB ATOM records | |
# placing each PHE at origin and showing all other PHEs in environment | |
## shorthand for key variables: | |
cic = myChain.internal_coord | |
resNameNdx = AtomKey.fields.resname | |
aaNdx = cic.atomArrayIndex | |
## select just PHE atoms: | |
pheAtomSelect = [aaNdx.get(k) for k in aaNdx.keys() if k.akl[resNameNdx] == "F"] | |
aaF = cic.atomArray[ pheAtomSelect ] # numpy fancy indexing makes COPY not view | |
for ric in cic.ordered_aa_ic_list: # internal_coords version of get_residues() | |
if ric.rbase[2] == "F": # if PHE, get transform matrices for chi1 dihedral | |
chi1 = ric.pick_angle("N:CA:CB:CG") # chi1 space has C-alpha at origin | |
cst = np.transpose(chi1.cst) # transform TO chi1 space | |
# rcst = np.transpose(chi1.rcst) # transform FROM chi1 space | |
cic.atomArray[pheAtomSelect] = aaF.dot(cst) # transform just the PHEs | |
for res in myChain.get_residues(): # print PHEs in new coordinate space | |
if res.resname in ["PHE"]: | |
print(res.internal_coord.pdb_residue_string()) | |
cic.atomArray[pheAtomSelect] = aaF # restore coordinate space from copy | |
# write OpenSCAD program of spheres and cylinders to 3d print myChain backbone | |
## set atom load filter to accept backbone only: | |
IC_Residue.accept_atoms = IC_Residue.accept_backbone | |
## delete existing data to force re-read of all atoms: | |
myChain.internal_coord = None | |
write_SCAD(myChain, "myChain.scad", scale=10.0) | |
See the `''Internal coordinates module''` section of the `Biopython Tutorial | |
and Cookbook` for further discussion. | |
**Terms and key data structures:** | |
Internal coordinates are defined on sequences of atoms which span | |
residues or follow accepted nomenclature along sidechains. To manage these | |
sequences and support Biopython's disorder mechanisms, :class:`AtomKey` | |
specifiers are implemented to capture residue, atom and variant identification | |
in a single object. A :class:`Hedron` object is specified as three sequential | |
AtomKeys, comprising two bond lengths and the bond angle between them. A | |
:class:`Dihedron` consists of four sequential AtomKeys, linking two Hedra with | |
a dihedral angle between them. | |
**Algorithmic overview:** | |
The Internal Coordinates module combines a specification of connected atoms as | |
hedra and dihedra in the :mod:`.ic_data` file with routines here to transform | |
XYZ coordinates of these atom sets between a local coordinate system and the | |
world coordinates supplied in e.g. a PDB or mmCif data file. The local | |
coordinate system places the center atom of a hedron at the origin (0,0,0), one | |
leg on the +Z axis, and the other leg on the XZ plane (see :class:`Hedron`). | |
Measurement and creation or manipulation of hedra and dihedra in the local | |
coordinate space is straightforward, and the calculated transformation matrices | |
enable assembling these subunits into a protein chain starting from supplied | |
(PDB) coordinates for the initial N-Ca-C atoms. | |
Psi and Phi angles are defined on atoms from adjacent residues in a protein | |
chain, see e.g. :meth:`.pick_angle` and :mod:`.ic_data` for the relevant | |
mapping between residues and backbone dihedral angles. | |
Transforms to and from the dihedron local coordinate space described above are | |
accessible via :data:`IC_Chain.dCoordSpace` and :class:`Dihedron` attributes | |
.cst and .rcst, and may be applied in the alignment and comparison of residues | |
and their environments with code along the lines of:: | |
chi1 = ric0.pick_angle("chi1") # chi1 space defined with CA at origin | |
cst = np.transpose(chi1.cst) # transform TO chi1 local space | |
newAtomCoords = oldAtomCoords.dot(cst) | |
The core algorithms were developed independently during 1993-4 for | |
`''Development and Application of a Three-dimensional Description of Amino Acid | |
Environments in Protein,'' Miller, Douthart, and Dunker, Advances in Molecular | |
Bioinformatics, IOS Press, 1994, ISBN 90 5199 172 x, pp. 9-30. | |
<https://www.google.com/books/edition/Advances_in_Molecular_Bioinformatics/VmFSNNm7k6cC?gbpv=1>`_ | |
A Protein Internal Coordinate (.pic) file format is defined to capture | |
sufficient detail to reproduce a PDB file from chain starting coordinates | |
(first residue N, Ca, C XYZ coordinates) and remaining internal coordinates. | |
These files are used internally to verify that a given structure can be | |
regenerated from its internal coordinates. See :mod:`.PICIO` for reading and | |
writing .pic files and :func:`.structure_rebuild_test` to determine if a | |
specific PDB or mmCif datafile has sufficient information to interconvert | |
between cartesian and internal coordinates. | |
Internal coordinates may also be exported as `OpenSCAD <https://www.openscad.org>`_ | |
data arrays for generating 3D printed protein models. OpenSCAD software is | |
provided as a starting point and proof-of-concept for generating such models. | |
See :mod:`.SCADIO` and this `Thingiverse project <https://www.thingiverse.com/thing:3957471>`_ | |
for a more advanced example. | |
Refer to :meth:`.distance_plot` and :meth:`.distance_to_internal_coordinates` | |
for converting structure data to/from 2D distance plots. | |
The following classes comprise the core functionality for processing internal | |
coordinates and are sufficiently related and coupled to place them together in | |
this module: | |
:class:`IC_Chain`: Extends Biopython Chain on .internal_coord attribute. | |
Manages connected sequence of residues and chain breaks; holds numpy arrays | |
for all atom coordinates and bond geometries. For 'parallel' processing | |
IC_Chain methods operate on these arrays with single numpy commands. | |
:class:`IC_Residue`: Extends Biopython Residue on .internal_coord attribute. | |
Access for per residue views on internal coordinates and methods for serial | |
(residue by residue) assembly. | |
:class:`Dihedron`: four joined atoms forming a dihedral angle. | |
Dihedral angle, homogeneous atom coordinates in local coordinate space, | |
references to relevant Hedra and IC_Residue. Getter methods for | |
residue dihedral angles, bond angles and bond lengths. | |
:class:`Hedron`: three joined atoms forming a plane. | |
Contains homogeneous atom coordinates in local coordinate space as well as | |
bond lengths and angle between them. | |
:class:`Edron`: base class for Hedron and Dihedron classes. | |
Tuple of AtomKeys comprising child, string ID, mainchain membership boolean | |
and other routines common for both Hedra and Dihedra. Implements rich | |
comparison. | |
:class:`AtomKey`: keys (dictionary and string) for referencing atom sequences. | |
Capture residue and disorder/occupancy information, provides a | |
no-whitespace key for .pic files, and implements rich comparison. | |
Custom exception classes: :class:`HedronMatchError` and | |
:class:`MissingAtomError` | |
""" # noqa | |
import re | |
from collections import deque, namedtuple | |
import copy | |
# from numpy import floor, ndarray | |
from numbers import Integral | |
try: | |
import numpy as np # type: ignore | |
except ImportError: | |
from Bio import MissingPythonDependencyError | |
raise MissingPythonDependencyError( | |
"Install numpy to build proteins from internal coordinates." | |
) | |
from Bio.PDB.Atom import Atom, DisorderedAtom | |
from Bio.Data.PDBData import protein_letters_3to1 | |
from Bio.PDB.vectors import multi_coord_space, multi_rot_Z | |
from Bio.PDB.vectors import coord_space | |
from Bio.PDB.ic_data import ic_data_backbone, ic_data_sidechains | |
from Bio.PDB.ic_data import primary_angles | |
from Bio.PDB.ic_data import ic_data_sidechain_extras, residue_atom_bond_state | |
from Bio.PDB.ic_data import dihedra_primary_defaults, hedra_defaults | |
# for type checking only | |
from typing import ( | |
List, | |
Dict, | |
Set, | |
TextIO, | |
Union, | |
Tuple, | |
cast, | |
TYPE_CHECKING, | |
Optional, | |
) | |
if TYPE_CHECKING: | |
from Bio.PDB.Residue import Residue | |
from Bio.PDB.Chain import Chain | |
# Type aliases for annotations.  "AtomKey" is quoted because it is a forward
# reference: the name is not yet bound at this point in the module.
HKT = Tuple["AtomKey", "AtomKey", "AtomKey"]  # Hedron key tuple
DKT = Tuple["AtomKey", "AtomKey", "AtomKey", "AtomKey"]  # Dihedron Key Tuple
EKT = Union[HKT, DKT]  # Edron Key Tuple
BKT = Tuple["AtomKey", "AtomKey"]  # Bond Key Tuple

# NOTE(review): np.array is numpy's array factory function; np.ndarray is the
# array type.  Presumably these aliases are meant as "a numpy array" - confirm
# before relying on them with a static type checker.
# HACS = Tuple[np.array, np.array, np.array]  # Hedron Atom Coord Set
HACS = np.array  # Hedron Atom Coord Set
DACS = Tuple[np.array, np.array, np.array, np.array]  # Dihedron Atom Coord Set
class IC_Chain: | |
"""Class to extend Biopython Chain with internal coordinate data. | |
Attributes | |
---------- | |
chain: object reference | |
The Biopython :class:`Bio.PDB.Chain.Chain` object this extends | |
MaxPeptideBond: float | |
**Class** attribute to detect chain breaks. | |
Override for fully contiguous chains with some very long bonds - e.g. | |
for 3D printing (OpenSCAD output) a structure with missing residues. | |
:data:`MaxPeptideBond` | |
ParallelAssembleResidues: bool | |
**Class** attribute affecting internal_to_atom_coordinates.
Short (50 residue and less) chains are faster to assemble without the | |
overhead of creating numpy arrays, and the algorithm is easier to | |
understand and trace processing a single residue at a time. Clearing | |
(set to False) this flag will switch to the serial algorithm | |
ordered_aa_ic_list: list | |
IC_Residue objects internal_coords algorithms can process (e.g. no | |
waters) | |
initNCaCs: List of N, Ca, C AtomKey tuples (NCaCKeys).
NCaCKeys start chain segments (first residue or after chain break). | |
These 3 atoms define the coordinate space for a contiguous chain | |
segment, as initially specified by PDB or mmCIF file. | |
AAsiz: int
AtomArray size, number of atoms in this chain | |
atomArray: numpy array | |
homogeneous atom coords ([x, y, z, 1.0]) for every atom in chain
atomArrayIndex: dict | |
maps AtomKeys to atomArray indexes | |
hedra: dict | |
Hedra forming residues in this chain; indexed by 3-tuples of AtomKeys. | |
hedraLen: int | |
length of hedra dict | |
hedraNdx: dict | |
maps hedra AtomKeys to numeric index into hedra data arrays e.g. | |
hedraL12 below | |
a2ha_map: [hedraLen x 3] | |
atom indexes in hedraNdx order | |
dihedra: dict | |
Dihedra forming residues in this chain; indexed by 4-tuples of AtomKeys. | |
dihedraLen: int | |
length of dihedra dict | |
dihedraNdx: dict | |
maps dihedra AtomKeys to dihedra data arrays e.g. dihedraAngle | |
a2da_map : [dihedraLen x 4] | |
AtomNdx's in dihedraNdx order | |
d2a_map : [dihedraLen x [4]] | |
AtomNdx's for each dihedron (reshaped a2da_map) | |
Numpy arrays for vector processing of chain di/hedra: | |
hedraL12: numpy array | |
bond length between hedron 1st and 2nd atom | |
hedraAngle: numpy array | |
bond angle for each hedron, in degrees | |
hedraL23: numpy array | |
bond length between hedron 2nd and 3rd atom | |
id3_dh_index: dict | |
maps hedron key to list of dihedra starting with hedron, used by | |
assemble and bond_rotate to find dihedra with h1 key | |
id32_dh_index: dict | |
like id3_dh_index, find dihedra from h2 key | |
hAtoms: numpy array | |
homogeneous atom coordinates (3x4) of hedra, central atom at origin | |
hAtomsR: numpy array | |
hAtoms in reverse orientation | |
hAtoms_needs_update: numpy array of bool | |
indicates whether hAtoms represent hedraL12/A/L23 | |
dihedraAngle: numpy array | |
dihedral angles (degrees) for each dihedron | |
dAtoms: numpy array | |
homogeneous atom coordinates (4x4) of dihedra, second atom at origin | |
dAtoms_needs_update: numpy array of bool | |
indicates whether dAtoms represent dihedraAngle | |
dCoordSpace: numpy array | |
forward and reverse transform matrices standardising positions of first | |
hedron. See :data:`dCoordSpace`. | |
dcsValid: bool | |
indicates dCoordSpace up to date | |
See also attributes generated by :meth:`build_edraArrays` for indexing | |
di/hedra data elements. | |
Methods | |
------- | |
internal_to_atom_coordinates: | |
Process ic data to Residue/Atom coordinates; calls assemble_residues() | |
assemble_residues: | |
Generate IC_Chain atom coords from internal coordinates (parallel) | |
assemble_residues_ser: | |
Generate IC_Residue atom coords from internal coordinates (serial) | |
atom_to_internal_coordinates: | |
Calculate dihedrals, angles, bond lengths (internal coordinates) for | |
Atom data | |
write_SCAD: | |
Write OpenSCAD matrices for internal coordinate data comprising chain; | |
this is a support routine, see :func:`.SCADIO.write_SCAD` to generate | |
OpenSCAD description of a protein chain. | |
distance_plot: | |
Generate 2D plot of interatomic distances with optional filter | |
distance_to_internal_coordinates: | |
Compute internal coordinates from distance plot and array of dihedral | |
angle signs. | |
""" | |
# Class globals | |
MaxPeptideBond = 1.4 | |
"""Larger C-N distance than this will be chain break""" | |
ParallelAssembleResidues = True | |
"""Enable parallel internal_to_atom algorithm, is slower for short chains""" | |
AAsiz = 0 | |
"""Number of atoms in this chain (size of atomArray)""" | |
atomArray: np.array = None | |
"""AAsiz x [4] of float np.float64 homogeneous atom coordinates, all atoms | |
in chain.""" | |
dCoordSpace = None | |
"""[2][dihedraLen][4][4] : 2 arrays of 4x4 coordinate space transforms for | |
each dihedron. The first [0] converts TO standard space with first atom on | |
the XZ plane, the second atom at the origin, the third on the +Z axis, and | |
the fourth placed according to the dihedral angle. The second [1] transform | |
returns FROM the standard space to world coordinates (PDB file input or | |
whatever is current). Also accessible as .cst (forward | |
transform) and .rcst (reverse transform) in :class:`Dihedron`.""" | |
dcsValid = None | |
"""True if dCoordSpace is up to date. Use :meth:`.update_dCoordSpace` | |
if needed.""" | |
# for assemble_residues | |
_dihedraSelect = np.array([True, True, True, False]) | |
_dihedraOK = np.array([True, True, True, True]) | |
def __init__(self, parent: "Chain", verbose: bool = False) -> None: | |
"""Initialize IC_Chain object, with or without residue/Atom data. | |
:param Bio.PDB.Chain parent: Biopython Chain object | |
Chain object this extends | |
""" | |
# type hinting parent as Chain leads to import cycle | |
self.chain = parent | |
self.ordered_aa_ic_list: List[IC_Residue] = [] | |
# self.initNCaC: Dict[Tuple[str], Dict["AtomKey", np.array]] = {} | |
self.initNCaCs = [] | |
self.sqMaxPeptideBond = np.square(IC_Chain.MaxPeptideBond) | |
# need init here for _gen_edra(): | |
self.hedra = {} | |
# self.hedraNdx = {} | |
self.dihedra = {} | |
# self.dihedraNdx = {} | |
# cache of AtomKey results for cak() | |
# self.akc: Dict[Tuple(IC_Residue, str), AtomKey] = {} | |
self.atomArrayIndex: Dict["AtomKey", int] = {} | |
self.bpAtomArray: List["Atom"] = [] # rtm | |
self._set_residues(verbose) # no effect if no residues loaded | |
def __deepcopy__(self, memo) -> "IC_Chain":
    """Implement deepcopy for IC_Chain.

    The copy is built in a fixed order so that objects reached through
    ``memo`` already exist when later attributes are copied.  The parent
    chain is looked up as ``memo[id(self.chain)]``, i.e. the Biopython
    Chain must have been registered in ``memo`` before this is called.

    :param dict memo: deepcopy memo dictionary
    :returns: the duplicate IC_Chain (or the one already stored in memo)
    """
    # already copied during this deepcopy pass?  hand back the same object
    existing = memo.get(id(self), False)
    if existing:
        return existing

    # create without running __init__, and register before copying children
    dup = type(self).__new__(self.__class__)
    memo[id(self)] = dup

    # parent chain copy is taken from memo, not created here
    dup.chain = memo[id(self.chain)]
    dup.chain.child_dict = copy.deepcopy(self.chain.child_dict, memo)
    # now have all res and ic_res but ic_res not complete
    dup.chain.child_list = copy.deepcopy(self.chain.child_list, memo)
    dup.akset = copy.deepcopy(self.akset, memo)
    dup.aktuple = copy.deepcopy(self.aktuple, memo)
    # now have all ak w/.ric
    dup.ordered_aa_ic_list = copy.deepcopy(self.ordered_aa_ic_list, memo)

    # .copy() here: new dict / new arrays holding the same values
    dup.atomArrayIndex = self.atomArrayIndex.copy()
    dup.atomArrayValid = self.atomArrayValid.copy()
    dup.atomArray = self.atomArray.copy()

    dup.hedra = copy.deepcopy(self.hedra, memo)
    dup.dihedra = copy.deepcopy(self.dihedra, memo)

    dup.id3_dh_index = copy.deepcopy(self.id3_dh_index, memo)
    dup.id32_dh_index = copy.deepcopy(self.id32_dh_index, memo)

    # update missing items in ic_residues and
    # set all bp residue atom coords to be views on dup.atomArray
    # [similar in build_AtomArray() but does not copy from bpAtoms
    # or modify atomArrayValid, and accesses dup]

    dup.AAsiz = self.AAsiz

    dup.bpAtomArray = [None] * dup.AAsiz  # rtm

    def setAtomVw(res, atm):
        # point one Biopython atom at its row of the duplicate's atomArray
        ak = AtomKey(res.internal_coord, atm)
        ndx = dup.atomArrayIndex[ak]
        atm.coord = dup.atomArray[ndx, 0:3]  # make view on atomArray

        dup.bpAtomArray[ndx] = atm  # rtm

    def setResAtmVws(res):
        # apply setAtomVw to every atom of res, including each altloc child
        for atm in res.get_atoms():
            # copy not filter so ignore no_altloc
            if atm.is_disordered():
                for altAtom in atm.child_dict.values():
                    setAtomVw(res, altAtom)
            else:
                setAtomVw(res, atm)

    for ric in dup.ordered_aa_ic_list:
        setResAtmVws(ric.residue)
        # complete the per-residue attributes of the copied IC_Residue
        ric.rprev = copy.deepcopy(ric.rprev, memo)
        ric.rnext = copy.deepcopy(ric.rnext, memo)
        ric.ak_set = copy.deepcopy(ric.ak_set, memo)
        ric.akc = copy.deepcopy(ric.akc, memo)
        ric.dihedra = copy.deepcopy(ric.dihedra, memo)
        ric.hedra = copy.deepcopy(ric.hedra, memo)

    dup.sqMaxPeptideBond = self.sqMaxPeptideBond
    dup.initNCaCs = copy.deepcopy(self.initNCaCs, memo)

    # hedra data: scalar length, value arrays, key -> index dict
    dup.hedraLen = self.hedraLen
    dup.hedraL12 = self.hedraL12.copy()
    dup.hedraAngle = self.hedraAngle.copy()
    dup.hedraL23 = self.hedraL23.copy()
    dup.hedraNdx = copy.deepcopy(self.hedraNdx, memo)

    # dihedra data: scalar length, value arrays, key -> index dict
    dup.dihedraLen = self.dihedraLen
    dup.dihedraAngle = self.dihedraAngle.copy()
    dup.dihedraAngleRads = self.dihedraAngleRads.copy()
    dup.dihedraNdx = copy.deepcopy(self.dihedraNdx, memo)

    # index maps between atoms, hedra and dihedra
    dup.a2da_map = self.a2da_map.copy()
    dup.a2d_map = self.a2d_map.copy()
    dup.d2a_map = self.d2a_map.copy()

    dup.dH1ndx = self.dH1ndx.copy()
    dup.dH2ndx = self.dH2ndx.copy()

    dup.hAtoms = self.hAtoms.copy()
    dup.hAtomsR = self.hAtomsR.copy()
    dup.hAtoms_needs_update = self.hAtoms_needs_update.copy()

    dup.dRev = self.dRev.copy()
    dup.dFwd = self.dFwd.copy()
    dup.dAtoms_needs_update = self.dAtoms_needs_update.copy()

    dup.dAtoms = self.dAtoms.copy()
    dup.a4_pre_rotation = self.a4_pre_rotation.copy()

    dup.dCoordSpace = self.dCoordSpace.copy()
    dup.dcsValid = self.dcsValid.copy()

    # re-point each copied dihedron's transforms at the duplicate's
    # dCoordSpace entries
    for d in dup.dihedra.values():
        d.cst = dup.dCoordSpace[0][d.ndx]
        d.rcst = dup.dCoordSpace[1][d.ndx]

    return dup
# return True if a0, a1 within supplied cutoff | |
def _atm_dist_chk(self, a0: Atom, a1: Atom, cutoff: float, sqCutoff: float) -> bool: | |
return sqCutoff > np.sum(np.square(a0.coord - a1.coord)) | |
# return a string describing issue, or None if OK | |
def _peptide_check(self, prev: "Residue", curr: "Residue") -> Optional[str]: | |
if 0 == len(curr.child_dict): | |
# curr residue with no atoms => reading pic file, no break | |
return None | |
if (0 != len(curr.child_dict)) and (0 == len(prev.child_dict)): | |
# prev residue with no atoms, curr has atoms => reading pic file, | |
# have break | |
return "PIC data missing atoms" | |
# handle non-standard AA not marked as HETATM (1KQF, 1NTH) | |
if not prev.internal_coord.isAccept: | |
return "previous residue not standard/accepted amino acid" | |
# both biopython Residues have Atoms, so check distance | |
Natom = curr.child_dict.get("N", None) | |
pCatom = prev.child_dict.get("C", None) | |
if Natom is None or pCatom is None: | |
return f"missing {'previous C' if pCatom is None else 'N'} atom" | |
# confirm previous residue has all backbone atoms | |
pCAatom = prev.child_dict.get("CA", None) | |
pNatom = prev.child_dict.get("N", None) | |
if pNatom is None or pCAatom is None: | |
return "previous residue missing N or Ca" | |
if IC_Residue.no_altloc: | |
if Natom.is_disordered(): | |
Natom = Natom.selected_child | |
if pCatom.is_disordered(): | |
pCatom = pCatom.selected_child | |
if IC_Residue.no_altloc or ( | |
not Natom.is_disordered() and not pCatom.is_disordered() | |
): | |
dc = self._atm_dist_chk( | |
Natom, pCatom, IC_Chain.MaxPeptideBond, self.sqMaxPeptideBond | |
) | |
if dc: | |
return None | |
else: | |
return f"MaxPeptideBond ({IC_Chain.MaxPeptideBond} angstroms) exceeded" | |
# drop through for else Natom or pCatom is disordered: | |
Nlist: List[Atom] = [] | |
pClist: List[Atom] = [] | |
if Natom.is_disordered(): | |
Nlist.extend(Natom.child_dict.values()) | |
else: | |
Nlist = [Natom] | |
if pCatom.is_disordered(): | |
pClist.extend(pCatom.child_dict.values()) | |
else: | |
pClist = [pCatom] | |
for n in Nlist: | |
for c in pClist: | |
if self._atm_dist_chk( | |
n, c, IC_Chain.MaxPeptideBond, self.sqMaxPeptideBond | |
): | |
return None | |
return f"MaxPeptideBond ({IC_Chain.MaxPeptideBond} angstroms) exceeded" | |
def clear_ic(self): | |
"""Clear residue internal_coord settings for this chain.""" | |
for res in self.chain.get_residues(): | |
res.internal_coord = None | |
def _add_residue(
    self,
    res: "Residue",
    last_res: List,
    last_ord_res: List,
    verbose: bool = False,
) -> bool:
    """Set rprev, rnext, manage chain break.

    Creates a new IC_Residue for ``res`` (replacing any existing
    .internal_coord) and either links it to the previous residue(s) or,
    on a chain break, records its N, Ca, C AtomKeys in initNCaCs so the
    chain can restart here.

    :param Residue res: Biopython Residue to add
    :param list last_res: IC_Residues accepted for the previous position
        (empty if none were)
    :param list last_ord_res: IC_Residues of the most recent position
        that was added to the chain
    :param bool verbose: default False
        print the reason when a chain break is started at this residue
    :returns: True for no chain break or residue has sufficient data to
        restart at this position after a chain break; False means
        insufficient data to extend chain with this residue
    """
    # overwrite any existing .internal_coord in case re-initialising chain
    # expected state here is res.internal_coord = None
    res.internal_coord = IC_Residue(res)
    ric = res.internal_coord
    ric.cic = self

    # Only test the peptide bond when there is an immediately preceding,
    # accepted residue.  The result is kept so the verbose message below
    # does not have to run the same check a second time.
    follows_previous = 0 < len(last_res) and last_ord_res == last_res
    break_reason = None
    if follows_previous:
        break_reason = self._peptide_check(last_ord_res[0].residue, res)
        if break_reason is None:
            # no chain break
            for prev in last_ord_res:
                prev.rnext.append(ric)
                ric.rprev.append(prev)
            return True

    if not all(atm in res.child_dict for atm in ("N", "CA", "C")):
        # chain break but do not have N, Ca, C coords to restart from
        return False

    # chain break, save coords for restart
    if verbose and len(last_res) != 0:  # not first residue
        if last_ord_res != last_res:
            # last_ord_res is a list of IC_Residues, so describe its
            # members rather than calling pretty_str() on the list itself
            after = ", ".join(prev.pretty_str() for prev in last_ord_res)
            reason = f"disordered residues after {after}"
        else:
            reason = cast(str, break_reason)
        print(f"chain break at {ric.pretty_str()} due to {reason}")

    iNCaC = ric.split_akl(
        (AtomKey(ric, "N"), AtomKey(ric, "CA"), AtomKey(ric, "C"))
    )
    self.initNCaCs.extend(iNCaC)
    return True
def _set_residues(self, verbose: bool = False) -> None: | |
"""Initialize .internal_coord for loaded Biopython Residue objects. | |
Add IC_Residue as .internal_coord attribute for each :class:`.Residue` | |
in parent :class:`Bio.PDB.Chain.Chain`; populate ordered_aa_ic_list with | |
:class:`IC_Residue` references for residues which can be built (amino | |
acids and some hetatms); set rprev and rnext on each sequential | |
IC_Residue, populate initNCaC at start and after chain breaks. | |
Generates: | |
self.akset : set of :class:`.AtomKey` s in this chain | |
""" | |
# ndx = 0 | |
last_res: List["IC_Residue"] = [] | |
last_ord_res: List["IC_Residue"] = [] | |
# atomCoordDict = {} | |
akset = set() | |
for res in self.chain.get_residues(): | |
# select only not hetero or accepted hetero | |
if res.id[0] == " " or res.id[0] in IC_Residue.accept_resnames: | |
this_res: List["IC_Residue"] = [] | |
if 2 == res.is_disordered() and not IC_Residue.no_altloc: | |
# print('disordered res:', res.is_disordered(), res) | |
for r in res.child_dict.values(): | |
if self._add_residue(r, last_res, last_ord_res, verbose): | |
this_res.append(r.internal_coord) | |
akset.update(r.internal_coord.ak_set) | |
else: | |
if self._add_residue(res, last_res, last_ord_res, verbose): | |
this_res.append(res.internal_coord) | |
akset.update(res.internal_coord.ak_set) | |
if 0 < len(this_res): | |
self.ordered_aa_ic_list.extend(this_res) | |
last_ord_res = this_res | |
last_res = this_res | |
self.akset = akset | |
self.initNCaCs = sorted(self.initNCaCs) | |
def build_atomArray(self) -> None: | |
"""Build :class:`IC_Chain` numpy coordinate array from biopython atoms. | |
See also :meth:`.init_edra` for more complete initialization of IC_Chain. | |
Inputs: | |
self.akset : set | |
:class:`AtomKey` s in this chain | |
Generates: | |
self.AAsiz : int | |
number of atoms in chain (len(akset)) | |
self.aktuple : AAsiz x AtomKeys | |
sorted akset AtomKeys | |
self.atomArrayIndex : [AAsiz] of int | |
numerical index for each AtomKey in aktuple | |
self.atomArrayValid : AAsiz x bool | |
atomArray coordinates current with internal coordinates if True | |
self.atomArray : AAsiz x np.float64[4] | |
homogeneous atom coordinates; Biopython :class:`.Atom` | |
coordinates are view into this array after execution | |
rak_cache : dict | |
lookup cache for AtomKeys for each residue | |
""" | |
def setAtom(res, atm): | |
ak = AtomKey(res.internal_coord, atm) | |
try: | |
ndx = self.atomArrayIndex[ak] | |
except KeyError: | |
return | |
self.atomArray[ndx, 0:3] = atm.coord | |
atm.coord = self.atomArray[ndx, 0:3] # make view on atomArray | |
self.atomArrayValid[ndx] = True | |
self.bpAtomArray[ndx] = atm # rtm | |
def setResAtms(res): | |
for atm in res.get_atoms(): | |
if atm.is_disordered(): | |
if IC_Residue.no_altloc: | |
setAtom(res, atm.selected_child) | |
else: | |
for altAtom in atm.child_dict.values(): | |
setAtom(res, altAtom) | |
else: | |
setAtom(res, atm) | |
self.AAsiz = len(self.akset) | |
# sorted(akset) needed here for pdb atom serial number and to maintain | |
# consistency between a2ic and i2ac | |
self.aktuple = tuple(sorted(self.akset)) | |
self.atomArrayIndex = dict(zip(self.aktuple, range(self.AAsiz))) | |
self.atomArrayValid = np.zeros(self.AAsiz, dtype=bool) | |
self.atomArray = np.zeros((self.AAsiz, 4), dtype=np.float64) | |
self.atomArray[:, 3] = 1.0 | |
self.bpAtomArray = [None] * self.AAsiz # rtm | |
for ric in self.ordered_aa_ic_list: | |
setResAtms(ric.residue) | |
if ric.akc == {}: # pic file read | |
ric._build_rak_cache() | |
def build_edraArrays(self) -> None:
    """Build chain level hedra and dihedra arrays.

    Used by :meth:`init_edra` and :meth:`_hedraDict2chain`.  Should be
    private method but exposed for documentation.

    Inputs:
        self.dihedraLen : int
            number of dihedra needed
        self.hedraLen : int
            number of hedra needed
        self.AAsiz : int
            length of atomArray
        self.hedraNdx : dict
            maps hedron keys to range(hedraLen)
        self.dihedraNdx : dict
            maps dihedron keys to range(dihedraLen)
        self.hedra : dict
            maps Hedra keys to Hedra for chain
        self.dihedra : dict
            maps Dihedra keys to Dihedra for chain
        self.atomArrayIndex : dict
            maps AtomKeys to atomArray

    Generates:
        self.dCoordSpace : [2][dihedraLen][4][4]
            transforms to/from dihedron coordinate space
        self.dcsValid : dihedraLen x bool
            indicates dCoordSpace is current
        self.hAtoms : hedraLen x 3 x np.float64[4]
            atom coordinates in hCoordSpace
        self.hAtomsR : hedraLen x 3 x np.float64[4]
            hAtoms in reverse order (trading space for time)
        self.hAtoms_needs_update : hedraLen x bool
            indicates hAtoms, hAtomsR need to be recomputed
        self.a2h_map : AAsiz x [int ...]
            maps atomArrayIndex to hedraNdx's with that atom
        self.a2ha_map : [hedraLen x 3]
            AtomNdx's in hedraNdx order
        self.h2aa : hedraLen x [int ...]
            maps hedraNdx to atomNdx's in hedron (reshaped later)
        Hedron.ndx : int
            self.hedraNdx value stored inside Hedron object
        self.dAtoms : dihedraLen x 4 x np.float64[4]
            dihedron atom coordinates (uninitialised apart from column 3)
        self.a4_pre_rotation : dihedraLen x np.float64[4]
            work array for 4th dihedron atom (uninitialised)
        self.dRev : dihedraLen x bool
            dihedron reversed if true
        self.dFwd : dihedraLen x bool
            dihedron is not reversed if True
        self.dH1ndx, dH2ndx : [dihedraLen]
            hedraNdx's for 1st and 2nd hedra
        self.h1d_map : hedraLen x []
            hedraNdx -> [dihedra using hedron as their first hedron]
        self.id3_dh_index, id32_dh_index : dict
            first-3 / last-3 AtomKeys of dihedron key -> [dihedron keys]
        Dihedron.h1key, h2key : [AtomKey ...]
            hedron keys for dihedron, reversed as needed
        Dihedron.hedron1, hedron2 : Hedron
            references inside dihedron to hedra
        Dihedron.ndx : int
            self.dihedraNdx info inside Dihedron object
        Dihedron.cst, rcst : np.float64[4][4]
            dCoordSpace references inside Dihedron
        self.a2da_map : [dihedraLen x 4]
            AtomNdx's in dihedraNdx order
        self.d2a_map : [dihedraLen x [4]]
            AtomNdx's for each dihedron (reshaped a2da_map)
        self.a2d_map : AAsiz x [[dihedraNdx]
            [atom ndx 0-3 of atom in dihedron]], maps atom indexes to
            dihedra and atoms in them
        self.dAtoms_needs_update : dihedraLen x bool
            atoms in h1, h2 are current if False
    """
    n_hedra = self.hedraLen
    n_dihedra = self.dihedraLen
    n_atoms = self.AAsiz
    atom_row = self.atomArrayIndex

    # dihedra coord space
    self.dCoordSpace = np.empty((2, n_dihedra, 4, 4), dtype=np.float64)
    self.dcsValid = np.zeros((n_dihedra), dtype=bool)

    # hedra atoms
    self.hAtoms = np.zeros((n_hedra, 3, 4), dtype=np.float64)
    self.hAtoms[:, :, 3] = 1.0  # homogeneous
    self.hAtomsR = np.copy(self.hAtoms)
    self.hAtoms_needs_update = np.full(n_hedra, True)

    # maps between hAtoms and atomArray
    hedra_slots = {}  # flat (hedron * 3 + position) -> atomArray row
    self.a2h_map = [[] for _ in range(n_atoms)]
    hedron_rows = [[] for _ in range(n_hedra)]
    for h_key, h_pos in self.hedraNdx.items():
        base = h_pos * 3
        for offset in (0, 1, 2):
            hedra_slots[base + offset] = atom_row[h_key[offset]]
        hedron = self.hedra[h_key]
        hedron.ndx = h_pos
        for atom_key in hedron.atomkeys:
            row = atom_row[atom_key]
            hedron_rows[h_pos].append(row)
            self.a2h_map[row].append(h_pos)
    self.a2ha_map = np.array(tuple(hedra_slots.values()))
    self.h2aa = np.array(hedron_rows)

    # dihedra atoms
    self.dAtoms = np.empty((n_dihedra, 4, 4), dtype=np.float64)
    self.dAtoms[:, :, 3] = 1.0  # homogeneous
    self.a4_pre_rotation = np.empty((n_dihedra, 4))

    # maps between dAtoms and atomArray, hedra and dihedra, plus
    # dihedra forward/reverse data
    dihedra_slots = {}  # flat (dihedron * 4 + position) -> atomArray row
    atom_memberships = [[[], []] for _ in range(n_atoms)]
    self.dRev = np.zeros((n_dihedra), dtype=bool)
    self.dH1ndx = np.empty(n_dihedra, dtype=np.int64)
    self.dH2ndx = np.empty(n_dihedra, dtype=np.int64)
    self.h1d_map = [[] for _ in range(n_hedra)]
    self.id3_dh_index = {key[0:3]: [] for key in self.dihedraNdx}
    self.id32_dh_index = {key[1:4]: [] for key in self.dihedraNdx}

    for d_key, d_pos in self.dihedraNdx.items():
        # build map between atomArray and dAtoms
        base = d_pos * 4
        lead3 = d_key[0:3]
        trail3 = d_key[1:4]
        dihedron = self.dihedra[d_key]
        for offset in range(4):
            row = atom_row[d_key[offset]]
            dihedra_slots[base + offset] = row
            members, positions = atom_memberships[row]
            members.append(d_pos)
            positions.append(offset)

        # first hedron is the leading 3 atoms if such a hedron exists,
        # otherwise the dihedron is built from the reversed hedra
        dihedron.h1key = lead3
        dihedron.h2key = trail3
        try:
            h1_pos = self.hedraNdx[dihedron.h1key]
        except KeyError:
            dihedron.h1key = d_key[2::-1]
            dihedron.h2key = d_key[3:0:-1]
            h1_pos = self.hedraNdx[dihedron.h1key]
            self.dRev[d_pos] = True
            dihedron.reverse = True
        h2_pos = self.hedraNdx[dihedron.h2key]

        dihedron.hedron1 = self.hedra[dihedron.h1key]
        dihedron.hedron2 = self.hedra[dihedron.h2key]
        self.dH1ndx[d_pos] = h1_pos
        self.dH2ndx[d_pos] = h2_pos
        self.h1d_map[h1_pos].append(d_pos)
        dihedron.ndx = d_pos
        dihedron.cst = self.dCoordSpace[0][d_pos]
        dihedron.rcst = self.dCoordSpace[1][d_pos]
        self.id3_dh_index[lead3].append(d_key)
        self.id32_dh_index[trail3].append(d_key)

    self.a2da_map = np.array(tuple(dihedra_slots.values()))
    self.d2a_map = self.a2da_map.reshape(-1, 4)
    self.dFwd = np.logical_not(self.dRev)
    # manually create np.where(atom in dihedral)
    self.a2d_map = [
        (np.array(members), np.array(positions))
        for members, positions in atom_memberships
    ]
    self.dAtoms_needs_update = np.full(n_dihedra, True)
def _hedraDict2chain(
    self,
    hl12: Dict[str, float],
    ha: Dict[str, float],
    hl23: Dict[str, float],
    da: Dict[str, float],
    bfacs: Dict[str, float],
) -> None:
    """Generate chain numpy arrays from :func:`.read_PIC` dicts.

    On entry:
        * chain internal_coord has ordered_aa_ic_list built, akset;
        * residues have rnext, rprev, ak_set and di/hedra dicts initialised
        * Chain, residues do NOT have NCaC info, id3_dh_index
        * Di/hedra have cic, atomkeys set
        * Dihedra do NOT have valid reverse flag, h1/2 info

    :param dict hl12: hedron key -> length between atoms 1 and 2
    :param dict ha: hedron key -> hedron angle (degrees)
    :param dict hl23: hedron key -> length between atoms 2 and 3
    :param dict da: dihedron key -> dihedral angle (degrees)
    :param dict bfacs: AtomKey id -> B-factor; missing ids default to 0.0

    The hedra and dihedra value arrays are filled in sorted key order, the
    same order used to build ``hedraNdx`` and ``dihedraNdx``, so the input
    dicts do not have to be pre-sorted.  ``hl12`` and ``hl23`` must hold
    every key present in ``ha``.
    """
    for ric in self.ordered_aa_ic_list:
        # log chain starts - beginning and after breaks
        # chain starts are only atom coords in pic files
        # assume valid pic files with all 3 of N, Ca, C coords
        initNCaC = []
        for atm in ric.residue.get_atoms():  # n.b. only few PIC spec atoms
            if 2 == atm.is_disordered():
                if IC_Residue.no_altloc:
                    initNCaC.append(AtomKey(ric, atm.selected_child))
                else:
                    for altAtom in atm.child_dict.values():
                        if altAtom.coord is not None:
                            initNCaC.append(AtomKey(ric, altAtom))
            elif atm.coord is not None:
                initNCaC.append(AtomKey(ric, atm))
        if initNCaC:
            self.initNCaCs.append(tuple(initNCaC))

        # next residue NCaCKeys so can do per-residue assemble()
        ric.NCaCKey = []
        ric.NCaCKey.extend(
            ric.split_akl((AtomKey(ric, "N"), AtomKey(ric, "CA"), AtomKey(ric, "C")))
        )
        ric._link_dihedra()

    # if STILL have no self.initNCaCs, assume pic file w/o atoms and grab
    # from first residue
    if self.initNCaCs == []:
        ric = self.ordered_aa_ic_list[0]
        iNCaC = ric.split_akl(
            (AtomKey(ric, "N"), AtomKey(ric, "CA"), AtomKey(ric, "C"))
        )
        self.initNCaCs.extend(iNCaC)

    # set any supplied coordinates from biopython atoms
    # just loaded pic file so only start/chain break residues
    # will have atoms
    self.build_atomArray()
    self.initNCaCs = sorted(self.initNCaCs)

    # now create all biopython atoms for parent chain, setting coords to be
    # view on atomArray entry
    _, _, _, atmNdx, altlocNdx, occNdx = AtomKey.fields
    sn = None  # running atom serial number
    for ak, ndx in self.atomArrayIndex.items():
        res = ak.ric.residue  # read_PIC inits with IC_Residue
        atm, altloc = ak.akl[atmNdx], ak.akl[altlocNdx]
        occ = 1.00 if ak.akl[occNdx] is None else float(ak.akl[occNdx])
        bfac = bfacs.get(ak.id, 0.0)
        sn = sn + 1 if sn is not None else ndx + 1
        bpAtm = None
        if res.has_id(atm):
            bpAtm = res[atm]
        if bpAtm is None or (
            2 == bpAtm.is_disordered() and not bpAtm.disordered_has_id(altloc)
        ):
            # atom (or this altloc of it) does not exist yet: create it
            newAtom = Atom(
                atm,
                self.atomArray[ndx][0:3],  # init as view on atomArray
                bfac,
                occ,
                (" " if altloc is None else altloc),
                atm,
                sn,
                atm[0],
            )
            if bpAtm is None:
                if altloc is None:
                    res.add(newAtom)
                else:
                    disordered_atom = DisorderedAtom(atm)
                    res.add(disordered_atom)
                    disordered_atom.disordered_add(newAtom)
                    res.flag_disordered()
            else:
                bpAtm.disordered_add(newAtom)
        else:
            # atom was supplied by the pic file: update it and continue
            # serial numbering from its serial number
            if 2 == bpAtm.is_disordered() and bpAtm.disordered_has_id(altloc):
                bpAtm.disordered_select(altloc)
            bpAtm.set_bfactor(bfac)
            bpAtm.set_occupancy(occ)
            sn = bpAtm.get_serial_number()

    # hedra
    # gather values in the sorted key order that defines hedraNdx so that
    # array position i always belongs to the hedron with index i
    hedraKeys = sorted(ha.keys())
    self.hedraLen = len(hedraKeys)
    self.hedraL12 = np.fromiter((hl12[k] for k in hedraKeys), dtype=np.float64)
    self.hedraAngle = np.fromiter((ha[k] for k in hedraKeys), dtype=np.float64)
    self.hedraL23 = np.fromiter((hl23[k] for k in hedraKeys), dtype=np.float64)
    self.hedraNdx = dict(zip(hedraKeys, range(self.hedraLen)))

    # dihedra
    dihedraKeys = sorted(da.keys())
    self.dihedraLen = len(dihedraKeys)
    self.dihedraAngle = np.fromiter((da[k] for k in dihedraKeys), dtype=np.float64)
    self.dihedraAngleRads = np.deg2rad(self.dihedraAngle)
    self.dihedraNdx = dict(zip(dihedraKeys, range(self.dihedraLen)))

    self.build_edraArrays()
# @profile | |
def assemble_residues(self, verbose: bool = False) -> None:
    """Generate atom coords from internal coords (vectorised).

    This is the 'Numpy parallel' version of :meth:`.assemble_residues_ser`.

    Starting with dihedra already formed by :meth:`.init_atom_coords`, transform
    each from dihedron local coordinate space into protein chain coordinate
    space.  Iterate until all dependencies satisfied.

    Each pass handles every dihedron whose validity pattern matches
    ``IC_Chain._dihedraSelect``, computes its 4th atom, marks that atom
    valid, then re-tests only the dihedra containing the new atoms.

    Does not update :data:`dCoordSpace` as :meth:`assemble_residues_ser`
    does.  Call :meth:`.update_dCoordSpace` if needed.  Faster to do in
    single operation once all atom coordinates finished.

    :param bool verbose: default False.
        Report number of iterations to compute changed dihedra

    generates:
        self.dSet: dihedraLen x 4 x 4
            copy of the atomArray coordinates of the 4 atoms of each dihedron
        self.dSetValid : [dihedraLen][4] of bool
            map of valid atoms into dihedra to detect 3 or 4 atoms valid

    Output coordinates written to :data:`atomArray`.  Biopython
    :class:`Bio.PDB.Atom` coordinates are a view on this data.
    """
    # dihedron atom positions of chain atom ndxs, maps atomArray to dihedra
    a2da_map = self.a2da_map  # flat, 4 entries per dihedron
    # each chain atom to list of [dihedron], [dihedron_position]
    a2d_map = self.a2d_map  # AAsiz x ([int], [int])
    # every dihedron atom to chain atoms
    d2a_map = self.d2a_map  # dihedraLen x [4] ints
    # all chain atoms
    atomArray = self.atomArray
    # bool markers for chain atoms with valid coordinates
    atomArrayValid = self.atomArrayValid
    # complete array of dihedra atoms
    dAtoms = self.dAtoms  # dihedraLen x [4][4] float
    # coordinate space transformations optionally supplied
    dCoordSpace1 = self.dCoordSpace[1]
    dcsValid = self.dcsValid

    # dSet is 4-atom arrays for every dihedral, multiple copies of
    # many atoms as the dihedra overlap; fancy indexing makes this a copy,
    # so new coordinates are copied back into it inside the loop below
    self.dSet = atomArray[a2da_map].reshape(-1, 4, 4)
    dSet = self.dSet
    # dSetValid indicates accurate atom positions in each dSet dihedral
    self.dSetValid = atomArrayValid[a2da_map].reshape(-1, 4)
    dSetValid = self.dSetValid

    # clear any transforms for dihedrals with outdated atoms
    # (presumably self._dihedraOK is the all-atoms-valid pattern; it is
    # defined outside this method)
    workSelector = (dSetValid == self._dihedraOK).all(axis=1)
    self.dcsValid[np.logical_not(workSelector)] = False

    dihedraWrk = None
    if verbose:
        # number of dihedra not matching the _dihedraOK pattern
        dihedraWrk = workSelector.size - workSelector.sum()

    # mask for dihedral with 3 valid atoms in dSet, ready to be processed:
    targ = IC_Chain._dihedraSelect
    # select the dihedrals ready for processing
    workSelector = (dSetValid == targ).all(axis=1)

    loopCount = 0
    while np.any(workSelector):
        # indexes of dihedra in dset to update
        workNdxs = np.where(workSelector)
        # subset of dihedra to update
        workSet = dSet[workSelector]
        # will update coordinates of 4th atom in each workSet dihedron
        updateMap = d2a_map[workNdxs, 3][0]
        # get all coordSpace transforms; reuse the stored ones only when
        # every selected dihedron still has a valid transform
        if np.all(dcsValid[workSelector]):
            cspace = dCoordSpace1[workSelector]
        else:
            cspace = multi_coord_space(workSet, np.sum(workSelector), True)[1]
        # generate new coords for 4th atoms in workSet dihedra
        initCoords = dAtoms[workSelector].reshape(-1, 4, 4)
        # per-dihedron matrix x vector product: cspace[i] @ initCoords[i, 3]
        atomArray[updateMap] = np.einsum("ijk,ik->ij", cspace, initCoords[:, 3])
        # mark new computed atom positions as valid
        atomArrayValid[updateMap] = True

        # prep for next iteration
        workSelector[:] = False
        for a in updateMap:
            # copy new atom positions into dihedra atom array
            dSet[a2d_map[a]] = atomArray[a]
            # build new workSelector from only updated dihedra
            adlist = a2d_map[a]
            for d in adlist[0]:
                dvalid = atomArrayValid[d2a_map[d]]
                workSelector[d] = (dvalid == targ).all()
        loopCount += 1

    if verbose:
        cid = self.chain.full_id
        print(
            f"{cid[0]} {cid[2]} coordinates for {dihedraWrk} dihedra"
            f" updated in {loopCount} iterations"
        )
def assemble_residues_ser(
    self,
    verbose: bool = False,
    start: Optional[int] = None,
    fin: Optional[int] = None,
) -> None:
    """Generate IC_Residue atom coords from internal coordinates (serial).

    See :meth:`.assemble_residues` for 'numpy parallel' version.

    Filter positions between start and fin if set, find appropriate start
    coordinates for each residue and pass to :meth:`.assemble`

    Residues outside start ... fin have their ``ak_set`` and ``akc`` set to
    None and their Biopython residue emptied of atoms.  Marks every
    :data:`dCoordSpace` transform invalid before starting.

    :param bool verbose: default False.
        Describe runtime problems
    :param int start,fin: default None.
        Sequence position for begin, end of subregion to generate coords
        for.  None means no limit on that side; 0 and negative positions
        are valid limits.
    """
    self.dcsValid[:] = False
    for ric in self.ordered_aa_ic_list:
        respos = ric.residue.id[1]
        # compare against None explicitly: position 0 is a legal limit
        past_fin = fin is not None and fin < respos
        before_start = start is not None and start > respos
        if past_fin or before_start:
            # clear and skip residue outside start ... fin
            ric.ak_set = None
            ric.akc = None
            ric.residue.child_dict = {}
            ric.residue.child_list = []
            continue
        atom_coords = ric.assemble(verbose=verbose)
        if atom_coords:
            ric.ak_set = set(atom_coords.keys())
def init_edra(self, verbose: bool = False) -> None:
    """Create chain and residue di/hedra structures, arrays, atomArray.

    Each stage runs only if its results are not already present, so the
    method can be called again on an initialised chain.

    Inputs:
        self.ordered_aa_ic_list : list of IC_Residue

    Generates:
        * edra objects, self.di/hedra (executes :meth:`._create_edra`)
        * atomArray and support (executes :meth:`.build_atomArray`)
        * self.hedraLen : number of hedra in structure
        * hedraL12 : numpy arrays for lengths, angles (empty)
        * hedraAngle ..
        * hedraL23 ..
        * self.hedraNdx : dict mapping hedrakeys to hedraL12 etc
        * self.dihedraLen : number of dihedra in structure
        * dihedraAngle ..
        * dihedraAngleRads : np arrays for angles (empty)
        * self.dihedraNdx : dict mapping dihedrakeys to dihedraAngle
        * chain level di/hedra arrays (executes :meth:`.build_edraArrays`)

    :param bool verbose: passed through to :meth:`._create_edra`
    """
    residues = self.ordered_aa_ic_list
    if residues[0].hedra == {}:
        # build di/hedra objects in chain arrays
        for ric in residues:
            ric._create_edra(verbose=verbose)

    if not hasattr(self, "atomArrayValid"):
        self.build_atomArray()  # ric.a2ic added gly CBs to akset

    if not hasattr(self, "hedraLen"):
        # hedra: value arrays are allocated here but not filled
        hedra_count = len(self.hedra)
        self.hedraLen = hedra_count
        self.hedraL12 = np.empty((hedra_count), dtype=np.float64)
        self.hedraAngle = np.empty((hedra_count), dtype=np.float64)
        self.hedraL23 = np.empty((hedra_count), dtype=np.float64)
        self.hedraNdx = {
            key: pos for pos, key in enumerate(sorted(self.hedra.keys()))
        }

        # dihedra
        dihedra_count = len(self.dihedra)
        self.dihedraLen = dihedra_count
        self.dihedraAngle = np.empty(dihedra_count)
        self.dihedraAngleRads = np.empty(dihedra_count)
        self.dihedraNdx = {
            key: pos for pos, key in enumerate(sorted(self.dihedra.keys()))
        }

    if not hasattr(self, "hAtoms_needs_update"):
        self.build_edraArrays()
# @profile | |
def init_atom_coords(self) -> None:
    """Set chain level di/hedra initial coords from angles and distances.

    Initializes atom coordinates in local coordinate space for hedra and
    dihedra, will be transformed appropriately later by :data:`dCoordSpace`
    matrices for assembly.

    Only hedra flagged in ``hAtoms_needs_update`` and dihedra flagged in
    ``dAtoms_needs_update`` are recomputed; both flag arrays are cleared on
    the way through.  Any chain start (``initNCaCs`` entry) without valid
    atomArray coordinates is seeded from its hedron's ``hAtoms``.
    """
    # dbg = True
    if not np.all(self.dAtoms_needs_update):
        # a dihedron needs update if either of its hedra does, and its
        # coordinate space transform is then no longer valid
        self.dAtoms_needs_update |= (self.hAtoms_needs_update[self.dH1ndx]) | (
            self.hAtoms_needs_update[self.dH2ndx]
        )
        self.dcsValid &= np.logical_not(self.dAtoms_needs_update)

    # dihedra full size masks:
    mdFwd = self.dFwd & self.dAtoms_needs_update
    mdRev = self.dRev & self.dAtoms_needs_update

    # update size masks (length = number of dihedra needing update)
    udFwd = self.dFwd[self.dAtoms_needs_update]
    udRev = self.dRev[self.dAtoms_needs_update]

    """
    if dbg:
        print("mdFwd", mdFwd[0:10])
        print("mdRev", mdRev[0:10])
        print("udFwd", udFwd[0:10])
        print("udRev", udRev[0:10])
    """

    if np.any(self.hAtoms_needs_update):
        # hedra initial coords

        # sar = supplementary angle radian: angles which add to 180
        sar = np.deg2rad(180.0 - self.hedraAngle[self.hAtoms_needs_update])  # angle
        sinSar = np.sin(sar)
        cosSarN = np.cos(sar) * -1

        """
        if dbg:
            print("sar", sar[0:10])
        """

        # a2 is len23 up the Z axis, X=Y=0 (a1 is never written and so
        # stays at the origin)
        self.hAtoms[:, 2, 2][self.hAtoms_needs_update] = self.hedraL23[
            self.hAtoms_needs_update
        ]

        # a0 X is sin( sar ) * len12
        self.hAtoms[:, 0, 0][self.hAtoms_needs_update] = (
            sinSar * self.hedraL12[self.hAtoms_needs_update]
        )

        # a0 Z is -(cos( sar ) * len12)
        # (assume angle always obtuse, so a0 is in -Z)
        self.hAtoms[:, 0, 2][self.hAtoms_needs_update] = (
            cosSarN * self.hedraL12[self.hAtoms_needs_update]
        )

        """
        if dbg:
            print("hAtoms_needs_update", self.hAtoms_needs_update[0:10])
            print("self.hAtoms", self.hAtoms[0:10])
        """

        # same again but 'reversed' : a0 on Z axis, a1 at origin, a2 in -Z

        # a0r is len12 up from a1 on Z axis, X=Y=0
        self.hAtomsR[:, 0, 2][self.hAtoms_needs_update] = self.hedraL12[
            self.hAtoms_needs_update
        ]
        # a2r X is sin( sar ) * len23
        self.hAtomsR[:, 2, 0][self.hAtoms_needs_update] = (
            sinSar * self.hedraL23[self.hAtoms_needs_update]
        )
        # a2r Z is -(cos( sar ) * len23)
        self.hAtomsR[:, 2, 2][self.hAtoms_needs_update] = (
            cosSarN * self.hedraL23[self.hAtoms_needs_update]
        )

        """
        if dbg:
            print("self.hAtomsR", self.hAtomsR[0:10])
        """

        self.hAtoms_needs_update[...] = False

        # dihedra parts other than dihedral angle

        dhlen = np.sum(self.dAtoms_needs_update)  # self.dihedraLen

        # only 4th atom takes work:
        # pick 4th atom based on rev flag
        self.a4_pre_rotation[mdRev] = self.hAtoms[self.dH2ndx, 0][mdRev]
        self.a4_pre_rotation[mdFwd] = self.hAtomsR[self.dH2ndx, 2][mdFwd]

        # numpy multiply, add operations below intermediate array but out=
        # not working with masking:
        self.a4_pre_rotation[:, 2][self.dAtoms_needs_update] = np.multiply(
            self.a4_pre_rotation[:, 2][self.dAtoms_needs_update], -1
        )  # a4 to +Z

        # shift length per updated dihedron, taken from its second hedron
        a4shift = np.empty(dhlen)
        a4shift[udRev] = self.hedraL23[self.dH2ndx][mdRev]  # len23
        a4shift[udFwd] = self.hedraL12[self.dH2ndx][mdFwd]  # len12

        self.a4_pre_rotation[:, 2][self.dAtoms_needs_update] = np.add(
            self.a4_pre_rotation[:, 2][self.dAtoms_needs_update],
            a4shift,
        )  # so a2 at origin

        """
        if dbg:
            print("dhlen", dhlen)
            print("a4shift", a4shift[0:10])
            print("a4_pre_rotation", self.a4_pre_rotation[0:10])
        """

        # now build dihedra initial coords: the first 3 atoms are the
        # first hedron, taken in reverse order for reversed dihedra

        dH1atoms = self.hAtoms[self.dH1ndx]  # fancy indexing so
        dH1atomsR = self.hAtomsR[self.dH1ndx]  # these copy not view

        self.dAtoms[:, :3][mdFwd] = dH1atoms[mdFwd]
        self.dAtoms[:, :3][mdRev] = dH1atomsR[:, 2::-1][mdRev]

        """
        if dbg:
            print("dH1atoms", dH1atoms[0:10])
            print("dH1atosR", dH1atomsR[0:10])
            print("dAtoms", self.dAtoms[0:10])
        """

    # build rz rotation matrix for dihedral angle

    """
    if dbg:
        print("dangle-rads", self.dihedraAngleRads[0:10])
    """

    rz = multi_rot_Z(self.dihedraAngleRads[self.dAtoms_needs_update])

    # rotate each pre-rotation 4th atom by its dihedron's rz matrix
    a4rot = np.matmul(
        rz,
        self.a4_pre_rotation[self.dAtoms_needs_update][:].reshape(-1, 4, 1),
    ).reshape(-1, 4)

    # a4rot is update-sized, so it is indexed with the update-sized masks
    self.dAtoms[:, 3][mdFwd] = a4rot[udFwd]  # [self.dFwd]
    self.dAtoms[:, 3][mdRev] = a4rot[udRev]  # [self.dRev]

    """
    if dbg:
        print("rz", rz[0:3])
        print("dAtoms", self.dAtoms[0:10])
    """

    self.dAtoms_needs_update[...] = False

    # can't start assembly if initial NCaC is not valid, so copy from
    # hAtoms if needed

    """
    if dbg:
        print("initNCaCs", self.initNCaCs)
    """

    for iNCaC in self.initNCaCs:
        invalid = True
        if np.all(self.atomArrayValid[[self.atomArrayIndex[ak] for ak in iNCaC]]):
            invalid = False

        if invalid:
            # the chain-start key is looked up as a hedron key here
            hatoms = self.hAtoms[self.hedraNdx[iNCaC]]
            for i in range(3):
                andx = self.atomArrayIndex[iNCaC[i]]
                self.atomArray[andx] = hatoms[i]
                self.atomArrayValid[andx] = True

    """
    if dbg:
        hatoms = self.hAtoms[self.hedraNdx[iNCaC]]
        print("hedraNdx iNCaC", self.hedraNdx[iNCaC])
        print("hatoms", hatoms)
    """
def update_dCoordSpace(self, workSelector: Optional[np.ndarray] = None) -> None:
    """Compute/update coordinate space transforms for chain dihedra.

    Requires all atoms updated so calls :meth:`.assemble_residues`
    (returns immediately if all atoms already assembled).

    When no selector is supplied, atoms are assembled first and every
    dihedron whose transform is not marked valid in ``dcsValid`` is
    recomputed.  Selected dihedra are marked valid afterwards.

    :param [bool] workSelector:
        Optional mask to select dihedra for update
    """
    selector = workSelector
    if selector is None:
        # update atoms, fast if nothing to do
        self.assemble_residues()
        selector = np.logical_not(self.dcsValid)

    selected_dihedra = self.dSet[selector]
    transforms = multi_coord_space(selected_dihedra, np.sum(selector), True)
    self.dCoordSpace[:, selector] = transforms
    self.dcsValid[selector] = True
def propagate_changes(self) -> None:
    """Track through di/hedra to invalidate dependent atoms.

    The chain is handled one segment at a time, a segment running from one
    ``initNCaCs`` chain start to the next (or to the end of atomArray).
    An invalid backbone atom (N, CA, C) invalidates everything after it in
    the segment.  Any other invalid atom except "H" invalidates the atoms
    of the same sequence position that follow it in atomArray.
    """
    atm_field = AtomKey.fields.atm
    pos_field = AtomKey.fields.respos
    segment_count = len(self.initNCaCs)
    handled_positions = set()

    for segment in range(segment_count):
        # atomArray index range [first, stop) covered by this segment
        first = self.atomArrayIndex[self.initNCaCs[segment][0]]
        following = segment + 1
        if following == segment_count:
            stop = self.AAsiz  # last segment runs to end of atomArray
        else:
            stop = self.atomArrayIndex[self.initNCaCs[following][0]]

        for andx in range(first, stop):
            if self.atomArrayValid[andx]:
                continue
            akl = self.aktuple[andx].akl
            atm = akl[atm_field]
            pos = akl[pos_field]  # sequence position = residue number
            if atm in ("N", "CA", "C"):
                # backbone moved so all to next start moved,
                # and this segment is finished
                self.atomArrayValid[andx:stop] = False
                break
            if pos in handled_positions or atm == "H":
                # H is terminal so ignore, not effect subsequent atoms
                # O is terminal but used to locate CB
                continue
            # atomArray is sorted, sidechain atoms follow backbone
            for i in range(andx, stop):
                if self.aktuple[i].akl[pos_field] != pos:
                    # reached next sequence position: sidechain is done
                    break
                self.atomArrayValid[i] = False
            handled_positions.add(pos)
# @profile | |
def internal_to_atom_coordinates(
    self,
    verbose: bool = False,
    start: Optional[int] = None,
    fin: Optional[int] = None,
) -> None:
    """Process IC data to Residue/Atom coords.

    Returns without doing anything if the chain has no
    ``dAtoms_needs_update`` attribute, i.e. no di/hedra arrays were built.

    :param bool verbose: default False.
        Describe runtime problems
    :param int start,fin:
        Optional sequence positions for begin, end of subregion
        to process.  None means not set; 0 is a valid position.

    .. note::
        Setting start or fin activates serial :meth:`.assemble_residues_ser`
        instead of (Numpy parallel) :meth:`.assemble_residues`.
        Start C-alpha will be at origin.

    .. seealso::
        :data:`ParallelAssembleResidues`
    """
    if not hasattr(self, "dAtoms_needs_update"):
        return  # escape on no data to process

    # compare against None explicitly so that position 0 selects a subregion
    whole_chain = start is None and fin is None
    if whole_chain and IC_Chain.ParallelAssembleResidues:
        self.propagate_changes()
        self.init_atom_coords()  # compute initial di/hedra coords
        # transform init di/hedra to chain coord space
        self.assemble_residues(verbose=verbose)
        if verbose and not np.all(self.atomArrayValid):
            # report every dihedron left with an atom lacking coordinates
            dSetValid = self.atomArrayValid[self.a2da_map].reshape(-1, 4)
            for ric in self.ordered_aa_ic_list:
                for d in ric.dihedra.values():
                    if not dSetValid[d.ndx].all():
                        print(
                            "missing coordinates for chain "
                            f"{ric.cic.chain.id} {ric.pretty_str()} "
                            f"dihedral: {d.id}"
                        )
        return

    if start is not None:  # set initNCaC tag to build from
        for ric in self.ordered_aa_ic_list:
            if start != ric.residue.id[1]:
                continue
            iNCaC = ric.split_akl(
                (
                    AtomKey(ric, "N"),
                    AtomKey(ric, "CA"),
                    AtomKey(ric, "C"),
                )
            )
            self.initNCaCs.extend(iNCaC)
    self.init_atom_coords()  # compute initial di/hedra coords
    self.assemble_residues_ser(
        verbose=verbose, start=start, fin=fin
    )  # internal to XYZ coordinates
# @profile | |
def atom_to_internal_coordinates(self, verbose: bool = False) -> None:
    """Calculate dihedrals, angles, bond lengths for Atom data.

    Generates atomArray (through init_edra), value arrays for hedra and
    dihedra, and coordinate space transforms for dihedra.

    Generates Gly C-beta if specified, see :data:`IC_Residue.gly_Cbeta`

    Returns without doing anything if the chain has no residues, or no
    dihedra after :meth:`.init_edra`.

    :param bool verbose: default False.
        describe runtime problems
    """
    if not self.ordered_aa_ic_list:
        return  # escape on no data to process

    self.init_edra(verbose=verbose)

    if not self.dihedra:
        return  # escape if no hedra loaded for this chain

    # compute all hedra parameters with law of cosines on 3 atom coords
    ha = self.atomArray[self.a2ha_map].reshape(-1, 3, 4)
    self.hedraL12 = np.linalg.norm(ha[:, 0] - ha[:, 1], axis=1)
    self.hedraL23 = np.linalg.norm(ha[:, 1] - ha[:, 2], axis=1)
    h_a0a2 = np.linalg.norm(ha[:, 0] - ha[:, 2], axis=1)
    cosAngle = (
        np.square(self.hedraL12) + np.square(self.hedraL23) - np.square(h_a0a2)
    ) / (2 * self.hedraL12 * self.hedraL23)
    # floating point rounding can push the ratio just outside [-1, 1] for
    # (near) collinear atoms, which would make arccos return NaN
    np.clip(cosAngle, -1.0, 1.0, out=cosAngle)
    np.rad2deg(np.arccos(cosAngle), out=self.hedraAngle)

    # now process dihedra
    dha = self.atomArray[self.a2da_map].reshape(-1, 4, 4)

    # develop coord_space matrix for 1st 3 atoms of dihedra:
    # note use of [...] to modify in place, dihedra cst, rcst remain valid
    self.dCoordSpace[...] = multi_coord_space(dha, self.dihedraLen, True)
    self.dcsValid[:] = True

    # now put atom 4 into that coordinate space
    do4 = np.matmul(self.dCoordSpace[0], dha[:, 3].reshape(-1, 4, 1)).reshape(-1, 4)

    # and read dihedral as azimuth
    np.arctan2(do4[:, 1], do4[:, 0], out=self.dihedraAngleRads)
    np.rad2deg(self.dihedraAngleRads, out=self.dihedraAngle)

    if hasattr(self, "gcb"):
        self._spec_glyCB()
def _spec_glyCB(self) -> None:
    """Populate values for Gly C-beta.

    For every entry of ``self.gcb`` (dict created by _create_edra) set
    fixed length and angle values for the second hedron of the generated
    dihedron, and a dihedral angle offset from the residue's N-CA-C-O
    dihedral (120 degrees if that dihedral is absent).  The affected
    hedron is flagged for update and its atoms, including the C-beta, are
    marked invalid so they get rebuilt from the internal coordinates.

    ``dihedraAngleRads`` is updated together with ``dihedraAngle`` because
    coordinate generation reads the radian values.
    """
    Ca_Cb_Len = 1.53363
    if hasattr(self, "scale"):  # used for openscad output
        Ca_Cb_Len *= self.scale  # type: ignore

    for gcbd in self.gcb.values():
        cbak = gcbd[3]  # 4th AtomKey of the dihedron key is the C-beta
        self.atomArrayValid[self.atomArrayIndex[cbak]] = False
        ric = cbak.ric
        rN, rCA, rC, rO = (
            ric.rak("N"),
            ric.rak("CA"),
            ric.rak("C"),
            ric.rak("O"),
        )
        gCBd = self.dihedra[gcbd]
        dndx = gCBd.ndx

        # generated dihedron is O-Ca-C-Cb
        # hedron2 is reversed: Cb-Ca-C (also h1 reversed: C-Ca-O)
        h2ndx = gCBd.hedron2.ndx
        self.hedraL12[h2ndx] = Ca_Cb_Len
        self.hedraAngle[h2ndx] = 110.17513
        self.hedraL23[h2ndx] = self.hedraL12[self.hedraNdx[(rCA, rC, rO)]]
        self.hAtoms_needs_update[h2ndx] = True
        for ak in gCBd.hedron2.atomkeys:
            self.atomArrayValid[self.atomArrayIndex[ak]] = False

        refval = self.dihedra.get((rN, rCA, rC, rO), None)
        if refval:
            angl = 122.68219 + self.dihedraAngle[refval.ndx]
            # wrap into the (-180, 180] range
            self.dihedraAngle[dndx] = angl if (angl <= 180.0) else angl - 360.0
        else:
            self.dihedraAngle[dndx] = 120
        # keep the radian copy consistent with the degree value
        self.dihedraAngleRads[dndx] = np.deg2rad(self.dihedraAngle[dndx])
def _write_mtx(fp: TextIO, mtx: np.array) -> None:
    """Write a matrix to fp as nested, comma separated bracket lists.

    Each row is written as ``[ v0, v1, ... ]`` using ``str()`` of every
    element, rows are separated by ``, `` and the whole matrix is wrapped
    in ``[ `` ... `` ]``.

    :param TextIO fp: open text stream to write to
    :param mtx: iterable of rows, each an iterable of values
    """
    rendered_rows = (
        "[ " + ", ".join(str(value) for value in row) + " ]" for row in mtx
    )
    fp.write("[ " + ", ".join(rendered_rows) + " ]")
def _writeSCAD_dihed(
    fp: TextIO, d: "Dihedron", hedraNdx: Dict, hedraSet: Set[EKT]
) -> None:
    """Write one dihedron as an entry of the OpenSCAD residue dihedra array.

    The entry holds the dihedral angle, the indexes of both hedra, a
    reversed flag, two flags that are 0 when the respective hedron key is
    already in hedraSet and 1 otherwise, a trailing comment naming the
    dihedron and its hedra, and the dihedron's ``rcst`` matrix.

    :param TextIO fp: open text stream to write to
    :param Dihedron d: dihedron to write
    :param dict hedraNdx: maps hedron keys to the index values written
    :param set hedraSet: hedron keys tested for membership (not modified)
    """
    reversed_flag = 1 if d.reverse else 0
    h1_flag = 0 if d.h1key in hedraSet else 1
    h2_flag = 0 if d.h2key in hedraSet else 1
    reversed_note = "reversed" if d.reverse else ""

    fp.write(
        f"[ {d.angle:9.5f}, {hedraNdx[d.h1key]}, {hedraNdx[d.h2key]}, "
        f"{reversed_flag}, "
    )
    fp.write(f"{h1_flag}, {h2_flag}, ")
    fp.write(f" // {d.id} [ {d.hedron1.id} -- {d.hedron2.id} ] {reversed_note}\n")
    fp.write(" ")
    IC_Chain._write_mtx(fp, d.rcst)
    fp.write(" ]")  # close residue array of dihedra entry
def _write_SCAD(self, fp: TextIO, backboneOnly: bool, start=None, fin=None) -> None:
    """Write self to file fp as OpenSCAD data matrices.

    See `OpenSCAD <https://www.openscad.org>`_.
    Works with :func:`.write_SCAD` and embedded OpenSCAD routines therein.

    Output is written in this order: the chain id, a per-residue table of
    dihedra, a table of all collected hedra, and a table holding one
    transform matrix per residue N-Ca-C key.

    Side effects visible here: every selected residue has
    ``clear_transforms()`` and ``assemble(resetLocation=True)`` called on
    it, ``atomArrayValid`` is cleared and
    :meth:`internal_to_atom_coordinates` is run before the chain table is
    written, and warnings are sent to stdout with ``print``.

    :param TextIO fp: writable text stream
    :param bool backboneOnly: if True write only dihedra for which
        ``is_backbone()`` is true; otherwise a second (sidechain) pass is
        written for each residue as well
    :param start: first residue sequence position to write, or None for
        no lower bound
    :param fin: last residue sequence position to write, or None for no
        upper bound
    """
    fp.write(f' "{self.chain.id}", // chain id\n')
    # generate dict for all hedra to eliminate redundant references
    hedra = {}
    for ric in self.ordered_aa_ic_list:
        respos = ric.residue.id[1]
        if start is not None and respos < start - 1:
            # start-1 because rprev has some hedra for residue r
            continue
        if fin is not None and respos > fin:
            continue
        for k, h in ric.hedra.items():
            hedra[k] = h
    # state shared by the dihedra and hedra tables below:
    #   atomSet  - AtomKeys already flagged 1 in the hedra table
    #   bondDict - bond id string -> bond type already written
    #   hedraSet - hedron keys referenced by a written dihedron
    atomSet: Set[AtomKey] = set()
    bondDict: Dict = {}  # set()
    hedraSet: Set[EKT] = set()
    # number the hedra in sorted key order; dihedra refer to these indices
    ndx = 0
    hedraNdx = {}
    for hk in sorted(hedra):
        hedraNdx[hk] = ndx
        ndx += 1
    # write residue dihedra table
    fp.write(" [ // residue array of dihedra")
    resNdx = {}  # IC_Residue -> position in the residue dihedra table
    dihedraNdx = {}  # dihedron key -> position within its residue entry
    ndx = 0
    chnStarted = False
    for ric in self.ordered_aa_ic_list:
        respos = ric.residue.id[1]
        if start is not None and respos < start:
            continue
        if fin is not None and respos > fin:
            continue
        if "O" not in ric.akc:
            if ric.lc != "G" and ric.lc != "A":
                print(
                    "Unable to generate complete sidechain for "
                    f"{ric} {ric.lc} missing O atom"
                )
        resNdx[ric] = ndx
        if chnStarted:
            # close the previous residue entry before opening the next
            fp.write("\n ],")
        else:
            chnStarted = True
        fp.write(
            "\n [ // "
            + str(ndx)
            + " : "
            + str(ric.residue.id)
            + " "
            + ric.lc
            + " backbone\n"
        )
        ndx += 1
        # assemble with no start position, return transform matrices
        ric.clear_transforms()
        # compute residue atom coords for no start position
        # dump results because only want rcst
        # IC_Chain.adbg = True
        ric.assemble(resetLocation=True)
        # IC_Chain.adbg = False
        ndx2 = 0
        started = False
        # pass 0 writes backbone dihedra, pass 1 (skipped when
        # backboneOnly) writes the remaining (sidechain) dihedra
        for i in range(1 if backboneOnly else 2):
            if i == 1:
                cma = "," if started else ""
                fp.write(
                    f"{cma}\n // {str(ric.residue.id)} {ric.lc}"
                    " sidechain\n"
                )
            started = False
            for dk, d in sorted(ric.dihedra.items()):
                # only dihedra whose second hedron was collected above
                if d.h2key in hedraNdx and (
                    (i == 0 and d.is_backbone()) or (i == 1 and not d.is_backbone())
                ):
                    if d.cic.dcsValid[d.ndx]:
                        if started:
                            fp.write(",\n")
                        else:
                            started = True
                        fp.write(" ")
                        IC_Chain._writeSCAD_dihed(fp, d, hedraNdx, hedraSet)
                        dihedraNdx[dk] = ndx2
                        # added after writing, so the entry just written
                        # saw hedraSet without its own two hedra
                        hedraSet.add(d.h1key)
                        hedraSet.add(d.h2key)
                        ndx2 += 1
                    else:
                        print(
                            f"Atom missing for {d.id3}-{d.id32}, OpenSCAD"
                            f" chain may be discontiguous"
                        )
    fp.write(" ],")  # end of residue entry dihedra table
    fp.write("\n ],\n")  # end of all dihedra table
    # write hedra table
    fp.write(" [ //hedra\n")
    for hk in sorted(hedra):
        hed = hedra[hk]
        fp.write(" [ ")
        fp.write(
            "{:9.5f}, {:9.5f}, {:9.5f}".format(
                set_accuracy_95(hed.len12),
                set_accuracy_95(hed.angle),
                set_accuracy_95(hed.len23),
            )
        )
        atom_str = ""  # atom and bond state
        atom_done_str = ""  # create each only once
        akndx = 0  # position (0, 1, 2) of ak within the hedron
        for ak in hed.atomkeys:
            atm = ak.akl[AtomKey.fields.atm]
            res = ak.akl[AtomKey.fields.resname]
            # try first for generic backbone/Cbeta atoms
            ab_state_res = residue_atom_bond_state["X"]
            ab_state = ab_state_res.get(atm, None)
            if "H" == atm[0]:
                ab_state = "Hsb"
            if ab_state is None:
                # not found above, must be sidechain atom
                ab_state_res = residue_atom_bond_state.get(res, None)
                if ab_state_res is not None:
                    ab_state = ab_state_res.get(atm, "")
                else:
                    ab_state = ""
            atom_str += ', "' + ab_state + '"'
            # per-atom flag: 1 the first time an atom is recorded in
            # atomSet, 0 when it is already there or this hedron is not
            # referenced by any written dihedron
            if ak in atomSet:
                atom_done_str += ", 0"
            elif hk in hedraSet:
                if (
                    hasattr(hed, "flex_female_1") or hasattr(hed, "flex_male_1")
                ) and akndx != 2:
                    # hedron flagged flexible on its first bond
                    if akndx == 0:
                        atom_done_str += ", 0"
                    elif akndx == 1:
                        atom_done_str += ", 1"
                        atomSet.add(ak)
                elif (
                    hasattr(hed, "flex_female_2") or hasattr(hed, "flex_male_2")
                ) and akndx != 0:
                    # hedron flagged flexible on its second bond
                    if akndx == 2:
                        atom_done_str += ", 0"
                    elif akndx == 1:
                        atom_done_str += ", 1"
                        atomSet.add(ak)
                else:
                    atom_done_str += ", 1"
                    atomSet.add(ak)
            else:
                atom_done_str += ", 0"
            akndx += 1
        fp.write(atom_str)
        fp.write(atom_done_str)
        # specify bond options
        # bond[0] joins atoms 0-1 of the hedron, bond[1] joins atoms 1-2
        bond = []
        bond.append(hed.atomkeys[0].id + "-" + hed.atomkeys[1].id)
        bond.append(hed.atomkeys[1].id + "-" + hed.atomkeys[2].id)
        b0 = True  # True only while processing bond[0]
        for b in bond:
            wstr = ""
            if b in bondDict and bondDict[b] == "StdBond":
                # bond already recorded as a standard bond
                wstr = ", 0"
            elif hk in hedraSet:
                bondType = "StdBond"
                if b0:
                    if hasattr(hed, "flex_female_1"):
                        bondType = "FemaleJoinBond"
                    elif hasattr(hed, "flex_male_1"):
                        bondType = "MaleJoinBond"
                    elif hasattr(hed, "skinny_1"):
                        bondType = "SkinnyBond"
                    elif hasattr(hed, "hbond_1"):
                        bondType = "HBond"
                else:
                    if hasattr(hed, "flex_female_2"):
                        bondType = "FemaleJoinBond"
                    elif hasattr(hed, "flex_male_2"):
                        bondType = "MaleJoinBond"
                    # elif hasattr(hed, 'skinny_2'):  # unused
                    #     bondType = 'SkinnyBond'
                    elif hasattr(hed, "hbond_2"):
                        bondType = "HBond"
                if b in bondDict:
                    # seen before with a non-standard type: record it as
                    # standard from now on
                    bondDict[b] = "StdBond"
                else:
                    bondDict[b] = bondType
                wstr = ", " + str(bondType)
            else:
                wstr = ", 0"
            fp.write(wstr)
            b0 = False
        # residue name and position come from the hedron's first atom
        akl = hed.atomkeys[0].akl
        fp.write(
            ', "'
            + akl[AtomKey.fields.resname]
            + '", '
            + akl[AtomKey.fields.respos]
            + ', "'
            + hed.e_class
            + '"'
        )
        fp.write(" ], // " + str(hk) + "\n")
    fp.write(" ],\n")  # end of hedra table
    # write chain table
    # force all atom coordinates to be recomputed before reading atomArray
    self.atomArrayValid[:] = False
    self.internal_to_atom_coordinates()
    fp.write("\n[ // chain - world transform for each residue\n")
    chnStarted = False
    for ric in self.ordered_aa_ic_list:
        # rtm handle start / end
        respos = ric.residue.id[1]
        if start is not None and respos < start:
            continue
        if fin is not None and respos > fin:
            continue
        # NOTE(review): hedra is not read again after this point in this
        # method, so this update looks redundant - confirm before removing
        for k, h in ric.hedra.items():
            hedra[k] = h
        for NCaCKey in sorted(ric.NCaCKey):  # type: ignore
            mtr = None
            if 0 < len(ric.rprev):
                # residue has a predecessor: take the second matrix
                # returned by coord_space() for its N, Ca, C coordinates
                acl = [self.atomArray[self.atomArrayIndex[ak]] for ak in NCaCKey]
                mt, mtr = coord_space(acl[0], acl[1], acl[2], True)
            else:
                # no previous residue: identity transform
                mtr = np.identity(4, dtype=np.float64)
            if chnStarted:
                fp.write(",\n")
            else:
                chnStarted = True
            fp.write(" [ " + str(resNdx[ric]) + ', "' + str(ric.residue.id[1]))
            fp.write(ric.lc + '", //' + str(NCaCKey) + "\n")
            IC_Chain._write_mtx(fp, mtr)
            fp.write(" ]")
    fp.write("\n ]\n")
def distance_plot(
    self, filter: Optional[Union[np.ndarray, None]] = None
) -> np.ndarray:
    """Generate 2D distance plot from atomArray.

    With no argument, distances are computed between all atoms in
    ``atomArray``.  Pass ``filter`` to restrict the atoms used; it is
    applied directly as an index into ``atomArray``.  For the classic
    C-alpha distance plot::

        atmNameNdx = internal_coords.AtomKey.fields.atm
        CaSelect = [
            atomArrayIndex.get(k)
            for k in atomArrayIndex.keys()
            if k.akl[atmNameNdx] == "CA"
        ]
        plot = cic.distance_plot(CaSelect)

    Alternatively, this will select all backbone atoms::

        backboneSelect = [
            atomArrayIndex.get(k)
            for k in atomArrayIndex.keys()
            if k.is_backbone()
        ]

    :param filter: restrict atoms for calculation; default None uses all
    :returns: square array of pairwise distances between selected atoms

    .. seealso::
        :meth:`.distance_to_internal_coordinates`, which requires the
        default all atom distance plot.
    """
    coords = self.atomArray if filter is None else self.atomArray[filter]
    # create distance matrix without scipy: broadcast to every pairwise
    # difference vector, then take the norm along the coordinate axis
    # see https://jbencook.com/pairwise-distance-in-numpy/
    deltas = coords[:, None, :] - coords[None, :, :]
    return np.linalg.norm(deltas, axis=-1)
def dihedral_signs(self) -> np.ndarray:
    """Return the element-wise sign of the chain dihedraAngle array.

    Required for :meth:`.distance_to_internal_coordinates`
    """
    angles = self.dihedraAngle
    return np.sign(angles)
def distplot_to_dh_arrays(
    self, distplot: np.ndarray, dihedra_signs: np.ndarray
) -> None:
    """Load di/hedra distance arrays from distplot.

    Fill :class:`IC_Chain` arrays hedraL12, L23, L13 and dihedraL14
    distance value arrays from input distplot, dihedra_signs array from
    input dihedra_signs.  Distplot and di/hedra distance arrays must index
    according to AtomKey mappings in :class:`IC_Chain` .hedraNdx and
    .dihedraNdx (created in :meth:`IC_Chain.init_edra`)

    Call :meth:`atom_to_internal_coordinates` (or at least
    :meth:`init_edra`) to generate a2ha_map and d2a_map before running
    this.

    Explicitly removed from :meth:`.distance_to_internal_coordinates` so
    user may populate these chain di/hedra arrays by other methods.

    :param distplot: 2D array of atom to atom distances
    :param dihedra_signs: array stored unchanged as ``self.dihedra_signs``
    """
    # one row of three atom indices per hedron
    hedraAtoms = self.a2ha_map.reshape(-1, 3)
    a0, a1, a2 = hedraAtoms[:, 0], hedraAtoms[:, 1], hedraAtoms[:, 2]
    self.hedraL12 = distplot[a0, a1]
    self.hedraL23 = distplot[a1, a2]
    self.hedraL13 = distplot[a0, a2]
    # dihedron distance is between its first and fourth atoms
    dihedraAtoms = self.d2a_map
    self.dihedraL14 = distplot[dihedraAtoms[:, 0], dihedraAtoms[:, 3]]
    self.dihedra_signs = dihedra_signs
def distance_to_internal_coordinates(
    self, resetAtoms: Optional[Union[bool, None]] = True
) -> None:
    """Compute chain di/hedra from distance and chirality data.

    Distance properties on hedra L12, L23, L13 and dihedra L14 configured
    by :meth:`.distplot_to_dh_arrays` or alternative loader.

    dihedraAngles result is multiplied by dihedra_signs at final step to
    recover chirality information lost in distance plot (mirror image of
    structure has same distances but opposite sign dihedral angles).

    Note that chain breaks will cause errors in rebuilt structure, use
    :meth:`.copy_initNCaCs` to avoid this

    Based on Blue, the Hedronometer's answer to `The dihedral angles of a tetrahedron
    in terms of its edge lengths <https://math.stackexchange.com/a/49340/972353>`_
    on `math.stackexchange.com <https://math.stackexchange.com/>`_.  See also:
    `"Heron-like Hedronometric Results for Tetrahedral Volume"
    <http://daylateanddollarshort.com/mathdocs/Heron-like-Results-for-Tetrahedral-Volume.pdf>`_.

    Other values from that analysis included here as comments for
    completeness:

    * oa = hedron1 L12 if reverse else hedron1 L23
    * ob = hedron1 L23 if reverse else hedron1 L12
    * ac = hedron2 L12 if reverse else hedron2 L23
    * ab = hedron1 L13 = law of cosines on OA, OB (hedron1 L12, L23)
    * oc = hedron2 L13 = law of cosines on OA, AC (hedron2 L12, L23)
    * bc = dihedron L14

    target is OA, the dihedral angle along edge oa.

    Results are written in place to ``dihedraAngleRads``, ``dihedraAngle``
    and ``hedraAngle``.

    :param bool resetAtoms: default True.
        Mark all atoms in di/hedra and atomArray for updating by
        :meth:`.internal_to_atom_coordinates`.  Alternatively set this to
        False and manipulate `atomArrayValid`, `dAtoms_needs_update` and
        `hAtoms_needs_update` directly to reduce computation.
    """  # noqa
    # Gather per-dihedron edge lengths from the two hedra of each dihedron.
    # assumes dH1ndx / dH2ndx are integer index arrays, so each lookup is
    # a copy and the in-place assignments below do not alter
    # self.hedraL12 / self.hedraL23 - TODO confirm
    oa = self.hedraL12[self.dH1ndx]
    oa[self.dFwd] = self.hedraL23[self.dH1ndx][self.dFwd]
    ob = self.hedraL23[self.dH1ndx]
    ob[self.dFwd] = self.hedraL12[self.dH1ndx][self.dFwd]
    ac = self.hedraL12[self.dH2ndx]
    ac[self.dFwd] = self.hedraL23[self.dH2ndx][self.dFwd]
    ab = self.hedraL13[self.dH1ndx]
    oc = self.hedraL13[self.dH2ndx]
    bc = self.dihedraL14
    # semi-perimeters of the two triangular faces sharing edge oa
    # Ws = (ab + ac + bc) / 2
    # Xs = (ob + bc + oc) / 2
    Ys = (oa + ac + oc) / 2
    Zs = (oa + ob + ab) / 2
    # Heron's formula: squared areas of those two faces
    # Wsqr = Ws * (Ws - ab) * (Ws - ac) * (Ws - bc)
    # Xsqr = Xs * (Xs - ob) * (Xs - bc) * (Xs - oc)
    Ysqr = Ys * (Ys - oa) * (Ys - ac) * (Ys - oc)
    Zsqr = Zs * (Zs - oa) * (Zs - ob) * (Zs - ab)
    Hsqr = (
        4 * oa * oa * bc * bc - np.square((ob * ob + ac * ac) - (oc * oc + ab * ab))
    ) / 16
    """
    Jsqr = (
        4 * ob * ob * ac * ac
        - np.square((oc * oc + ab * ab) - (oa * oa + bc * bc))
    ) / 16
    Ksqr = (
        4 * oc * oc * ab * ab
        - np.square((oa * oa + bc * bc) - (ob * ob + ac * ac))
    ) / 16
    """
    Y = np.sqrt(Ysqr)
    Z = np.sqrt(Zsqr)
    # X = np.sqrt(Xsqr)
    # W = np.sqrt(Wsqr)
    # cosine of the dihedral angle along edge oa
    cosOA = (Ysqr + Zsqr - Hsqr) / (2 * Y * Z)
    # cosOB = (Zsqr + Xsqr - Jsqr) / (2 * Z * X)
    # cosOC = (Xsqr + Ysqr - Ksqr) / (2 * X * Y)
    # cosBC = (Wsqr + Xsqr - Hsqr) / (2 * W * X)
    # cosCA = (Wsqr + Ysqr - Jsqr) / (2 * W * Y)
    # cosAB = (Wsqr + Zsqr - Ksqr) / (2 * W * Z)
    # OA =
    # compute dihedral angles
    # ensure cosOA is in range [-1,1] for arccos
    cosOA[cosOA < -1.0] = -1.0
    cosOA[cosOA > 1.0] = 1.0
    # without np.longdouble here a few OCCACB angles lose last digit match
    np.arccos(cosOA, out=self.dihedraAngleRads, dtype=np.longdouble)
    # arccos result is unsigned; apply the stored signs
    self.dihedraAngleRads *= self.dihedra_signs
    np.rad2deg(self.dihedraAngleRads, out=self.dihedraAngle)
    # OB = np.rad2deg(np.arccos(cosOB))
    # OC = np.rad2deg(np.arccos(cosOC))
    # BC = np.rad2deg(np.arccos(cosBC))
    # CA = np.rad2deg(np.arccos(cosCA))
    # AB = np.rad2deg(np.arccos(cosAB))
    # law of cosines for hedra angles
    np.rad2deg(
        np.arccos(
            (
                np.square(self.hedraL12)
                + np.square(self.hedraL23)
                - np.square(self.hedraL13)
            )
            / (2 * self.hedraL12 * self.hedraL23)
        ),
        out=self.hedraAngle,
    )
    if resetAtoms:
        # flag every atom, dihedron and hedron as needing recomputation
        self.atomArrayValid[:] = False
        self.dAtoms_needs_update[:] = True
        self.hAtoms_needs_update[:] = True
def copy_initNCaCs(self, other: "IC_Chain") -> None:
    """Copy atom coordinates for initNCaC atoms from other IC_Chain.

    For every AtomKey listed in ``other.initNCaCs``, copy its row of
    ``other.atomArray`` into ``self.atomArray`` and set the matching
    ``atomArrayValid`` flag True.  Rows are located with
    ``self.atomArrayIndex`` and the same row numbers are used to read
    ``other.atomArray``.

    Needed for :meth:`.distance_to_internal_coordinates` if target has
    chain breaks (otherwise each fragment will start at origin).

    Also useful if copying internal coordinates from another chain.

    :param IC_Chain other: chain supplying initNCaCs and coordinates
    """
    rows = []
    for ncacKeys in other.initNCaCs:
        for ak in ncacKeys:
            rows.append(self.atomArrayIndex[ak])
    self.atomArray[rows] = other.atomArray[rows]
    self.atomArrayValid[rows] = True
class IC_Residue: | |
"""Class to extend Biopython Residue with internal coordinate data. | |
Parameters | |
---------- | |
parent: biopython Residue object this class extends | |
Attributes | |
---------- | |
no_altloc: bool default False | |
**Class** variable, disable processing of ALTLOC atoms if True, use | |
only selected atoms. | |
accept_atoms: tuple | |
**Class** variable :data:`accept_atoms`, list of PDB atom names to use | |
when generating internal coordinates. | |
Default is:: | |
accept_atoms = accept_mainchain + accept_hydrogens | |
to exclude hydrogens in internal coordinates and generated PDB files, | |
override as:: | |
IC_Residue.accept_atoms = IC_Residue.accept_mainchain | |
to get only mainchain atoms plus amide proton, use:: | |
IC_Residue.accept_atoms = IC_Residue.accept_mainchain + ('H',) | |
to convert D atoms to H, set :data:`AtomKey.d2h` = True and use:: | |
IC_Residue.accept_atoms = ( | |
accept_mainchain + accept_hydrogens + accept_deuteriums | |
) | |
Note that `accept_mainchain = accept_backbone + accept_sidechain`. | |
Thus to generate sequence-agnostic conformational data for e.g. | |
structure alignment in dihedral angle space, use:: | |
IC_Residue.accept_atoms = accept_backbone | |
or set gly_Cbeta = True and use:: | |
IC_Residue.accept_atoms = accept_backbone + ('CB',) | |
Changing accept_atoms will cause the default `structure_rebuild_test` in | |
:mod:`.ic_rebuild` to fail if some atoms are filtered (obviously). Use | |
the `quick=True` option to test only the coordinates of filtered atoms | |
to avoid this. | |
There is currently no option to output internal coordinates with D | |
instead of H. | |
accept_resnames: tuple | |
**Class** variable :data:`accept_resnames`, list of 3-letter residue | |
names for HETATMs to accept when generating internal coordinates from | |
atoms. HETATM sidechain will be ignored, but normal backbone atoms (N, | |
CA, C, O, CB) will be included. Currently only CYG, YCM and UNK; | |
override at your own risk. To generate sidechain, add appropriate | |
entries to `ic_data_sidechains` in :mod:`.ic_data` and support in | |
:meth:`IC_Chain.atom_to_internal_coordinates`. | |
gly_Cbeta: bool default False | |
**Class** variable :data:`gly_Cbeta`, override to True to generate | |
internal coordinates for glycine CB atoms in | |
:meth:`IC_Chain.atom_to_internal_coordinates` :: | |
IC_Residue.gly_Cbeta = True | |
pic_accuracy: str default "17.13f" | |
**Class** variable :data:`pic_accuracy` sets accuracy for numeric values | |
(angles, lengths) in .pic files. Default set high to support mmCIF file | |
accuracy in rebuild tests. If you find rebuild tests fail with | |
'ERROR -COORDINATES-' and verbose=True shows only small discrepancies, | |
try raising this value (or lower it to 9.5 if only working with PDB | |
format files). :: | |
IC_Residue.pic_accuracy = "9.5f" | |
residue: Biopython Residue object reference | |
The :class:`.Residue` object this extends | |
hedra: dict indexed by 3-tuples of AtomKeys | |
Hedra forming this residue | |
dihedra: dict indexed by 4-tuples of AtomKeys | |
Dihedra forming (overlapping) this residue | |
rprev, rnext: lists of IC_Residue objects | |
References to adjacent (bonded, not missing, possibly disordered) | |
residues in chain | |
atom_coords: AtomKey indexed dict of numpy [4] arrays | |
**removed** | |
Use AtomKeys and atomArrayIndex to build if needed | |
ak_set: set of AtomKeys in dihedra | |
AtomKeys in all dihedra overlapping this residue (see __contains__()) | |
alt_ids: list of char | |
AltLoc IDs from PDB file | |
bfactors: dict | |
AtomKey indexed B-factors as read from PDB file | |
NCaCKey: List of tuples of AtomKeys | |
List of tuples of N, Ca, C backbone atom AtomKeys; usually only 1 | |
but more if backbone altlocs. | |
is20AA: bool | |
True if residue is one of 20 standard amino acids, based on | |
Residue resname | |
isAccept: bool | |
True if is20AA or in accept_resnames below | |
rbase: tuple | |
residue position, insert code or none, resname (1 letter if standard | |
amino acid) | |
cic: IC_Chain default None | |
parent chain :class:`IC_Chain` object | |
scale: optional float | |
used for OpenSCAD output to generate gly_Cbeta bond length | |
Methods | |
------- | |
assemble(atomCoordsIn, resetLocation, verbose) | |
Compute atom coordinates for this residue from internal coordinates | |
get_angle() | |
Return angle for passed key | |
get_length() | |
Return bond length for specified pair | |
pick_angle() | |
Find Hedron or Dihedron for passed key | |
pick_length() | |
Find hedra for passed AtomKey pair | |
set_angle() | |
Set angle for passed key (no position updates) | |
set_length() | |
Set bond length in all relevant hedra for specified pair | |
bond_rotate(delta) | |
adjusts related dihedra angles by delta, e.g. rotating psi (N-Ca-C-N) | |
will adjust the adjacent N-Ca-C-O by the same amount to avoid clashes | |
bond_set(angle) | |
uses bond_rotate to set specified dihedral to angle and adjust related | |
dihedra accordingly | |
rak(atom info) | |
cached AtomKeys for this residue | |
""" | |
accept_resnames = ("CYG", "YCM", "UNK")
"""Add 3-letter residue name here for non-standard residues with
normal backbone.  CYG included for test case 4LGY (1305 residue
contiguous chain).  Safe to add more names for N-CA-C-O backbones, any
more complexity will need additions to :data:`accept_atoms`,
`ic_data_sidechains` in :mod:`.ic_data` and support in
:meth:`IC_Chain.atom_to_internal_coordinates`"""

_AllBonds: bool = False
"""For OpenSCAD output, generate explicit hedra covering all bonds.
**Class** variable, whereas a PDB file just specifies atoms, OpenSCAD
output for 3D printing needs all bonds specified explicitly - otherwise
e.g. PHE rings will not be closed.  This variable is managed by the
:func:`.SCADIO.write_SCAD` code."""

no_altloc: bool = False
"""Set True to filter altloc atoms on input and only work with Biopython
default Atoms"""

gly_Cbeta: bool = False
"""Create beta carbons on all Gly residues.

Setting this to True will generate internal coordinates for Gly C-beta
carbons in :meth:`atom_to_internal_coordinates`.

Data averaged from Sep 2019 Dunbrack cullpdb_pc20_res2.2_R1.0
restricted to structures with amide protons.
Please see

`PISCES: A Protein Sequence Culling Server <https://dunbrack.fccc.edu/pisces/>`_

'G. Wang and R. L. Dunbrack, Jr. PISCES: a protein sequence culling
server. Bioinformatics, 19:1589-1591, 2003.'

Ala avg rotation of OCCACB from NCACO query::

    select avg(g.rslt) as avg_rslt, stddev(g.rslt) as sd_rslt, count(*)
    from
    (select f.d1d, f.d2d,
    (case when f.rslt > 0 then f.rslt-360.0 else f.rslt end) as rslt
    from (select d1.angle as d1d, d2.angle as d2d,
    (d2.angle - d1.angle) as rslt from dihedron d1,
    dihedron d2 where d1.re_class='AOACACAACB' and
    d2.re_class='ANACAACAO' and d1.pdb=d2.pdb and d1.chn=d2.chn
    and d1.res=d2.res) as f) as g

results::

    | avg_rslt          | sd_rslt          | count   |
    | -122.682194862932 | 5.04403040513919 | 14098   |

"""

# output accuracy for angle and len values in .pic files
pic_accuracy: str = "17.13f"

# Atom name tuples below keep their original element order; only the
# layout is condensed.
accept_backbone = ("N", "CA", "C", "O", "OXT")
accept_sidechain = (
    "CB", "CG", "CG1", "OG1", "OG", "SG", "CG2", "CD",
    "CD1", "SD", "OD1", "ND1", "CD2", "ND2", "CE", "CE1",
    "NE", "OE1", "NE1", "CE2", "OE2", "NE2", "CE3", "CZ",
    "NZ", "CZ2", "CZ3", "OD2", "OH", "CH2", "NH1", "NH2",
)
accept_mainchain = accept_backbone + accept_sidechain
accept_hydrogens = (
    "H", "H1", "H2", "H3", "HA", "HA2", "HA3", "HB",
    "HB1", "HB2", "HB3", "HG2", "HG3", "HD2", "HD3", "HE2",
    "HE3", "HZ1", "HZ2", "HZ3", "HG11", "HG12", "HG13", "HG21",
    "HG22", "HG23", "HZ", "HD1", "HE1", "HD11", "HD12", "HD13",
    "HG", "HG1", "HD21", "HD22", "HD23", "NH1", "NH2", "HE",
    "HH11", "HH12", "HH21", "HH22", "HE21", "HE22", "HE2", "HH",
    "HH2",
)
accept_deuteriums = (
    "D", "D1", "D2", "D3", "DA", "DA2", "DA3", "DB",
    "DB1", "DB2", "DB3", "DG2", "DG3", "DD2", "DD3", "DE2",
    "DE3", "DZ1", "DZ2", "DZ3", "DG11", "DG12", "DG13", "DG21",
    "DG22", "DG23", "DZ", "DD1", "DE1", "DD11", "DD12", "DD13",
    "DG", "DG1", "DD21", "DD22", "DD23", "ND1", "ND2", "DE",
    "DH11", "DH12", "DH21", "DH22", "DE21", "DE22", "DE2", "DH",
    "DH2",
)
accept_atoms = accept_mainchain + accept_hydrogens
"""Change accept_atoms to restrict atoms processed. See :class:`IC_Residue`
for usage."""
def __init__(self, parent: "Residue") -> None:
    """Initialize IC_Residue with parent Biopython Residue.

    Sets up empty di/hedra, AtomKey cache and neighbour containers, derives
    ``rbase`` / ``lc`` from the parent id and resname, and, if the residue
    is accepted, passes each of its atoms (or altloc children) to
    :meth:`_add_atom`.

    :param Residue parent: Biopython Residue object.
        The Biopython Residue this object extends
    """
    self.residue = parent
    self.cic: IC_Chain
    # dict of hedron objects indexed by hedron keys
    self.hedra: Dict[HKT, Hedron] = {}
    # dict of dihedron objects indexed by dihedron keys
    self.dihedra: Dict[DKT, Dihedron] = {}
    # cache of AtomKey results for rak()
    self.akc: Dict[Union[str, Atom], AtomKey] = {}
    # set of AtomKeys involved in dihedra, used by split_akl,
    # build_rak_cache.  Built by __init__ for XYZ (PDB coord) input,
    # _link_dihedra for PIC input
    self.ak_set: Set[AtomKey] = set()
    # reference to adjacent residues in chain
    self.rprev: List[IC_Residue] = []
    self.rnext: List[IC_Residue] = []
    # bfactors copied from PDB file
    self.bfactors: Dict[str, float] = {}
    self.alt_ids: Union[List[str], None] = None if IC_Residue.no_altloc else []
    self.is20AA = True
    self.isAccept = True
    # self.NCaCKey set by _link_dihedra()

    # rbase = position, insert code or None, resname (1 letter if in 20)
    resId = parent.id
    insCode = None if resId[2] == " " else resId[2]
    resName = parent.resname
    try:
        resName = protein_letters_3to1[resName]
    except KeyError:
        # not a standard amino acid; may still be an accepted resname
        self.is20AA = False
        if resName not in self.accept_resnames:
            self.isAccept = False
    self.rbase = (resId[1], insCode, resName)
    self.lc = resName

    if not self.isAccept:
        return

    for atom in parent.get_atoms():
        if not hasattr(atom, "child_dict"):
            self._add_atom(atom)
        elif IC_Residue.no_altloc:
            # atom has altloc children but altlocs are disabled
            self._add_atom(atom.selected_child)
        else:
            for altAtom in atom.child_dict.values():
                self._add_atom(altAtom)
    if self.ak_set:
        # only for coordinate (pdb) input, _add_atom loads
        # init cache ready for atom_to_internal_coords
        self._build_rak_cache()
def __deepcopy__(self, memo):
    """Deep copy implementation for IC_Residue.

    Returns the copy already recorded in ``memo`` when there is one.
    Otherwise creates a new instance without calling ``__init__``, copies
    the instance ``__dict__`` shallowly, and replaces ``cic`` and
    ``residue`` with the copies found in ``memo`` (so both must already
    have entries there).
    """
    prior = memo.get(id(self), False)
    if prior:
        return prior
    clone = type(self).__new__(self.__class__)
    memo[id(self)] = clone
    # shallow copy now; later replace what is not static
    vars(clone).update(vars(self))
    clone.cic = memo[id(self.cic)]
    clone.residue = memo[id(self.residue)]
    # still need to update: rnext, rprev, akc, ak_set, di/hedra
    return clone
def __contains__(self, ak: "AtomKey") -> bool:
    """Return True if atomkey is in this residue.

    The key must be a member of ``ak_set`` and its first three fields must
    match this residue's ``rbase`` (the first compared as an int).
    """
    if ak not in self.ak_set:
        return False
    akl = ak.akl
    return bool(
        int(akl[0]) == self.rbase[0]
        and akl[1] == self.rbase[1]
        and akl[2] == self.rbase[2]
    )
def rak(self, atm: Union[str, Atom]) -> "AtomKey":
    """Cache calls to AtomKey for this residue.

    Returns the AtomKey stored in ``self.akc`` for ``atm``; on a cache miss
    a new AtomKey is created and stored, and flagged ``missing`` when
    ``atm`` was given as a string.
    """
    cache = self.akc
    if atm in cache:
        return cache[atm]
    ak = AtomKey(self, atm)
    cache[atm] = ak
    if isinstance(atm, str):
        ak.missing = True
    return ak
def _build_rak_cache(self) -> None:
    """Create explicit entries for atoms so don't miss altlocs.

    This ensures that self.akc (atom key cache) has an entry for selected
    atom name (e.g. "CA") amongst any that have altlocs.  Without this,
    rak() on the other altloc atom first may result in the main atom being
    missed.

    AtomKeys are visited in sorted order and the first one seen for each
    atom name is stored, unless the name already has a non-None entry.
    """
    cache = self.akc
    for ak in sorted(self.ak_set):
        name = ak.akl[3]
        if cache.get(name) is not None:
            continue  # name already cached
        cache[name] = ak
def _add_atom(self, atm: Atom) -> None:
    """Filter Biopython Atom with accept_atoms; set ak_set.

    Arbitrarily renames O' and O'' to O and OXT (the Atom's ``name`` is
    modified in place).  Atoms whose name is not in ``accept_atoms`` are
    ignored; for the rest the AtomKey from :meth:`rak` is added to
    ``ak_set``.
    """
    renames = {"O'": "O", "O''": "OXT"}
    if atm.name[0] == "O" and atm.name in renames:
        atm.name = renames[atm.name]
    if atm.name in self.accept_atoms:
        # passing Atom here, not string
        self.ak_set.add(self.rak(atm))
def __repr__(self) -> str:
    """Return the parent Residue full_id rendered as a string."""
    fullId = self.residue.full_id
    return str(fullId)
def pretty_str(self) -> str:
    """Nice string for residue ID.

    Format is the residue name, a space, then the three fields of the
    residue id run together.
    """
    resId = self.residue.id
    idText = "".join((str(resId[0]), str(resId[1]), str(resId[2])))
    return f"{self.residue.resname} {idText}"
def _link_dihedra(self, verbose: bool = False) -> None:
    """Housekeeping after loading all residues and dihedra.

    - Link dihedra (and hedra) to this residue's chain
    - form ak_set from the atomkeys of all di/hedra
    - build the AtomKey cache if it is still empty
    - set NCaCKey to be available AtomKeys

    called for loading PDB / atom coords

    :param bool verbose: not used in this method
    """
    atomKeys = self.ak_set
    for dihedron in self.dihedra.values():
        dihedron.ric = self  # each dihedron can find its IC_Residue
        dihedron.cic = self.cic  # each dihedron can update chain dihedral angles
        atomKeys.update(dihedron.atomkeys)
    for hedron in self.hedra.values():
        # collect any atoms in orphan hedra,
        # e.g. alternate CB path with no O
        atomKeys.update(hedron.atomkeys)
        hedron.cic = self.cic  # each hedron can update chain hedra

    # if loaded PIC data, akc not initialised yet
    if not self.akc:
        self._build_rak_cache()

    # initialise NCaCKey here:
    self.NCaCKey = []
    backbone = tuple(AtomKey(self, name) for name in ("N", "CA", "C"))
    self.NCaCKey.extend(self.split_akl(backbone))
def set_flexible(self) -> None:
    """For OpenSCAD, mark N-CA and CA-C bonds to be flexible joints.

    Sets marker attributes on this residue's hedra according to each
    hedron's ``e_class``; only the first matching rule applies.

    See :func:`.SCADIO.write_SCAD`
    """
    for hedron in self.hedra.values():
        eclass = hedron.e_class
        if eclass == "NCAC":
            hedron.flex_female_1 = hedron.flex_female_2 = True
            continue
        if eclass.endswith("NCA"):
            hedron.flex_male_2 = True
            continue
        if eclass.startswith("CAC") and hedron.atomkeys[1].akl[3] == "C":
            hedron.flex_male_1 = True
            continue
        if eclass == "CBCAC":
            hedron.skinny_1 = True  # CA-CB bond interferes with flex join
def set_hbond(self) -> None:
    """For OpenSCAD, mark H-N and C-O bonds to be hbonds (magnets).

    Hedra with ``e_class`` "HNCA" get ``hbond_1``; those with "CACO" get
    ``hbond_2``.

    See :func:`.SCADIO.write_SCAD`
    """
    for hedron in self.hedra.values():
        eclass = hedron.e_class
        if eclass == "HNCA":
            hedron.hbond_1 = True
            continue
        if eclass == "CACO":
            hedron.hbond_2 = True
def _default_startpos(self) -> Dict["AtomKey", np.array]:
    """Generate default N-Ca-C coordinates to build this residue from.

    For each N-Ca-C key of this residue (in sorted order) that has an entry
    in the chain's ``id3_dh_index``, take every dihedron listed there and
    map each of its AtomKeys to the corresponding row of the chain's
    ``dAtoms`` entry for that dihedron.

    :returns: dict of AtomKey -> coordinates taken from ``cic.dAtoms``
    """
    cic = self.cic
    coords = {}
    for ncacKey in sorted(self.NCaCKey):
        dihedronKeys = cic.id3_dh_index.get(ncacKey, None)
        if dihedronKeys is None:
            continue  # no dihedra start from this N-Ca-C key
        for dk in dihedronKeys:
            dihedron = cic.dihedra[dk]
            for pos, ak in enumerate(dihedron.atomkeys):
                coords[ak] = cic.dAtoms[dihedron.ndx][pos]
    return coords
def _get_startpos(self) -> Dict["AtomKey", np.array]:
    """Find N-Ca-C coordinates to build this residue from.

    Uses the chain ``atomArray`` coordinates of every N-Ca-C key whose
    three atoms are all flagged valid in ``atomArrayValid``; if none
    qualify, falls back to :meth:`_default_startpos`.
    """
    # only used by assemble()
    cic = self.cic
    found = {}
    for ncac in self.NCaCKey:
        rows = [cic.atomArrayIndex[ak] for ak in ncac]
        if not np.all(cic.atomArrayValid[rows]):
            continue  # at least one of the three atoms is not valid
        for ak, row in zip(ncac, rows):
            found[ak] = cic.atomArray[row]
    return found if found else self._default_startpos()
def clear_transforms(self):
    """Invalidate dihedra coordinate space attributes before assemble().

    Clears the chain ``dcsValid`` flag of every dihedron in this residue.

    Coordinate space attributes are Dihedron.cst and .rcst, and
    :data:`IC_Chain.dCoordSpace`
    """
    for ndx in (dihedron.ndx for dihedron in self.dihedra.values()):
        self.cic.dcsValid[ndx] = False
def assemble(
    self,
    resetLocation: bool = False,
    verbose: bool = False,
) -> Union[Dict["AtomKey", np.array], Dict[HKT, np.array], None]:
    """Compute atom coordinates for this residue from internal coordinates.

    This is the IC_Residue part of the :meth:`.assemble_residues_ser` serial
    version, see :meth:`.assemble_residues` for numpy vectorized approach
    which works at the :class:`IC_Chain` level.

    Join prepared dihedra starting from N-CA-C and N-CA-CB hedrons,
    computing protein space coordinates for backbone and sidechain atoms

    Sets forward and reverse transforms on each Dihedron to convert from
    protein coordinates to dihedron space coordinates for first three
    atoms (see :data:`IC_Chain.dCoordSpace`)

    Call :meth:`.init_atom_coords` to update any modified di/hedra before
    coming here, this only assembles dihedra into protein coordinate space.

    **Algorithm**

    A double-ended queue is seeded with the c-ca-n, n-ca-cb, o-c-ca and
    n-ca-c hedron keys and consumed from the right, so the n-ca-c keys are
    processed first.  For every dihedron whose first hedron is the key just
    taken from the queue:

    - all 4 atoms already placed: queue the dihedron's 2nd hedron key (only
      if the dihedron starts in this residue) and compute the transforms if
      they are flagged invalid
    - exactly 3 atoms placed: compute forward and reverse transforms from
      the first 3 atoms, use the reverse transform to place the 4th atom
      from the dihedron's initial coordinates, then queue the 2nd hedron key
      (only if the dihedron starts in this residue)
    - otherwise: nothing is placed; reported when ``verbose`` is set

    The loop ends when the queue drains; hedron keys which do not start any
    dihedra are removed without action.

    :param bool resetLocation: default False.
        - Option to ignore start location and orient so initial N-Ca-C
          hedron at origin.
    :param bool verbose: default False.
        Print dihedra that could not be started or lack initial coords.

    :returns:
        Dict of AtomKey -> homogeneous atom coords for residue in protein
        space relative to previous residue, or None if this residue has no
        atoms

        **Also** directly updates :data:`IC_Chain.atomArray` as
        :meth:`.assemble_residues` does.
    """
    chain_ic = self.cic
    transform_ok = chain_ic.dcsValid
    coord_ok = chain_ic.atomArrayValid
    coord_ndx = chain_ic.atomArrayIndex
    coord_array = chain_ic.atomArray

    if not self.ak_set:
        return None  # give up now if no atoms to work with

    ncac_keys = sorted(self.NCaCKey)
    this_pos = self.rbase[0]

    # order of these seed entries matters: the queue is popped from the right
    seeds = self.split_akl((self.rak("C"), self.rak("CA"), self.rak("N")))
    if "CB" in self.akc:
        seeds.extend(self.split_akl((self.rak("N"), self.rak("CA"), self.rak("CB"))))
    if "O" in self.akc:
        seeds.extend(self.split_akl((self.rak("O"), self.rak("C"), self.rak("CA"))))
    seeds.extend(ncac_keys)
    pending = deque(seeds)

    # initial coords: default N-CA-C position, or whatever _get_startpos()
    # supplies for this residue
    if resetLocation:
        atomCoords = self._default_startpos()
    else:
        atomCoords = self._get_startpos()

    while pending:
        h1k = cast(HKT, pending.pop())
        dihedra_keys = chain_ic.id3_dh_index.get(h1k, None)
        if dihedra_keys is None:
            continue  # hedron key starts no dihedra

        for dk in dihedra_keys:
            d = chain_ic.dihedra[dk]
            d_pos = int(d.atomkeys[0].akl[AtomKey.fields.respos])
            d.initial_coords = chain_ic.dAtoms[d.ndx]

            if 4 != len(d.initial_coords) or d.initial_coords[3] is None:
                # skip incomplete dihedron: no 4th atom due to missing
                # input data
                if verbose:
                    print("no initial coords for", d)
                continue

            next_hedron_key = d.hedron2.atomkeys
            fourth_ak = d.atomkeys[3]
            placed = sum(1 for a in d.atomkeys if a in atomCoords)

            if 4 == placed:
                # dihedron already done, queue 2nd hedron key
                if d_pos == this_pos:  # only this residue
                    pending.appendleft(next_hedron_key)
                if not transform_ok[d.ndx]:  # missing transform
                    # can happen for altloc atoms
                    # only needed for write_SCAD output
                    acs = [atomCoords[a] for a in h1k]
                    d.cst, d.rcst = coord_space(acs[0], acs[1], acs[2], True)
                    transform_ok[d.ndx] = True
            elif 3 == placed:
                acs = np.asarray([atomCoords[a] for a in h1k])
                d.cst, d.rcst = coord_space(acs[0], acs[1], acs[2], True)
                transform_ok[d.ndx] = True
                # reverse transform maps 4th atom from dihedron space to
                # current coordinates
                new_coord = d.rcst.dot(d.initial_coords[3])
                atomCoords[fourth_ak] = new_coord
                coord_array[coord_ndx[fourth_ak]] = new_coord
                coord_ok[coord_ndx[fourth_ak]] = True
                if d_pos == this_pos:  # only this residue
                    pending.appendleft(next_hedron_key)
            elif verbose:
                print("no coords to start", d)
                print(
                    [
                        a
                        for a in d.atomkeys
                        if atomCoords.get(a, None) is not None
                    ]
                )

    return atomCoords
def split_akl(
    self,
    lst: Union[Tuple["AtomKey", ...], List["AtomKey"]],
    missingOK: bool = False,
) -> List[Tuple["AtomKey", ...]]:
    """Get AtomKeys for this residue (ak_set) for generic list of AtomKeys.

    Changes and/or expands a list of 'generic' AtomKeys (e.g. 'N, C, C') to
    be specific to this Residue's altlocs etc., e.g.
    '(N-Ca_A_0.3-C, N-Ca_B_0.7-C)'

    Given a list of AtomKeys for a Hedron or Dihedron,
      return:
        list of matching atomkeys that have id3_dh in this residue
        (ak may change if occupancy != 1.00)

        or
        multiple lists of matching atomkeys expanded for all atom altlocs

        or
        empty list if any of atom_coord(ak) missing and not missingOK

    :param list lst: list[3] or [4] of AtomKeys.
        Non-altloc AtomKeys to match to specific AtomKeys for this residue
    :param bool missingOK: default False, see above.
    """
    altloc_ndx = AtomKey.fields.altloc
    occ_ndx = AtomKey.fields.occ

    # step 1
    # for each requested AtomKey collect the tuple of AtomKeys in ak_set
    # that stand for it: itself when it is a plain exact match, otherwise
    # every altloc_match() candidate
    candidates: List[Tuple[AtomKey, ...]] = []
    all_altlocs = set()
    altlocs_at: Dict["AtomKey", Set[str]] = {}
    generic_for = {}

    for ak in lst:
        altlocs_at[ak] = set()
        plain_match = (
            ak in self.ak_set
            and ak.akl[altloc_ndx] is None
            and ak.akl[occ_ndx] is None
        )
        if plain_match:
            # simple case no altloc and exact match in set
            candidates.append((ak,))
            continue
        matches = tuple(ak2 for ak2 in self.ak_set if ak.altloc_match(ak2))
        for ak2 in matches:
            generic_for[ak2] = ak
            altloc = ak2.akl[altloc_ndx]
            if altloc is not None:
                all_altlocs.add(altloc)
                altlocs_at[ak].add(altloc)
        candidates.append(matches)

    # step 2
    # missing atoms -> give up unless missingOK
    if not missingOK and any(0 == len(group) for group in candidates):
        return []  # atom missing in atom_coords, cannot form object

    widest = max((len(group) for group in candidates), default=0)

    if 1 == widest:
        # simple case no altlocs for any ak in list; empty groups can only
        # be present when missingOK and are dropped
        return [tuple(group[0] for group in candidates if group)]

    # form one complete AtomKey tuple per altloc
    expanded = []
    for al in all_altlocs:
        chosen = []
        for group in candidates:
            size = len(group)
            if 0 == size:
                continue  # ignore empty list from missingOK
            if 1 == size:
                chosen.append(group[0])  # not all atoms will have altloc
            elif al not in altlocs_at[generic_for[group[0]]]:
                # this position does not have this altloc, so just grab
                # first (in sort order) to form angle as could be any
                chosen.append(sorted(group)[0])
            else:
                chosen.extend(k for k in group if k.akl[altloc_ndx] == al)
        expanded.append(tuple(chosen))
    return expanded
def _gen_edra(self, lst: Union[Tuple["AtomKey", ...], List["AtomKey"]]) -> None:
    """Populate hedra/dihedra given edron ID tuple.

    Given list of AtomKeys defining hedron or dihedron
      convert to AtomKeys with coordinates in this residue
      add appropriately to self.di/hedra, expand as needed atom altlocs

    Fewer than 4 AtomKeys selects the hedra dictionaries and
    :class:`Hedron`; otherwise the dihedra dictionaries and
    :class:`Dihedron`.  Every edron touched is flagged ``needs_update``.

    :param list lst: tuple of AtomKeys.
        Specifies Hedron or Dihedron
    """
    if any(ak.missing for ak in lst):
        return  # give up if atoms actually missing

    wanted_len = len(lst)
    if wanted_len < 4:
        chain_map, res_map, edron_cls = self.cic.hedra, self.hedra, Hedron
    else:
        chain_map, res_map, edron_cls = self.cic.dihedra, self.dihedra, Dihedron  # type: ignore # noqa

    key_tuple = tuple(lst) if isinstance(lst, List) else lst

    # split_akl() expands key_tuple with any altlocs -> list of tuples
    for expanded in self.split_akl(key_tuple):
        if len(expanded) != wanted_len:
            continue  # do not add edron if split_akl() made something shorter
        # if edron already exists, then update not replace with new
        if expanded not in chain_map:
            chain_map[expanded] = edron_cls(expanded)  # type: ignore
        if expanded not in res_map:
            res_map[expanded] = chain_map[expanded]  # type: ignore
        res_map[expanded].needs_update = True  # type: ignore
# @profile | |
def _create_edra(self, verbose: bool = False) -> None:
    """Create IC_Chain and IC_Residue di/hedra for atom coordinates.

    Works through, in order: the backbone di/hedra reaching into each
    residue in ``rnext``, the N-CA-C hedron when there is no ``rprev``,
    the ``ic_data_backbone`` table, the ``ic_data_sidechains`` table for
    this residue's ``lc`` code, the ``ic_data_sidechain_extras`` table when
    ``IC_Residue._AllBonds`` is set, and finally the optional Gly C-beta.
    Each edron is registered through :meth:`_gen_edra`.

    AllBonds handled here.

    :param bool verbose: default False.
        Passed to ``_link_dihedra``; also prints the AtomKeys flagged
        missing in this residue's ``akc`` cache.
    """
    # on entry we have all Biopython Atoms loaded
    if not self.ak_set:
        return  # so give up if no atoms loaded for this residue
    sN, sCA, sC = self.rak("N"), self.rak("CA"), self.rak("C")
    if self.lc != "G":
        sCB = self.rak("CB")
    # first init di/hedra, AtomKey objects and atom_coords for di/hedra
    # which extend into next residue.
    if 0 < len(self.rnext) and self.rnext[0].ak_set:
        # atom_coords, hedra and dihedra for backbone dihedra
        # which reach into next residue
        for rn in self.rnext:
            nN, nCA, nC = rn.rak("N"), rn.rak("CA"), rn.rak("C")
            nextNCaC = rn.split_akl((nN, nCA, nC), missingOK=True)
            # copy the next residue's N, CA, C AtomKeys (or their altloc
            # matches) into this residue's ak_set so _gen_edra() can use them
            for tpl in nextNCaC:
                for ak in tpl:
                    if ak in rn.ak_set:
                        self.ak_set.add(ak)
                    else:
                        for rn_ak in rn.ak_set:
                            if rn_ak.altloc_match(ak):
                                self.ak_set.add(rn_ak)
            self._gen_edra((sN, sCA, sC, nN))  # psi
            self._gen_edra((sCA, sC, nN, nCA))  # omega i+1
            self._gen_edra((sC, nN, nCA, nC))  # phi i+1
            self._gen_edra((sCA, sC, nN))
            self._gen_edra((sC, nN, nCA))
            self._gen_edra((nN, nCA, nC))  # tau i+1
            # redundant next residue C-beta locator (alternate CB path)
            # otherwise missing O will cause no sidechain
            try:
                # lookup only probes for "O" in the cache; value is unused
                nO = rn.akc["O"]  # noqa: F841
            except KeyError:
                # not rn.rak here so don't trigger missing CB for Gly
                nCB = rn.akc.get("CB", None)
                if nCB is not None and nCB in rn.ak_set:
                    self.ak_set.add(nCB)
                    self._gen_edra((nN, nCA, nCB))
                    self._gen_edra((sC, nN, nCA, nCB))
    # if start of chain then need to init NCaC hedron as not in previous
    # residue
    if 0 == len(self.rprev):
        self._gen_edra((sN, sCA, sC))
    # now init di/hedra for standard backbone atoms independent of
    # neighbours
    backbone = ic_data_backbone
    for edra in backbone:
        # only need to build if this residue has all the atoms in the edra
        if all(atm in self.akc for atm in edra):
            r_edra = [self.rak(atom) for atom in edra]
            self._gen_edra(r_edra)  # [4] is label on some table entries
    # next init sidechain di/hedra
    if self.lc is not None:
        sidechain = ic_data_sidechains.get(self.lc, [])
        for edraLong in sidechain:
            edra = edraLong[0:4]  # [4] is label on some sidechain table entries
            # lots of H di/hedra can be avoided if don't have those atoms
            if all(atm in self.akc for atm in edra):
                r_edra = [self.rak(atom) for atom in edra]
                self._gen_edra(r_edra)
        if (
            IC_Residue._AllBonds
        ):  # openscad output needs all bond cylinders explicit
            sidechain = ic_data_sidechain_extras.get(self.lc, [])
            for edra in sidechain:
                # test less useful here but avoids populating rak cache if
                # possible
                if all(atm in self.akc for atm in edra):
                    r_edra = [self.rak(atom) for atom in edra]
                    self._gen_edra(r_edra)
    # create di/hedra for gly Cbeta if needed, populate values later
    if self.gly_Cbeta and "G" == self.lc:
        # add C-beta for Gly
        self.ak_set.add(AtomKey(self, "CB"))
        sCB = self.rak("CB")
        sCB.missing = False  # was True because akc cache did not have entry
        self.cic.akset.add(sCB)
        # main orientation comes from O-C-Ca-Cb so make Cb-Ca-C hedron
        sO = self.rak("O")
        htpl = (sCB, sCA, sC)
        self._gen_edra(htpl)
        # generate dihedral based on N-Ca-C-O offset from db query above
        dtpl = (sO, sC, sCA, sCB)
        self._gen_edra(dtpl)
        d = self.dihedra[dtpl]
        d.ric = self
        d._set_hedra()
        # prepare to add new Gly CB atom(s)
        # in IC_Chain.atom_to_internal_coordinates()
        # gcb maps each new CB AtomKey to the dihedron key that locates it;
        # the dict is created on the chain the first time it is needed
        if not hasattr(self.cic, "gcb"):
            self.cic.gcb = {}
        self.cic.gcb[sCB] = dtpl
    # final processing of all dihedra just generated
    self._link_dihedra(verbose)  # re-run for new dihedra
    if verbose:
        # oAtom =
        self.rak("O")  # trigger missing flag if needed
        # report cache entries keyed by atom-name string and flagged missing
        missing = []
        for akk, akv in self.akc.items():
            if isinstance(akk, str) and akv.missing:
                missing.append(akv)
        if missing:
            chn = self.residue.parent
            chn_id = chn.id
            chn_len = len(chn.internal_coord.ordered_aa_ic_list)
            print(f"chain {chn_id} len {chn_len} missing atom(s): {missing}")
# rtm
# Class-level overrides read by _pdb_atom_string(): when not None they
# replace the atom's own serial number / the parent chain's id in the
# generated ATOM records.  pdb_residue_string() increments atom_sernum
# after each atom it writes.
atom_sernum = None
atom_chain = None
def _pdb_atom_string(atm: Atom, cif_extend: bool = False) -> str:
    """Generate PDB ATOM record.

    A disordered atom (``is_disordered() == 2``) yields one record per
    child atom, or only the record of its selected child when
    ``IC_Residue.no_altloc`` is set.

    :param Atom atm: Biopython Atom object reference
    :param bool cif_extend: default False.
        Write coordinates as 10.5f and occupancy as 7.3f instead of the
        standard 8.3f / 6.2f fields.
    :param IC_Residue.atom_sernum: Class variable default None.
        override atom serial number if not None
    :param IC_Residue.atom_chain: Class variable default None.
        override atom chain id if not None
    :returns: the newline-terminated record(s) for ``atm``
    """
    if 2 == atm.is_disordered():
        if IC_Residue.no_altloc:
            return IC_Residue._pdb_atom_string(atm.selected_child, cif_extend)
        return "".join(
            IC_Residue._pdb_atom_string(child, cif_extend)
            for child in atm.child_dict.values()
        )

    res = atm.parent
    chn = res.parent
    if cif_extend:
        # NOTE(review): extended-precision layout is not part of the PDB
        # standard; it uses the same blank-column gaps as the standard
        # record below - confirm against the reader of these lines.
        fmt = "{:6}{:5d} {:4}{:1}{:3} {:1}{:4}{:1}   {:10.5f}{:10.5f}{:10.5f}{:7.3f}{:6.2f}        {:>4}\n"
    else:
        # PDB fixed columns: iCode in column 27, columns 28-30 blank, x/y/z
        # in 31-54, occupancy 55-60, tempFactor 61-66, element right
        # justified ending in column 78.  The gaps after iCode (3 blanks)
        # and after tempFactor (8 blanks) are required for that alignment.
        fmt = "{:6}{:5d} {:4}{:1}{:3} {:1}{:4}{:1}   {:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f}        {:>4}\n"

    serial = (
        IC_Residue.atom_sernum
        if IC_Residue.atom_sernum is not None
        else atm.serial_number
    )
    chain_id = IC_Residue.atom_chain if IC_Residue.atom_chain is not None else chn.id
    return fmt.format(
        "ATOM",
        serial,
        atm.fullname,
        atm.altloc,
        res.resname,
        chain_id,
        res.id[1],
        res.id[2],
        atm.coord[0],
        atm.coord[1],
        atm.coord[2],
        atm.occupancy,
        atm.bfactor,
        atm.element,
    )
# rtm | |
def pdb_residue_string(self) -> str:
    """Generate PDB ATOM records for this residue as string.

    Convenience method for functionality not exposed in PDBIO.py.
    Increments :data:`IC_Residue.atom_sernum` if not None

    Atoms are written in sorted AtomKey order; AtomKeys in ``ak_set`` whose
    residue position differs from this residue's are skipped.

    :param IC_Residue.atom_sernum: Class variable default None.
        Override and increment atom serial number if not None
    :param IC_Residue.atom_chain: Class variable.
        Override atom chain id if not None

    .. todo::
        move to PDBIO
    """
    records = []
    index_of = self.cic.atomArrayIndex
    bp_atoms = self.cic.bpAtomArray
    own_pos = self.rbase[0]
    respos_field = AtomKey.fields.respos
    for ak in sorted(self.ak_set):
        if int(ak.akl[respos_field]) != own_pos:
            continue  # skip rnext atoms
        records.append(IC_Residue._pdb_atom_string(bp_atoms[index_of[ak]]))
        if IC_Residue.atom_sernum is not None:
            IC_Residue.atom_sernum += 1
    return "".join(records)
def _residue_string(res: "Residue") -> str:
    """Generate PIC Residue string.

    Enough to create Biopython Residue object without actual Atoms.

    The line is ``str(res.get_full_id())``, a space and the residue name,
    followed by `` [segid]`` only when the segid is not empty or blank.

    :param Residue res: Biopython Residue object reference
    """
    segid = res.get_segid()
    segid_part = f" [{segid}]" if segid.strip() else ""
    return f"{res.get_full_id()!s} {res.resname}{segid_part}\n"
# Field order fixes the positional layout of pic_flags; the bit used by each
# single-flag field is noted, combined fields list their members.
# general supersedes specific, so pomg + omg = omg, tau + hedra = hedra
_pfDef = namedtuple(
    "_pfDef",
    [
        "psi",  # bit 0
        "omg",  # bit 1
        "phi",  # bit 2
        "tau",  # bit 3 : tau hedron (N-Ca-C)
        "chi1",  # bit 4
        "chi2",  # bit 5
        "chi3",  # bit 6
        "chi4",  # bit 7
        "chi5",  # bit 8
        "pomg",  # bit 9 : proline omega
        "chi",  # chi1 | ... | chi5
        "classic_b",  # psi | phi | tau | pomg
        "classic",  # classic_b | chi
        "hedra",  # bit 10 : all hedra
        "primary",  # bit 11 : all primary dihedra
        "secondary",  # bit 12 : all secondary dihedra
        "all",  # hedra | primary | secondary
        "initAtoms",  # bit 13 : XYZ coordinates of initial Tau (N-Ca-C)
        "bFactors",  # bit 14
    ],
)

# _b[i] is the integer with only bit i set
_b = [1 << bit for bit in range(16)]
_bChi = _b[4] | _b[5] | _b[6] | _b[7] | _b[8]
_bClassB = _b[0] | _b[2] | _b[3] | _b[9]
_bClass = _bClassB | _bChi
_bAll = _b[10] | _b[11] | _b[12]

pic_flags = _pfDef(
    psi=_b[0],
    omg=_b[1],
    phi=_b[2],
    tau=_b[3],
    chi1=_b[4],
    chi2=_b[5],
    chi3=_b[6],
    chi4=_b[7],
    chi5=_b[8],
    pomg=_b[9],
    chi=_bChi,
    classic_b=_bClassB,
    classic=_bClass,
    hedra=_b[10],
    primary=_b[11],
    secondary=_b[12],
    all=_bAll,
    initAtoms=_b[13],
    bFactors=_b[14],
)
"""Used by :func:`.PICIO.write_PIC` to control classes of values to be defaulted."""

picFlagsDefault = pic_flags.all | pic_flags.initAtoms | pic_flags.bFactors
"""Default is all dihedra + initial tau atoms + bFactors."""

picFlagsDict = pic_flags._asdict()
"""Dictionary of pic_flags values to use as needed."""
def _write_pic_bfac(self, atm: Atom, s: str, col: int) -> Tuple[str, int]:
    """Append one atom's B-factor entry to the PIC text being built.

    Entries are grouped five to a line: ``BFAC:`` is written before the
    first entry of each group and a newline after the fifth.

    :param Atom atm: atom supplying the AtomKey id and B-factor
    :param str s: text accumulated so far
    :param int col: number of entries written so far
    :returns: tuple of (extended text, ``col`` + 1)
    """
    key = self.rak(atm)
    line_start = "BFAC:" if 0 == col % 5 else ""
    entry = f" {key.id} {atm.get_bfactor():6.2f}"
    col += 1
    line_end = "\n" if 0 == col % 5 else ""
    return s + line_start + entry + line_end, col
def _write_PIC(
    self,
    pdbid: str = "0PDB",
    chainid: str = "A",
    picFlags: int = picFlagsDefault,
    hCut: Optional[Union[float, None]] = None,
    pCut: Optional[Union[float, None]] = None,
) -> str:
    """Write PIC format lines for this residue.

    See :func:`.PICIO.write_PIC`.

    Output order: residue line, optional N/CA/C ATOM records for a residue
    with no ``rprev``, hedra lines, dihedra lines, B-factor lines.

    :param str pdbid: PDB idcode string; default 0PDB
    :param str chainid: PDB Chain ID character; default A
    :param int picFlags: control details written to PIC file; see
        :meth:`.PICIO.write_PIC`
    :param float hCut: only write hedra with ref db angle std dev > this
        value; default None
    :param float pCut: only write primary dihedra with ref db angle
        std dev > this value; default None
    """
    pAcc = IC_Residue.pic_accuracy
    if pdbid is None:
        pdbid = "0PDB"
    if chainid is None:
        chainid = "A"

    flags = IC_Residue.pic_flags
    parts = [IC_Residue._residue_string(self.residue)]

    write_start_atoms = (
        picFlags & flags.initAtoms
        and 0 == len(self.rprev)  # no prev residue
        and hasattr(self, "NCaCKey")
        and self.NCaCKey is not None  # have valid NCacKey
        # N coords valid (e.g. not all 0.00)
        and not (np.all(self.residue["N"].coord == self.residue["N"].coord[0]))
    )
    if write_start_atoms:
        first_tau = self.pick_angle(self.NCaCKey[0])
        if first_tau is not None:
            try:
                start_atoms = "".join(
                    IC_Residue._pdb_atom_string(self.residue[name], cif_extend=True)
                    for name in ("N", "CA", "C")
                )
            except KeyError:
                pass  # written only when all 3 atoms are present
            else:
                parts.append(start_atoms)

    base = pdbid + " " + chainid + " "
    cic = self.cic

    if picFlags & flags.hedra or picFlags & flags.tau:
        for h in sorted(self.hedra.values()):
            tau_only = not picFlags & flags.hedra and picFlags & flags.tau
            if tau_only and h.e_class != "NCAC":
                continue  # not all hedra requested and this is not tau
            if hCut is not None:
                hc = h.xrh_class if hasattr(h, "xrh_class") else h.e_class
                if hc in hedra_defaults and hedra_defaults[hc][1] <= hCut:
                    continue
            hndx = h.ndx
            try:
                parts.append(
                    f"{base}{h.id} "
                    f"{cic.hedraL12[hndx]:{pAcc}} {cic.hedraAngle[hndx]:{pAcc}} {cic.hedraL23[hndx]:{pAcc}}"
                    "\n"
                )
            except KeyError:
                pass

    for d in sorted(self.dihedra.values()):
        if d.primary:
            # without the primary flag a primary dihedron is still written
            # when one of its own bits is requested
            if not picFlags & flags.primary and not picFlags & d.bits():
                continue
        elif not picFlags & flags.secondary:
            continue  # secondary and flag not set -> skip
        if (
            pCut is not None
            and d.primary
            and d.pclass in dihedra_primary_defaults
            and dihedra_primary_defaults[d.pclass][1] <= pCut
        ):
            continue
        try:
            parts.append(f"{base}{d.id} {cic.dihedraAngle[d.ndx]:{pAcc}}\n")
        except KeyError:
            pass

    s = "".join(parts)

    if picFlags & flags.bFactors:
        col = 0
        for a in sorted(self.residue.get_atoms()):
            if 2 != a.is_disordered():
                targets = (a,)
            elif IC_Residue.no_altloc or self.alt_ids is None:
                targets = (a.selected_child,)
            else:
                targets = a.child_dict.values()
            for atm in targets:
                s, col = self._write_pic_bfac(atm, s, col)
        if 0 != col % 5:
            s += "\n"  # terminate a partly filled BFAC line
    return s
def _get_ak_tuple(self, ak_str: str) -> Optional[Tuple["AtomKey", ...]]:
    """Convert atom pair string to AtomKey tuple.

    :param str ak_str:
        Two atom names separated by ':', e.g. 'N:CA'
        Optional position specifier relative to self,
        e.g. '-1C:N' for preceding peptide bond.
    :returns: tuple with one AtomKey per ':'-separated name, or None when
        any name could not be resolved (e.g. '-1' with no previous residue)
    """
    names = ak_str.split(":")
    keys = []
    for name in names:
        m = self._relative_atom_re.match(name)
        if not m:
            keys.append(self.rak(name))
            continue
        offset, atom_name = m.group(1), m.group(2)
        if offset == "-1":
            if 0 < len(self.rprev):
                keys.append(AtomKey(self.rprev[0], atom_name))
        elif offset == "1":
            if 0 < len(self.rnext):
                keys.append(AtomKey(self.rnext[0], atom_name))
        elif offset == "0":
            keys.append(self.rak(atom_name))
        # any other matched prefix adds nothing and so yields None below
    if len(keys) != len(names):
        return None
    return tuple(keys)
# Relative atom specifier used by _get_ak_tuple(): group 1 is the residue
# offset prefix (the pattern accepts "-1", "-0", "0" or "1"), group 2 the
# atom name.  Group 2 is upper-case letters only, so names containing
# digits do not match this pattern.
_relative_atom_re = re.compile(r"^(-?[10])([A-Z]+)$")
def _get_angle_for_tuple(
    self, angle_key: EKT
) -> Optional[Union["Hedron", "Dihedron"]]:
    """Look up a di/hedron of this residue by AtomKey tuple.

    :param angle_key: tuple of 4 AtomKeys (searched in ``self.dihedra``) or
        3 AtomKeys (searched in ``self.hedra``)
    :returns: the matching Dihedron or Hedron; None when not found or when
        the tuple has any other length
    """
    key_len = len(angle_key)
    if 4 == key_len:
        return self.dihedra.get(cast(DKT, angle_key), None)
    if 3 == key_len:
        return self.hedra.get(cast(HKT, angle_key), None)
    return None
# @profile | |
def pick_angle(
    self, angle_key: Union[EKT, str]
) -> Optional[Union["Hedron", "Dihedron"]]:
    """Get Hedron or Dihedron for angle_key.

    :param angle_key:
        - tuple of 3 or 4 AtomKeys
        - string of atom names ('CA') separated by :'s
        - string of [-1, 0, 1]<atom name> separated by ':'s. -1 is
          previous residue, 0 is this residue, 1 is next residue
        - psi, phi, omg, omega, chi1, chi2, chi3, chi4, chi5
        - tau (N-CA-C angle) see Richardson1981
        - tuples of AtomKeys is only access for alternate disordered atoms

    Observe that a residue's phi and omega dihedrals, as well as the hedra
    comprising them (including the N:Ca:C `tau` hedron), are stored in the
    n-1 di/hedra sets; this overlap is handled here, but may be an issue if
    accessing directly.

    The following print commands are equivalent (except for sidechains with
    non-carbon atoms for chi2)::

        ric = r.internal_coord
        print(
            r,
            ric.get_angle("psi"),
            ric.get_angle("phi"),
            ric.get_angle("omg"),
            ric.get_angle("tau"),
            ric.get_angle("chi2"),
        )
        print(
            r,
            ric.get_angle("N:CA:C:1N"),
            ric.get_angle("-1C:N:CA:C"),
            ric.get_angle("-1CA:-1C:N:CA"),
            ric.get_angle("N:CA:C"),
            ric.get_angle("CA:CB:CG:CD"),
        )

    See ic_data.py for detail of atoms in the enumerated sidechain angles
    and the backbone angles which do not span the peptide bond. Using 's'
    for current residue ('self') and 'n' for next residue, the spanning
    (overlapping) angles are::

            (sN, sCA, sC, nN)   # psi
            (sCA, sC, nN, nCA)  # omega i+1
            (sC, nN, nCA, nC)   # phi i+1
            (sCA, sC, nN)
            (sC, nN, nCA)
            (nN, nCA, nC)       # tau i+1

    :return: Matching Hedron, Dihedron, or None.  None is also returned for
        an unrecognised key, including a 'chi' key whose last character is
        not a digit.
    """
    rval: Optional[Union["Hedron", "Dihedron"]] = None
    if isinstance(angle_key, tuple):
        rval = self._get_angle_for_tuple(angle_key)
        if rval is None and self.rprev:
            # overlapping di/hedra are held by the previous residue
            rval = self.rprev[0]._get_angle_for_tuple(angle_key)
    elif ":" in angle_key:
        angle_key = cast(EKT, self._get_ak_tuple(cast(str, angle_key)))
        if angle_key is None:
            return None
        rval = self._get_angle_for_tuple(angle_key)
        if rval is None and self.rprev:
            rval = self.rprev[0]._get_angle_for_tuple(angle_key)
    elif "psi" == angle_key:
        if 0 == len(self.rnext):
            return None
        rn = self.rnext[0]
        sN, sCA, sC = self.rak("N"), self.rak("CA"), self.rak("C")
        nN = rn.rak("N")
        rval = self.dihedra.get((sN, sCA, sC, nN), None)
    elif "phi" == angle_key:
        if 0 == len(self.rprev):
            return None
        rp = self.rprev[0]
        pC, sN, sCA = rp.rak("C"), self.rak("N"), self.rak("CA")
        sC = self.rak("C")
        rval = rp.dihedra.get((pC, sN, sCA, sC), None)
    elif "omg" == angle_key or "omega" == angle_key:
        if 0 == len(self.rprev):
            return None
        rp = self.rprev[0]
        pCA, pC, sN = rp.rak("CA"), rp.rak("C"), self.rak("N")
        sCA = self.rak("CA")
        rval = rp.dihedra.get((pCA, pC, sN, sCA), None)
    elif "tau" == angle_key:
        sN, sCA, sC = self.rak("N"), self.rak("CA"), self.rak("C")
        rval = self.hedra.get((sN, sCA, sC), None)
        if rval is None and 0 != len(self.rprev):
            rp = self.rprev[0]  # tau in prev residue for all but first
            rval = rp.hedra.get((sN, sCA, sC), None)
    elif angle_key.startswith("chi"):
        sclist = ic_data_sidechains.get(self.lc, None)
        if sclist is None:
            return None
        try:
            # chiN is looked up at index 2N - 1 of the sidechain table and
            # must carry the matching label at position 4 of the entry
            ndx = (2 * int(angle_key[-1])) - 1
            akl = sclist[ndx]
            if akl[4] == angle_key:
                klst = [self.rak(a) for a in akl[0:4]]
                tklst = cast(DKT, tuple(klst))
                rval = self.dihedra.get(tklst, None)
            else:
                return None
        except (IndexError, ValueError):
            # IndexError: no such table entry or entry has no label
            # ValueError: key does not end in a digit (e.g. "chi")
            return None
    return rval
def get_angle(self, angle_key: Union[EKT, str]) -> Optional[float]:
    """Get dihedron or hedron angle for specified key.

    See :meth:`.pick_angle` for key specifications.

    :returns: the ``angle`` of the di/hedron found by :meth:`.pick_angle`,
        or None when nothing was found
    """
    edron = self.pick_angle(angle_key)
    return edron.angle if edron else None
def set_angle(self, angle_key: Union[EKT, str], v: float):
    """Set dihedron or hedron angle for specified key.

    See :meth:`.pick_angle` for key specifications.

    Does nothing when :meth:`.pick_angle` finds no matching di/hedron.
    """
    edron = self.pick_angle(angle_key)
    if edron is None:
        return
    edron.angle = v
def _do_bond_rotate(self, base: "Dihedron", delta: float):
    """Find and modify related dihedra through id3_dh_index.

    Adds ``delta`` to every dihedron sharing ``base``'s first hedron, and to
    every dihedron whose first hedron is the reverse of the second hedron
    of one of those.

    Dihedra are fetched from the chain-level ``self.cic.dihedra`` because
    ``id3_dh_index`` is a chain-level index: the keys it returns may belong
    to dihedra held by a neighbouring residue (e.g. phi and omega are
    created by the previous residue) and so be absent from ``self.dihedra``.

    :param Dihedron base: dihedron selecting the bond to rotate
    :param float delta: change in angle, degrees
    :raises RuntimeError: if ``base`` is not a dihedron (e.g. None or an
        object without ``id3``)
    """
    chain_dihedra = self.cic.dihedra
    dh_index = self.cic.id3_dh_index
    try:
        for dk in dh_index[base.id3]:
            # change all diheds with same first hedron
            dihed = chain_dihedra[dk]
            dihed.angle += delta  # +/- 180 handled in setter
            # for changed dihed, change any with reverse key 2nd hedron
            # so change N-Ca-C-N will change O-Ca-C-Cb
            try:
                for d2rk in dh_index[dihed.id32[::-1]]:
                    chain_dihedra[d2rk].angle += delta
            except KeyError:
                pass  # no dihedra start with the reversed hedron
    except AttributeError:
        raise RuntimeError("bond_rotate, bond_set only for dihedral angles")
def bond_rotate(self, angle_key: Union[EKT, str], delta: float):
    """Rotate set of overlapping dihedrals by delta degrees.

    See :meth:`.pick_angle` for key specifications.

    The dihedron found for ``angle_key`` is handed to
    :meth:`._do_bond_rotate`, which raises RuntimeError when it is not a
    dihedron.
    """
    self._do_bond_rotate(self.pick_angle(angle_key), delta)
def bond_set(self, angle_key: Union[EKT, str], val: float):
    """Set dihedron to val, update overlapping dihedra by same amount.

    See :meth:`.pick_angle` for key specifications.

    The change applied is ``Dihedron.angle_dif(current angle, val)``.

    :raises RuntimeError: if ``angle_key`` does not select a dihedron; a
        key matching nothing is reported the same way :meth:`.bond_rotate`
        reports it
    """
    base = self.pick_angle(angle_key)
    if base is None:
        # same error bond_rotate() gives for a missing dihedron, instead of
        # an AttributeError on None.angle
        raise RuntimeError("bond_rotate, bond_set only for dihedral angles")
    delta = Dihedron.angle_dif(base.angle, val)
    self._do_bond_rotate(base, delta)
def pick_length(
    self, ak_spec: Union[str, BKT]
) -> Tuple[Optional[List["Hedron"]], Optional[BKT]]:
    """Get list of hedra containing specified atom pair.

    :param ak_spec:
        - tuple of two AtomKeys
        - string: two atom names separated by ':', e.g. 'N:CA' with
          optional position specifier relative to self, e.g. '-1C:N' for
          preceding peptide bond.  Position specifiers are -1, 0, 1.

    The following are equivalent::

        ric = r.internal_coord
        print(
            r,
            ric.get_length("0C:1N"),
        )
        print(
            r,
            None
            if not ric.rnext
            else ric.get_length((ric.rak("C"), ric.rnext[0].rak("N"))),
        )

    If atom not found on current residue then will look on rprev[0] to
    handle cases like Gly N:CA.  For finer control please access
    `IC_Chain.hedra` directly.

    :return: list of hedra containing specified atom pair as tuples of
            AtomKeys, together with the AtomKey tuple used for the search;
            (None, None) when a string spec cannot be resolved
    """
    if isinstance(ak_spec, str):
        ak_spec = cast(BKT, self._get_ak_tuple(ak_spec))
    if ak_spec is None:
        return None, None

    # this residue's hedra first, then those stored on rprev
    # (e.g. set backbone, read gly N:CA)
    hedra_maps = [self.hedra]
    hedra_maps.extend(rp.hedra for rp in self.rprev)
    found: List[Hedron] = [
        hedron
        for hedra_map in hedra_maps
        for hedron_key, hedron in hedra_map.items()
        if all(ak in hedron_key for ak in ak_spec)
    ]
    return found, ak_spec
def get_length(self, ak_spec: Union[str, BKT]) -> Optional[float]:
    """Get bond length for specified atom pair.

    See :meth:`.pick_length` for ak_spec and details.

    :returns: the first non-None length reported by the hedra that
        :meth:`.pick_length` finds, else None
    """
    hedra, pair = self.pick_length(ak_spec)
    if hedra is None or pair is None:
        return None
    lengths = (hedron.get_length(pair) for hedron in hedra)
    return next((length for length in lengths if length is not None), None)
def set_length(self, ak_spec: Union[str, BKT], val: float) -> None:
    """Set bond length for specified atom pair.

    See :meth:`.pick_length` for ak_spec.

    The length is set on every hedron :meth:`.pick_length` returns; nothing
    happens when the pair cannot be resolved.
    """
    hedra, pair = self.pick_length(ak_spec)
    if hedra is None or pair is None:
        return
    for hedron in hedra:
        hedron.set_length(pair, val)
def applyMtx(self, mtx: np.array) -> None:
    """Apply matrix to atom_coords for this IC_Residue.

    Selects the rows of the chain's ``atomArray`` whose AtomKey residue
    position equals this residue's and replaces them, in place, with the
    transformed coordinates.

    :param mtx: transform matrix applied to each selected coordinate row
    """
    coords = self.cic.atomArray
    respos_field = AtomKey.fields.respos
    own_pos = str(self.rbase[0])
    rows = [
        row
        for key, row in self.cic.atomArrayIndex.items()
        if key.akl[respos_field] == own_pos
    ]
    # row-vector form: points.dot(mtx.T) applies mtx to every selected
    # point in one call
    coords[rows] = coords[rows].dot(mtx.transpose())
class Edron: | |
"""Base class for Hedron and Dihedron classes. | |
Supports rich comparison based on lists of AtomKeys. | |
Attributes | |
---------- | |
atomkeys: tuple | |
3 (hedron) or 4 (dihedron) :class:`.AtomKey` s defining this di/hedron | |
id: str | |
':'-joined string of AtomKeys for this di/hedron | |
needs_update: bool | |
indicates di/hedron local atom_coords do NOT reflect current di/hedron | |
angle and length values in hedron local coordinate space | |
e_class: str | |
sequence of atoms (no position or residue) comprising di/hedron | |
for statistics | |
re_class: str | |
sequence of residue, atoms comprising di/hedron for statistics | |
cre_class: str | |
sequence of covalent radii classses comprising di/hedron for statistics | |
edron_re: compiled regex (Class Attribute) | |
A compiled regular expression matching string IDs for Hedron | |
and Dihedron objects | |
cic: IC_Chain reference | |
Chain internal coords object containing this hedron | |
ndx: int | |
index into IC_Chain level numpy data arrays for di/hedra. | |
Set in :meth:`IC_Chain.init_edra` | |
rc: int | |
number of residues involved in this edron | |
Methods | |
------- | |
gen_key([AtomKey, ...] or AtomKey, ...) (Static Method) | |
generate a ':'-joined string of AtomKey Ids | |
is_backbone() | |
Return True if all atomkeys atoms are N, Ca, C or O | |
""" | |
# regular expression to capture hedron and dihedron specifications, as in
# .pic files
# Named groups: pdbid, chn, a1..a3 (a4 only for a dihedron), then either
# len12/angle/len23 (hedron) or dihedral (dihedron).
# NOTE(review): inside the [\w|\s] character class the '|' is a literal
# character, not alternation, so chn also matches '|' - confirm intended.
edron_re = re.compile(
    # pdbid and chain id
    r"^(?P<pdbid>\w+)?\s(?P<chn>[\w|\s])?\s"
    # 3 atom specifiers for hedron
    r"(?P<a1>[\w\-\.]+):(?P<a2>[\w\-\.]+):(?P<a3>[\w\-\.]+)"
    # 4th atom specifier for dihedron
    r"(:(?P<a4>[\w\-\.]+))?"
    r"\s+"
    # len-angle-len for hedron
    r"(((?P<len12>\S+)\s+(?P<angle>\S+)\s+(?P<len23>\S+)\s*$)|"
    # dihedral angle for dihedron
    r"((?P<dihedral>\S+)\s*$))"
)
""" A compiled regular expression matching string IDs for Hedron and
Dihedron objects"""
def gen_key(lst: List["AtomKey"]) -> str: | |
"""Generate string of ':'-joined AtomKey strings from input. | |
Generate '2_A_C:3_P_N:3_P_CA' from (2_A_C, 3_P_N, 3_P_CA) | |
:param list lst: list of AtomKey objects | |
""" | |
if 4 == len(lst): | |
return f"{lst[0].id}:{lst[1].id}:{lst[2].id}:{lst[3].id}" | |
else: | |
return f"{lst[0].id}:{lst[1].id}:{lst[2].id}" | |
def gen_tuple(akstr: str) -> Tuple: | |
"""Generate AtomKey tuple for ':'-joined AtomKey string. | |
Generate (2_A_C, 3_P_N, 3_P_CA) from '2_A_C:3_P_N:3_P_CA' | |
:param str akstr: string of ':'-separated AtomKey strings | |
""" | |
return tuple([AtomKey(i) for i in akstr.split(":")]) | |
# @profile | |
def __init__(self, *args: Union[List["AtomKey"], EKT], **kwargs: str) -> None: | |
"""Initialize Edron with sequence of AtomKeys. | |
Acceptable input: | |
[ AtomKey, ... ] : list of AtomKeys | |
AtomKey, ... : sequence of AtomKeys as args | |
{'a1': str, 'a2': str, ... } : dict of AtomKeys as 'a1', 'a2' ... | |
""" | |
atomkeys: List[AtomKey] = [] | |
for arg in args: | |
if isinstance(arg, list): | |
atomkeys = arg | |
elif isinstance(arg, tuple): | |
atomkeys = list(arg) | |
else: | |
if arg is not None: | |
atomkeys.append(arg) | |
if [] == atomkeys and all(k in kwargs for k in ("a1", "a2", "a3")): | |
atomkeys = [ | |
AtomKey(kwargs["a1"]), | |
AtomKey(kwargs["a2"]), | |
AtomKey(kwargs["a3"]), | |
] | |
if "a4" in kwargs and kwargs["a4"] is not None: | |
atomkeys.append(AtomKey(kwargs["a4"])) | |
self.atomkeys = tuple(atomkeys) | |
self.id = Edron.gen_key(atomkeys) | |
self._hash = hash(self.atomkeys) | |
# flag indicating that atom coordinates are up to date | |
# (do not need to be recalculated from angle and or length values) | |
self.needs_update = True | |
# IC_Chain which contains this di/hedron | |
self.cic: IC_Chain # set in :meth:`IC_Residue._link_dihedra` | |
# no residue or position, just atoms | |
self.e_class = "" | |
# same but residue specific | |
self.re_class = "" | |
self.cre_class = "" | |
rset = set() # what residues this involves | |
atmNdx = AtomKey.fields.atm | |
resNdx = AtomKey.fields.resname | |
resPos = AtomKey.fields.respos | |
icode = AtomKey.fields.icode | |
for ak in atomkeys: | |
akl = ak.akl | |
self.e_class += akl[atmNdx] | |
self.re_class += akl[resNdx] + akl[atmNdx] | |
rset.add(akl[resPos] + (akl[icode] or "")) | |
self.cre_class += ak.cr_class() | |
self.rc = len(rset) | |
def __deepcopy__(self, memo): | |
"""Deep copy implementation for Edron.""" | |
existing = memo.get(id(self), False) | |
if existing: | |
return existing | |
dup = type(self).__new__(self.__class__) | |
memo[id(self)] = dup | |
dup.__dict__.update(self.__dict__) # mostly static attribs | |
dup.cic = memo[id(self.cic)] | |
dup.atomkeys = copy.deepcopy(self.atomkeys, memo) | |
return dup | |
def __contains__(self, ak: "AtomKey") -> bool: | |
"""Return True if atomkey is in this edron.""" | |
return ak in self.atomkeys | |
def is_backbone(self) -> bool: | |
"""Report True for contains only N, C, CA, O, H atoms.""" | |
return all(ak.is_backbone() for ak in self.atomkeys) | |
def __repr__(self) -> str: | |
"""Tuple of AtomKeys is default repr string.""" | |
return str(self.atomkeys) | |
def __hash__(self) -> int: | |
"""Hash calculated at init from atomkeys tuple.""" | |
return self._hash | |
def _cmp(self, other: "Edron") -> Union[Tuple["AtomKey", "AtomKey"], bool]: | |
"""Comparison function ranking self vs. other; False on equal. | |
Priority is lowest value for sort: psi < chi1. | |
""" | |
for ak_s, ak_o in zip(self.atomkeys, other.atomkeys): | |
if ak_s != ak_o: | |
return ak_s, ak_o | |
return False | |
def __eq__(self, other: object) -> bool: | |
"""Test for equality.""" | |
if not isinstance(other, type(self)): | |
return NotImplemented | |
return self.id == other.id | |
def __ne__(self, other: object) -> bool: | |
"""Test for inequality.""" | |
if not isinstance(other, type(self)): | |
return NotImplemented | |
return self.id != other.id | |
def __gt__(self, other: object) -> bool: | |
"""Test greater than.""" | |
if not isinstance(other, type(self)): | |
return NotImplemented | |
rslt = self._cmp(other) | |
if rslt: | |
rslt = cast(Tuple[AtomKey, AtomKey], rslt) | |
return rslt[0] > rslt[1] | |
return False | |
def __ge__(self, other: object) -> bool: | |
"""Test greater or equal.""" | |
if not isinstance(other, type(self)): | |
return NotImplemented | |
rslt = self._cmp(other) | |
if rslt: | |
rslt = cast(Tuple[AtomKey, AtomKey], rslt) | |
return rslt[0] >= rslt[1] | |
return True | |
def __lt__(self, other: object) -> bool: | |
"""Test less than.""" | |
if not isinstance(other, type(self)): | |
return NotImplemented | |
rslt = self._cmp(other) | |
if rslt: | |
rslt = cast(Tuple[AtomKey, AtomKey], rslt) | |
return rslt[0] < rslt[1] | |
return False | |
def __le__(self, other: object) -> bool: | |
"""Test less or equal.""" | |
if not isinstance(other, type(self)): | |
return NotImplemented | |
rslt = self._cmp(other) | |
if rslt: | |
rslt = cast(Tuple[AtomKey, AtomKey], rslt) | |
return rslt[0] <= rslt[1] | |
return True | |
class Hedron(Edron):
    """Class to represent three joined atoms forming a plane.

    Contains atom coordinates in local coordinate space: central atom
    at origin, one terminal atom on XZ plane, and the other on the +Z axis.
    Stored in two orientations, with the 3rd (forward) or first (reversed)
    atom on the +Z axis.  See :class:`Dihedron` for use of forward and
    reverse orientations.

    Attributes
    ----------
    len12: float
        distance between first and second atoms
    len23: float
        distance between second and third atoms
    angle: float
        angle (degrees) formed by three atoms in hedron
    xrh_class: string
        only for hedron spanning 2 residues, will have 'X' for residue
        contributing only one atom

    Methods
    -------
    get_length()
        get bond length for specified atom pair
    set_length()
        set bond length for specified atom pair
    angle(), len12(), len23()
        properties with setters for relevant attributes (angle in degrees)

    """

    def __init__(self, *args: Union[List["AtomKey"], "HKT"], **kwargs: str) -> None:
        """Initialize Hedron with sequence of AtomKeys, kwargs.

        Acceptable input:
            As for Edron, plus optional 'len12', 'angle', 'len23'
            keyworded values.
        """
        super().__init__(*args, **kwargs)

        if self.rc == 2:  # hedron crosses residue boundary
            resPos = AtomKey.fields.respos
            icode = AtomKey.fields.icode
            resNdx = AtomKey.fields.resname
            atmNdx = AtomKey.fields.atm

            akl0, akl1 = self.atomkeys[0].akl, self.atomkeys[1].akl
            if akl0[resPos] != akl1[resPos] or akl0[icode] != akl1[icode]:
                # first atom is alone in its residue: mask its residue name
                self.xrh_class = "X" + self.re_class[1:]
            else:
                # third atom is alone in its residue: mask its residue name
                first_two = "".join(
                    ak.akl[resNdx] + ak.akl[atmNdx] for ak in self.atomkeys[:2]
                )
                self.xrh_class = first_two + "X" + self.atomkeys[2].akl[atmNdx]

    # __deepcopy__ covered by Edron superclass

    def __repr__(self) -> str:
        """Print string for Hedron object."""
        return (
            f"3-{self.id} {self.re_class} {str(self.len12)} "
            f"{str(self.angle)} {str(self.len23)}"
        )

    def _invalidate_atoms(self):
        """Flag this hedron and all three of its atoms as needing update."""
        self.cic.hAtoms_needs_update[self.ndx] = True
        for ak in self.atomkeys:
            self.cic.atomArrayValid[self.cic.atomArrayIndex[ak]] = False

    @property
    def angle(self) -> float:
        """Get this hedron angle.

        Returns 0.0 if ``cic`` or ``ndx`` has not been set on this hedron yet.
        """
        try:
            return self.cic.hedraAngle[self.ndx]
        except AttributeError:
            return 0.0

    @angle.setter
    def angle(self, angle_deg) -> None:
        """Set this hedron angle.

        Flags the hedron in ``cic.hAtoms_needs_update`` and marks the third
        atom invalid in ``cic.atomArrayValid``.
        """
        self.cic.hedraAngle[self.ndx] = angle_deg
        self.cic.hAtoms_needs_update[self.ndx] = True
        self.cic.atomArrayValid[self.cic.atomArrayIndex[self.atomkeys[2]]] = False

    @property
    def len12(self):
        """Get first length for Hedron.

        Returns 0.0 if ``cic`` or ``ndx`` has not been set on this hedron yet.
        """
        try:
            return self.cic.hedraL12[self.ndx]
        except AttributeError:
            return 0.0

    @len12.setter
    def len12(self, length):
        """Set first length for Hedron.

        Flags the hedron in ``cic.hAtoms_needs_update`` and marks the second
        and third atoms invalid in ``cic.atomArrayValid``.
        """
        self.cic.hedraL12[self.ndx] = length
        self.cic.hAtoms_needs_update[self.ndx] = True
        self.cic.atomArrayValid[self.cic.atomArrayIndex[self.atomkeys[1]]] = False
        self.cic.atomArrayValid[self.cic.atomArrayIndex[self.atomkeys[2]]] = False

    @property
    def len23(self) -> float:
        """Get second length for Hedron.

        Returns 0.0 if ``cic`` or ``ndx`` has not been set on this hedron yet.
        """
        try:
            return self.cic.hedraL23[self.ndx]
        except AttributeError:
            return 0.0

    @len23.setter
    def len23(self, length):
        """Set second length for Hedron.

        Flags the hedron in ``cic.hAtoms_needs_update`` and marks the third
        atom invalid in ``cic.atomArrayValid``.
        """
        self.cic.hedraL23[self.ndx] = length
        self.cic.hAtoms_needs_update[self.ndx] = True
        self.cic.atomArrayValid[self.cic.atomArrayIndex[self.atomkeys[2]]] = False

    def get_length(self, ak_tpl: "BKT") -> Optional[float]:
        """Get bond length for specified atom pair.

        :param tuple ak_tpl: tuple of AtomKeys.
            Pair of atoms in this Hedron
        :returns: the matching length, or None if fewer than two keys are
            given or the pair is not a bond of this hedron
        """
        if 2 > len(ak_tpl):
            return None
        if all(ak in self.atomkeys[:2] for ak in ak_tpl):
            return self.cic.hedraL12[self.ndx]
        if all(ak in self.atomkeys[1:] for ak in ak_tpl):
            return self.cic.hedraL23[self.ndx]
        return None

    def set_length(self, ak_tpl: "BKT", newLength: float):
        """Set bond length for specified atom pair; invalidates hedron atoms.

        :param tuple .ak_tpl: tuple of AtomKeys
            Pair of atoms in this Hedron
        :param float newLength: new bond length
        :raises TypeError: if fewer than two keys are given or the pair is
            not a bond of this hedron
        """
        if 2 > len(ak_tpl):
            raise TypeError(f"Require exactly 2 AtomKeys: {str(ak_tpl)}")
        elif all(ak in self.atomkeys[:2] for ak in ak_tpl):
            self.cic.hedraL12[self.ndx] = newLength
        elif all(ak in self.atomkeys[1:] for ak in ak_tpl):
            self.cic.hedraL23[self.ndx] = newLength
        else:
            raise TypeError("%s not found in %s" % (str(ak_tpl), self))
        self._invalidate_atoms()
class Dihedron(Edron):
    """Class to represent four joined atoms forming a dihedral angle.

    Attributes
    ----------
    angle: float
        Measurement or specification of dihedral angle in degrees; prefer
        :meth:`IC_Residue.bond_set` to set
    hedron1, hedron2: Hedron object references
        The two hedra which form the dihedral angle
    h1key, h2key: tuples of AtomKeys
        Hash keys for hedron1 and hedron2
    id3,id32: tuples of AtomKeys
        First 3 and second 3 atoms comprising dihedron; hxkey orders may differ
    ric: IC_Residue object reference
        :class:`.IC_Residue` object containing this dihedral
    reverse: bool
        Indicates order of atoms in dihedron is reversed from order of atoms
        in hedra
    primary: bool
        True if this is psi, phi, omega or a sidechain chi angle
    pclass: string (primary angle class)
        re_class with X for adjacent residue according to nomenclature
        (psi, omega, phi)
    cst, rcst: numpy [4][4] arrays
        transformations to (cst) and from (rcst) Dihedron coordinate space
        defined with atom 2 (Hedron 1 center atom) at the origin.  Views on
        :data:`IC_Chain.dCoordSpace`.

    Methods
    -------
    angle()
        getter/setter property for dihedral angle in degrees; prefer
        :meth:`IC_Residue.bond_set`
    bits()
        return :data:`IC_Residue.pic_flags` bitmask for dihedron psi, omega, etc

    """

    def __init__(self, *args: Union[List["AtomKey"], "DKT"], **kwargs: str) -> None:
        """Init Dihedron with sequence of AtomKeys and optional dihedral angle.

        Acceptable input:
            As for Edron, plus optional 'dihedral' keyworded angle value.
        """
        super().__init__(*args, **kwargs)

        # hedra making up this dihedron; set by self:_set_hedra()
        self.hedron1: Hedron  # = None
        self.hedron2: Hedron  # = None

        self.h1key: HKT  # = None
        self.h2key: HKT  # = None

        # h1, h2key above may be reversed; id3,2 will not be
        self.id3: HKT = cast(HKT, tuple(self.atomkeys[0:3]))
        self.id32: HKT = cast(HKT, tuple(self.atomkeys[1:4]))

        self._setPrimary()

        # IC_Residue object which includes this dihedron;
        # set by Residue:linkDihedra()
        self.ric: IC_Residue
        # order of atoms in dihedron is reversed from order of atoms in hedra
        self.reverse = False  # configured by :meth:`._set_hedra`

    def __repr__(self) -> str:
        """Print string for Dihedron object."""
        return f"4-{str(self.id)} {self.re_class} {str(self.angle)} {str(self.ric)}"

    @staticmethod
    def _get_hedron(ic_res: "IC_Residue", id3: "HKT") -> Optional[Hedron]:
        """Find specified hedron on this residue or its adjacent neighbors.

        Looks in ``ic_res.hedra`` first, then in each ``rprev`` residue, then
        in each ``rnext`` residue; returns None if no residue has the key.
        """
        hedron = ic_res.hedra.get(id3, None)
        if not hedron and 0 < len(ic_res.rprev):
            for rp in ic_res.rprev:
                hedron = rp.hedra.get(id3, None)
                if hedron is not None:
                    break
        if not hedron and 0 < len(ic_res.rnext):
            for rn in ic_res.rnext:
                hedron = rn.hedra.get(id3, None)
                if hedron is not None:
                    break
        return hedron

    def _setPrimary(self) -> None:
        """Mark dihedra required for psi, phi, omega, chi and other angles.

        Sets ``self.primary`` in every case, ``self.pclass`` for primary
        angles, and ``self.altCB_class`` for the alternate C-beta locator.
        """
        # http://www.mlb.co.jp/linux/science/garlic/doc/commands/dihedrals.html
        dhc = self.e_class
        if dhc == "NCACN":  # psi
            self.pclass = self.re_class[0:7] + "XN"
            self.primary = True
        elif dhc == "CACNCA":  # omg
            self.pclass = "XCAXC" + self.re_class[5:]
            self.primary = True
        elif dhc == "CNCAC":  # phi
            self.pclass = "XC" + self.re_class[2:]
            self.primary = True
        elif dhc == "CNCACB":  # alternate Cbeta locator
            self.altCB_class = "XC" + self.re_class[2:]
            self.primary = False
        elif dhc in primary_angles:
            self.primary = True
            self.pclass = self.re_class
        else:
            self.primary = False

    def _set_hedra(self) -> Tuple[bool, Hedron, Hedron]:
        """Work out hedra keys and set rev flag.

        :returns: (reverse flag, first hedron, second hedron)
        :raises HedronMatchError: if either hedron cannot be found on
            ``self.ric`` or its neighbours
        """
        # NOTE(review): `self.rev` is not assigned anywhere in this class
        # (the flag is stored as `self.reverse`), so this lookup appears to
        # always raise AttributeError and the hedra are re-resolved on every
        # call.  Confirm whether callers rely on that re-resolution before
        # turning this into a working cache.
        try:
            return self.rev, self.hedron1, self.hedron2
        except AttributeError:
            pass

        rev = False
        res = self.ric
        h1key = self.id3
        hedron1 = Dihedron._get_hedron(res, h1key)
        if not hedron1:
            # not stored in dihedron order: try both hedra keys reversed
            rev = True
            h1key = cast(HKT, tuple(self.atomkeys[2::-1]))
            hedron1 = Dihedron._get_hedron(res, h1key)
            h2key = cast(HKT, tuple(self.atomkeys[3:0:-1]))
        else:
            h2key = self.id32

        if not hedron1:
            raise HedronMatchError(
                f"can't find 1st hedron for key {h1key} dihedron {self}"
            )

        hedron2 = Dihedron._get_hedron(res, h2key)

        if not hedron2:
            raise HedronMatchError(
                f"can't find 2nd hedron for key {h2key} dihedron {self}"
            )

        self.hedron1 = hedron1
        self.h1key = h1key
        self.hedron2 = hedron2
        self.h2key = h2key

        self.reverse = rev

        return rev, hedron1, hedron2

    @property
    def angle(self) -> float:
        """Get dihedral angle.

        Reads ``cic.dihedraAngle[ndx]``; if ``cic``/``ndx`` are not set falls
        back to the locally saved value, and to 360.0 if there is none.
        """
        try:
            return self.cic.dihedraAngle[self.ndx]
        except AttributeError:
            try:
                return self._dihedral
            except AttributeError:
                return 360.0  # error value without type hint hassles

    @angle.setter
    def angle(self, dangle_deg_in: float) -> None:
        """Save new dihedral angle; sets needs_update.

        Faster to modify IC_Chain level arrays directly.

        This is probably not the routine you are looking for.  See
        :meth:`IC_Residue.bond_set` to change a dihedral angle along with its
        neighbours, i.e. without clashing atoms.

        N.B. dihedron (i-1)C-N-CA-CB is ignored if O exists.
        C-beta is by default placed using O-C-CA-CB, but O is missing
        in some PDB file residues, which means the sidechain cannot be
        placed.  The alternate CB path (i-1)C-N-CA-CB is provided to
        circumvent this, but if this is needed then it must be adjusted in
        conjunction with PHI ((i-1)C-N-CA-C) as they overlap.

        :param float dangle_deg_in: new dihedral angle in degrees; a value
            outside +/-180 is shifted by one turn of 360 degrees
        """
        if dangle_deg_in > 180.0:
            dangle_deg = dangle_deg_in - 360.0
        elif dangle_deg_in < -180.0:
            dangle_deg = dangle_deg_in + 360.0
        else:
            dangle_deg = dangle_deg_in

        self._dihedral = dangle_deg
        self.needs_update = True

        # mirror the change into the IC_Chain level arrays and invalidate
        # the 4th atom
        cic = self.cic
        dndx = self.ndx
        cic.dihedraAngle[dndx] = dangle_deg
        cic.dihedraAngleRads[dndx] = np.deg2rad(dangle_deg)
        cic.dAtoms_needs_update[dndx] = True
        cic.atomArrayValid[cic.atomArrayIndex[self.atomkeys[3]]] = False

    @staticmethod
    def angle_dif(a1: Union[float, np.ndarray], a2: Union[float, np.ndarray]):
        """Get angle difference between two +/- 180 angles.

        https://stackoverflow.com/a/36001014/2783487
        """
        return 180.0 - ((180.0 - a2) + a1) % 360.0

    @staticmethod
    def angle_avg(alst: List, in_rads: bool = False, out_rads: bool = False):
        """Get average of list of +/-180 angles.

        :param List alst: list of angles to average
        :param bool in_rads: input values are in radians
        :param bool out_rads: report result in radians
        """
        walst = alst if in_rads else np.deg2rad(alst)
        ravg = np.arctan2(np.sum(np.sin(walst)), np.sum(np.cos(walst)))
        return ravg if out_rads else np.rad2deg(ravg)

    @staticmethod
    def angle_pop_sd(alst: List, avg: float):
        """Get population standard deviation for list of +/-180 angles.

        should be sample std dev but avoid len(alst)=1 -> div by 0

        :param List alst: angles in degrees (list or numpy array)
        :param float avg: average angle in degrees
        """
        # asarray lets a plain Python list be used in the array arithmetic
        # of angle_dif; an ndarray input is passed through unchanged
        difs = Dihedron.angle_dif(np.asarray(alst), avg)
        return np.sqrt(np.sum(np.square(difs)) / len(alst))

    def difference(self, other: "Dihedron") -> float:
        """Get angle difference between this and other +/- 180 angles."""
        return Dihedron.angle_dif(self.angle, other.angle)

    def bits(self) -> int:
        """Get :data:`IC_Residue.pic_flags` bitmasks for self is psi, omg, phi, pomg, chiX.

        :returns: the matching flag bit(s), or 0 if this dihedron is none of
            the listed angles
        """
        icr = IC_Residue
        if self.e_class == "NCACN":
            # i psi
            return icr.pic_flags.psi
        elif hasattr(self, "pclass") and self.pclass == "XCAXCPNPCA":
            # i+1 is pro so i+1 omg
            return icr.pic_flags.omg | icr.pic_flags.pomg
        elif self.e_class == "CACNCA":
            # i+1 omg
            return icr.pic_flags.omg
        elif self.e_class == "CNCAC":
            # i+1 phi
            return icr.pic_flags.phi
        else:
            # i chiX
            atmNdx = AtomKey.fields.atm
            # a residue code with no sidechain table entry has no chi angles
            scList = ic_data_sidechains.get(self.ric.lc, ())
            aLst = tuple(ak.akl[atmNdx] for ak in self.atomkeys)
            for e in scList:
                if len(e) != 5:  # only chi entries have label at [4]
                    continue
                if aLst == e[0:4]:
                    # label's last character is the chi number; chi1 is the
                    # base bit, shift left for chi2, chi3, ...
                    return icr.pic_flags.chi1 << (int(e[4][-1]) - 1)
        return 0
class AtomKey:
    """Class for dict keys to reference atom coordinates.

    AtomKeys capture residue and disorder information together, and
    provide a no-whitespace string key for .pic files.

    Supports rich comparison and multiple ways to instantiate.

    AtomKeys contain:
     residue position (respos), insertion code (icode), 1 or 3 char residue
     name (resname), atom name (atm), altloc (altloc), and occupancy (occ)

    Use :data:`AtomKey.fields` to get the index to the component of interest by
    name:

    Get C-alpha atoms from IC_Chain atomArray and atomArrayIndex with
    AtomKeys::

        atmNameNdx = internal_coords.AtomKey.fields.atm
        CaSelection = [
            atomArrayIndex.get(k)
            for k in atomArrayIndex.keys()
            if k.akl[atmNameNdx] == "CA"
        ]
        AtomArrayCa = atomArray[CaSelection]

    Get all phenylalanine atoms in a chain::

        resNameNdx = internal_coords.AtomKey.fields.resname
        PheSelection = [
            atomArrayIndex.get(k)
            for k in atomArrayIndex.keys()
            if k.akl[resNameNdx] == "F"
        ]
        AtomArrayPhe = atomArray[PheSelection]

    'resname' will be the uppercase 1-letter amino acid code if one of the 20
    standard residues, otherwise the supplied 3-letter code.  Supplied as input
    or read from .rbase attribute of :class:`IC_Residue`.

    Attributes
    ----------
    akl: tuple
        All six fields of AtomKey
    fieldNames: tuple (Class Attribute)
        Mapping of key index positions to names
    fields: namedtuple (Class Attribute)
        Mapping of field names to index positions.
    id: str
        '_'-joined AtomKey fields, excluding 'None' fields
    atom_re: compiled regex (Class Attribute)
        A compiled regular expression matching the string form of the key
    d2h: bool (Class Attribute) default False
        Convert D atoms to H on input if True; must also modify
        :data:`IC_Residue.accept_atoms`
    missing: bool default False
        AtomKey __init__'d from string is probably missing, set this flag to
        note the issue.  Set by :meth:`.IC_Residue.rak`
    ric: IC_Residue default None
        *If* initialised with IC_Residue, this references the IC_residue

    Methods
    -------
    altloc_match(other)
        Returns True if this AtomKey matches other AtomKey excluding altloc
        and occupancy fields
    is_backbone()
        Returns True if atom is N, CA, C, O or H
    atm()
        Returns atom name, e.g. N, CA, CB, etc.
    cr_class()
        Returns covalent radii class e.g. Csb

    """

    atom_re = re.compile(
        r"^(?P<respos>-?\d+)(?P<icode>[A-Za-z])?"
        r"_(?P<resname>[a-zA-Z]+)_(?P<atm>[A-Za-z0-9]+)"
        r"(?:_(?P<altloc>\w))?(?:_(?P<occ>-?\d\.\d+?))?$"
    )
    """Pre-compiled regular expression to match an AtomKey string."""

    _endnum_re = re.compile(r"\D+(\d+)$")

    # PDB altLoc = Character = [\w ] (any non-ctrl ASCII incl space)
    # PDB iCode = AChar = [A-Za-z]

    fieldNames = ("respos", "icode", "resname", "atm", "altloc", "occ")
    _fieldsDef = namedtuple(
        "_fieldsDef", ["respos", "icode", "resname", "atm", "altloc", "occ"]
    )
    fields = _fieldsDef(0, 1, 2, 3, 4, 5)
    """Use this namedtuple to access AtomKey fields.  See :class:`AtomKey`"""

    d2h = False
    """Set True to convert D Deuterium to H Hydrogen on input."""

    def __init__(
        self, *args: Union["IC_Residue", "Atom", List, Dict, str], **kwargs: str
    ) -> None:
        """Initialize AtomKey with residue and atom data.

        Examples of acceptable input::

            (<IC_Residue>, 'CA', ...)    : IC_Residue with atom info
            (<IC_Residue>, <Atom>)       : IC_Residue with Biopython Atom
            ([52, None, 'G', 'CA', ...])  : list of ordered data fields
            (52, None, 'G', 'CA', ...)    : multiple ordered arguments
            ({respos: 52, icode: None, atm: 'CA', ...}) : dict with fieldNames
            (respos: 52, icode: None, atm: 'CA', ...) : kwargs with fieldNames
            52_G_CA, 52B_G_CA, 52_G_CA_0.33, 52_G_CA_B_0.33  : id strings
        """
        akl: List[Optional[str]] = []
        self.ric = None

        for arg in args:
            if isinstance(arg, str):
                if "_" in arg:
                    # got atom key string, parse with regex; a string with
                    # '_' that does not match atom_re is ignored
                    m = self.atom_re.match(arg)
                    if m is not None:
                        if akl != []:
                            raise Exception(
                                "Atom Key init full key not first argument: " + arg
                            )
                        akl = list(map(m.group, AtomKey.fieldNames))
                else:
                    # plain string: next field in order
                    akl.append(arg)

            elif isinstance(arg, IC_Residue):
                if akl != []:
                    raise Exception("Atom Key init Residue not first argument")
                akl = list(arg.rbase)
                self.ric = arg
            elif isinstance(arg, Atom):
                if 3 != len(akl):
                    raise Exception("Atom Key init Atom before Residue info")
                akl.append(arg.name)
                if not IC_Residue.no_altloc:
                    # blank altloc and full occupancy are stored as None
                    altloc = arg.altloc
                    akl.append(altloc if altloc != " " else None)
                    occ = float(arg.occupancy)
                    akl.append(str(occ) if occ != 1.00 else None)
                else:
                    akl += [None, None]
            elif isinstance(arg, (list, tuple)):
                akl += arg
            elif isinstance(arg, dict):
                for k in AtomKey.fieldNames:
                    akl.append(arg.get(k, None))
            else:
                raise Exception("Atom Key init not recognised")

        # process kwargs for fields not supplied positionally.  Fields are
        # placed by position: a missing/None field ahead of a supplied one is
        # kept as a None placeholder so later values (e.g. resname after
        # icode=None) are not shifted into the wrong slot.  Trailing Nones
        # are dropped here and restored by the padding below.
        tail = [kwargs.get(name) for name in AtomKey.fieldNames[len(akl):]]
        while tail and tail[-1] is None:
            tail.pop()
        akl.extend(tail)

        # tweak local akl to generate id string
        if isinstance(akl[0], Integral):
            akl[0] = str(akl[0])  # numeric residue position to string

        if self.d2h:
            atmNdx = AtomKey.fields.atm
            if akl[atmNdx][0] == "D":
                akl[atmNdx] = re.sub("D", "H", akl[atmNdx], count=1)

        self.id = "_".join(
            [
                "".join(filter(None, akl[:2])),
                str(akl[2]),  # exclude None
                "_".join(filter(None, akl[3:])),
            ]
        )

        # initialize altloc, occ (any unset trailing fields) to None
        akl += [None] * (6 - len(akl))

        self.akl = tuple(akl)
        self._hash = hash(self.akl)
        self.missing = False

    def __deepcopy__(self, memo):
        """Deep copy implementation for AtomKey."""
        # will fail if .ric not in memo
        existing = memo.get(id(self), False)
        if existing:
            return existing
        dup = type(self).__new__(self.__class__)
        memo[id(self)] = dup
        dup.__dict__.update(self.__dict__)  # all static attribs except .ric
        if self.ric is not None:
            dup.ric = memo[id(self.ric)]
        # deepcopy complete
        return dup

    def __repr__(self) -> str:
        """Repr string from id."""
        return self.id

    def __hash__(self) -> int:
        """Hash calculated at init from akl tuple."""
        return self._hash

    # sort ranks used by _cmp: backbone atoms first, then sidechain atoms,
    # then hydrogens by greek letter
    _backbone_sort_keys = {"N": 0, "CA": 1, "C": 2, "O": 3}

    _sidechain_sort_keys = {
        "CB": 1,
        "CG": 2,
        "CG1": 2,
        "OG": 2,
        "OG1": 2,
        "SG": 2,
        "CG2": 3,
        "CD": 4,
        "CD1": 4,
        "SD": 4,
        "OD1": 4,
        "ND1": 4,
        "CD2": 5,
        "ND2": 5,
        "OD2": 5,
        "CE": 6,
        "NE": 6,
        "CE1": 6,
        "OE1": 6,
        "NE1": 6,
        "CE2": 7,
        "OE2": 7,
        "NE2": 7,
        "CE3": 8,
        "CZ": 9,
        "CZ2": 9,
        "NZ": 9,
        "NH1": 10,
        "OH": 10,
        "CZ3": 10,
        "CH2": 11,
        "NH2": 11,
        "OXT": 12,
        "H": 13,
    }

    _greek_sort_keys = {"A": 0, "B": 1, "G": 2, "D": 3, "E": 4, "Z": 5, "H": 6}

    def altloc_match(self, other: "AtomKey") -> bool:
        """Test AtomKey match to other discounting occupancy and altloc.

        :returns: True if the first four fields (respos, icode, resname, atm)
            are equal; False otherwise, including when other is not an
            AtomKey
        """
        # NotImplemented is only meaningful from rich-comparison dunders and
        # is truthy in a boolean test, so answer False for foreign types
        if not isinstance(other, type(self)):
            return False
        return self.akl[:4] == other.akl[:4]

    def is_backbone(self) -> bool:
        """Return True if is N, C, CA, O, or H."""
        return self.akl[self.fields.atm] in ("N", "C", "CA", "O", "H")

    def atm(self) -> str:
        """Return atom name : N, CA, CB, O etc."""
        return self.akl[self.fields.atm]

    def cr_class(self) -> Union[str, None]:
        """Return covalent radii class for atom or None.

        Looks the atom up under the generic "X" residue first, then under
        this key's own residue name; an atom found in neither is "Hsb" if
        its name starts with H, else None.
        """
        akl = self.akl
        atmNdx = self.fields.atm
        try:
            return residue_atom_bond_state["X"][akl[atmNdx]]
        except KeyError:
            try:
                resNdx = self.fields.resname
                return residue_atom_bond_state[akl[resNdx]][akl[atmNdx]]
            except KeyError:
                return "Hsb" if akl[atmNdx][0] == "H" else None

    # @profile
    def _cmp(self, other: "AtomKey") -> Tuple[int, int]:
        """Comparison function ranking self vs. other.

        Priority is lower value, i.e. (CA, CB) gives (0, 1) for sorting.

        Fields are compared in akl order and the first differing field
        decides; (1, 1) is returned when all six fields are equal.
        """
        for i in range(6):
            s, o = self.akl[i], other.akl[i]
            if s != o:
                # insert_code, altloc can be None, deal with first
                if s is None and o is not None:
                    # no insert code before named insert code
                    return 0, 1
                elif o is None and s is not None:
                    return 1, 0
                # now we know s, o not None
                # s, o = cast(str, s), cast(str, o)  # performance critical code

                if AtomKey.fields.atm != i:
                    # only sorting complications at atom level, occ.
                    # otherwise respos, insertion code will trigger
                    # before residue name
                    if AtomKey.fields.occ == i:
                        oi = int(float(s) * 100)
                        si = int(float(o) * 100)
                        return si, oi  # swap so higher occupancy comes first
                    elif AtomKey.fields.respos == i:
                        return int(s), int(o)
                    elif AtomKey.fields.resname == i:
                        sac, oac = (
                            self.akl[AtomKey.fields.altloc],
                            other.akl[AtomKey.fields.altloc],
                        )
                        if sac is not None:
                            if oac is not None:
                                return ord(sac), ord(oac)  # altloc over resname
                            else:  # sac has val and oac is None
                                return 1, 0
                        elif oac is not None:  # oac has val and sac is None
                            return 0, 1
                    # else:  # altloc
                    # fall through for altloc, resname with both altloc = None
                    return ord(s), ord(o)

                # atom names from here
                # backbone atoms before sidechain atoms

                sb = self._backbone_sort_keys.get(s, None)
                ob = self._backbone_sort_keys.get(o, None)
                if sb is not None and ob is not None:
                    return sb, ob
                elif sb is not None and ob is None:
                    return 0, 1
                elif sb is None and ob is not None:
                    return 1, 0
                # finished backbone and backbone vs. sidechain atoms

                # sidechain vs sidechain, sidechain vs H
                ss = self._sidechain_sort_keys.get(s, None)
                os = self._sidechain_sort_keys.get(o, None)
                if ss is not None and os is not None:
                    return ss, os
                elif ss is not None and os is None:
                    return 0, 1
                elif ss is None and os is not None:
                    return 1, 0

                # amide single 'H' captured above in sidechain sort
                # now 'complex'' hydrogens after sidechain
                s0, s1, o0, o1 = s[0], s[1], o[0], o[1]
                s1d, o1d = s1.isdigit(), o1.isdigit()
                # if "H" == s0 == o0: # breaks cython
                if ("H" == s0) and ("H" == o0):
                    if (s1 == o1) or (s1d and o1d):
                        # same second character (or both digits): order by
                        # trailing number, name without one first
                        enmS = self._endnum_re.findall(s)
                        enmO = self._endnum_re.findall(o)
                        if (enmS != []) and (enmO != []):
                            return int(enmS[0]), int(enmO[0])
                        elif enmS == []:
                            return 0, 1
                        else:
                            return 1, 0
                    elif s1d:
                        return 0, 1
                    elif o1d:
                        return 1, 0
                    else:
                        return (
                            self._greek_sort_keys[s1],
                            self._greek_sort_keys[o1],
                        )
                return int(s), int(o)  # raise exception?
        return 1, 1

    def __ne__(self, other: object) -> bool:
        """Test for inequality."""
        if isinstance(other, type(self)):
            return self.akl != other.akl
        else:
            return NotImplemented

    def __eq__(self, other: object) -> bool:  # type: ignore
        """Test for equality."""
        if isinstance(other, type(self)):
            return self.akl == other.akl
        else:
            return NotImplemented

    def __gt__(self, other: object) -> bool:
        """Test greater than."""
        if isinstance(other, type(self)):
            rslt = self._cmp(other)
            return rslt[0] > rslt[1]
        else:
            return NotImplemented

    def __ge__(self, other: object) -> bool:
        """Test greater or equal."""
        if isinstance(other, type(self)):
            rslt = self._cmp(other)
            return rslt[0] >= rslt[1]
        else:
            return NotImplemented

    def __lt__(self, other: object) -> bool:
        """Test less than."""
        if isinstance(other, type(self)):
            rslt = self._cmp(other)
            return rslt[0] < rslt[1]
        else:
            return NotImplemented

    def __le__(self, other: object) -> bool:
        """Test less or equal."""
        if isinstance(other, type(self)):
            rslt = self._cmp(other)
            return rslt[0] <= rslt[1]
        else:
            return NotImplemented
def set_accuracy_95(num: float) -> float:
    """Reduce floating point accuracy to 9.5 (xxxx.xxxxx).

    The value is rendered as fixed-point text with five decimal places
    and parsed back to a float.

    Used by :class:`IC_Residue` class writing PIC and SCAD
    files.

    :param float num: input number
    :returns: float with specified accuracy
    """
    # text round-trip is kept on purpose: round(num, 5) was noted as
    # much slower
    fixed_point_text = format(num, "9.5f")
    return float(fixed_point_text)
# internal coordinates construction Exceptions | |
class HedronMatchError(Exception):
    """Raised when no hedron can be found in a residue for a given key."""
class MissingAtomError(Exception):
    """Raised when atom coordinates for a hedron or dihedron are missing."""