Spaces:
No application file
No application file
File size: 8,435 Bytes
b7731cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 |
# Copyright (C) 2020, Joao Rodrigues ([email protected])
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Calculation of solvent accessible surface areas for Bio.PDB entities.
Uses the "rolling ball" algorithm developed by Shrake & Rupley,
which uses a sphere (of equal radius to a solvent molecule) to probe the
surface of the molecule.
Reference:
Shrake, A; Rupley, JA. (1973). J Mol Biol
"Environment and exposure to solvent of protein atoms. Lysozyme and insulin".
"""
import collections
import math
import numpy as np
from Bio.PDB.kdtrees import KDTree
__all__ = ["ShrakeRupley"]
# Single-letter level codes listed from the smallest unit to the largest:
# "A" (Atom), "R" (Residue), "C" (Chain), "M" (Model), "S" (Structure).
# Each code maps to its position in that ordering, so two levels can be
# compared numerically.
_ENTITY_HIERARCHY = {code: rank for rank, code in enumerate("ARCMS")}
# vdW radii taken from:
# https://en.wikipedia.org/wiki/Atomic_radii_of_the_elements_(data_page)
#
# Radii for CL, K, NA, etc are _not_ ionic radii.
#
# References:
# A. Bondi (1964). "van der Waals Volumes and Radii".
# M. Mantina, A.C. et al., J. Phys. Chem. 2009, 113, 5806.
# Radius lookup keyed by upper-case element symbol. Any symbol that is not
# listed below resolves to the fallback radius of 2.0 (and, being a
# defaultdict, is stored in the table on first lookup).
ATOMIC_RADII = collections.defaultdict(
    lambda: 2.0,
    H=1.200,
    HE=1.400,
    C=1.700,
    N=1.550,
    O=1.520,
    F=1.470,
    NA=2.270,
    MG=1.730,
    P=1.800,
    S=1.800,
    CL=1.750,
    K=2.750,
    CA=2.310,
    NI=1.630,
    CU=1.400,
    ZN=1.390,
    SE=1.900,
    BR=1.850,
    CD=1.580,
    I=1.980,
    HG=1.550,
)
class ShrakeRupley:
    """Calculates SASAs using the Shrake-Rupley algorithm.

    A unit sphere of ``n_points`` test points is computed once per instance.
    ``compute`` scales and translates that sphere onto every atom and counts
    the points that are not covered by any neighbouring atom.
    """

    def __init__(self, probe_radius=1.40, n_points=100, radii_dict=None):
        """Initialize the class.

        :param probe_radius: radius of the probe in A. Default is 1.40, roughly
            the radius of a water molecule.
        :type probe_radius: float

        :param n_points: resolution of the surface of each atom. Default is 100.
            A higher number of points results in more precise measurements, but
            slows down the calculation.
        :type n_points: int

        :param radii_dict: user-provided dictionary of atomic radii to use in
            the calculation. Values will replace/complement those in the
            default ATOMIC_RADII dictionary.
        :type radii_dict: dict

        :raises ValueError: if ``probe_radius`` is not positive or
            ``n_points`` is smaller than 1.

        >>> sr = ShrakeRupley()
        >>> sr = ShrakeRupley(n_points=960)
        >>> sr = ShrakeRupley(radii_dict={"O": 3.1415})
        """
        if probe_radius <= 0.0:
            raise ValueError(
                f"Probe radius must be a positive number: {probe_radius} <= 0"
            )
        self.probe_radius = float(probe_radius)

        if n_points < 1:
            # A single point is accepted, so the message states the real bound.
            raise ValueError(
                f"Number of sphere points must be at least 1: {n_points}"
            )
        self.n_points = n_points

        # Start from a copy of the module defaults, then overlay user values
        # so the shared ATOMIC_RADII table is never modified.
        self.radii_dict = ATOMIC_RADII.copy()
        if radii_dict is not None:
            self.radii_dict.update(radii_dict)

        # Pre-compute reference sphere
        self._sphere = self._compute_sphere()

    def _compute_sphere(self):
        """Return the 3D coordinates of n points on a sphere.

        Uses the golden spiral algorithm to place points 'evenly' on the sphere
        surface. We compute this once and then move the sphere to the centroid
        of each atom as we compute the ASAs.

        :return: array of shape (n_points, 3) and dtype float32 holding
            points on the unit sphere.
        """
        n = self.n_points

        dl = np.pi * (3 - 5**0.5)  # longitude increment between points
        dz = 2.0 / n  # z spacing: n equal slices across [-1, 1]

        longitude = 0
        z = 1 - dz / 2  # start at the middle of the top slice

        coords = np.zeros((n, 3), dtype=np.float32)
        for k in range(n):
            r = (1 - z * z) ** 0.5  # radius of the circle at height z
            coords[k, 0] = math.cos(longitude) * r
            coords[k, 1] = math.sin(longitude) * r
            coords[k, 2] = z
            z -= dz
            longitude += dl

        return coords

    def compute(self, entity, level="A"):
        """Calculate the solvent accessible surface area for an entity.

        The resulting atomic surface accessibility values are attached to the
        .sasa attribute of each entity (or atom), depending on the level. For
        example, if level="R", all residues will have a .sasa attribute. Atoms
        will always be assigned a .sasa attribute with their individual values.

        :param entity: input entity.
        :type entity: Bio.PDB.Entity, e.g. Residue, Chain, ...

        :param level: the level at which ASA values are assigned, which can be
            one of "A" (Atom), "R" (Residue), "C" (Chain), "M" (Model), or
            "S" (Structure). The ASA value of an entity is the sum of all ASA
            values of its children. Defaults to "A".
        :type level: str

        :raises ValueError: if ``entity`` is not a Residue, Chain, Model or
            Structure, if ``level`` is not a known level code, if ``level``
            is above the level of ``entity``, or if ``entity`` has no atoms.

        >>> from Bio.PDB import PDBParser
        >>> from Bio.PDB.SASA import ShrakeRupley
        >>> p = PDBParser(QUIET=1)
        >>> # This assumes you have a local copy of 1LCD.pdb in a directory called "PDB"
        >>> struct = p.get_structure("1LCD", "PDB/1LCD.pdb")
        >>> sr = ShrakeRupley()
        >>> sr.compute(struct, level="S")
        >>> print(round(struct.sasa, 2))
        7053.43
        >>> print(round(struct[0]["A"][11]["OE1"].sasa, 2))
        9.64
        """
        is_valid = hasattr(entity, "level") and entity.level in {"R", "C", "M", "S"}
        if not is_valid:
            raise ValueError(
                f"Invalid entity type '{type(entity)}'. "
                "Must be Residue, Chain, Model, or Structure"
            )

        if level not in _ENTITY_HIERARCHY:
            raise ValueError(f"Invalid level '{level}'. Must be A, R, C, M, or S.")
        elif _ENTITY_HIERARCHY[level] > _ENTITY_HIERARCHY[entity.level]:
            raise ValueError(
                f"Level '{level}' must be equal or smaller than input entity: {entity.level}"
            )

        # Get atoms onto list for lookup
        atoms = list(entity.get_atoms())
        n_atoms = len(atoms)
        if not n_atoms:
            raise ValueError("Entity has no child atoms.")

        # Get coordinates as a numpy array
        # We trust DisorderedAtom and friends to pick representatives.
        coords = np.array([a.coord for a in atoms], dtype=np.float64)

        # Pre-compute atom neighbors using KDTree
        kdt = KDTree(coords, 10)

        # Per-atom radius expanded by the probe radius.
        radii_dict = self.radii_dict
        radii = np.array([radii_dict[a.element] for a in atoms], dtype=np.float64)
        radii += self.probe_radius
        # Two expanded spheres can only overlap within this centre distance.
        twice_maxradii = np.max(radii) * 2

        # Number of exposed sphere points per atom.
        exposed_counts = np.zeros(n_atoms, dtype=np.int64)
        ptset = set(range(self.n_points))
        for i in range(n_atoms):
            r_i = radii[i]

            # Move sphere to atom; the arithmetic already yields a new array,
            # so the reference sphere is left untouched.
            s_on_i = self._sphere * r_i + coords[i]
            available_set = ptset.copy()

            # KDtree for sphere points
            kdt_sphere = KDTree(s_on_i, 10)

            # Iterate over neighbors of atom i
            for jj in kdt.search(coords[i], twice_maxradii):
                j = jj.index
                if i == j:
                    continue

                # jj.radius is presumably the centre-to-centre distance;
                # only atoms whose expanded spheres intersect can bury points.
                if jj.radius < (r_i + radii[j]):
                    # Remove overlapping points on sphere from available set
                    available_set -= {
                        pt.index for pt in kdt_sphere.search(coords[j], radii[j])
                    }

            exposed_counts[i] = len(available_set)  # update counts

        # Convert accessible point count to surface area in A**2:
        # each point stands for 1/n_points of the expanded sphere's area.
        area_per_point = radii * radii * (4 * np.pi / self.n_points)
        asa_array = exposed_counts * area_per_point

        # Set atom .sasa
        for i, atom in enumerate(atoms):
            atom.sasa = asa_array[i]

        # Aggregate values per entity level if necessary
        if level != "A":
            # Follow parent links upward, one hierarchy step at a time, to
            # collect the distinct entities at the requested level.
            entities = set(atoms)
            target = _ENTITY_HIERARCHY[level]
            for _ in range(target):
                entities = {e.parent for e in entities}

            # Map each atom's full_id to its row in asa_array.
            atomdict = {a.full_id: idx for idx, a in enumerate(atoms)}
            for e in entities:
                e_atoms = [atomdict[a.full_id] for a in e.get_atoms()]
                e.sasa = asa_array[e_atoms].sum()
|