Spaces:
No application file
No application file
# Copyright (C) 2020, Joao Rodrigues (j.p.g.l.m.rodrigues@gmail.com) | |
# | |
# This file is part of the Biopython distribution and governed by your | |
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
# Please see the LICENSE file that should have been included as part of this | |
# package. | |
"""Calculation of solvent accessible surface areas for Bio.PDB entities. | |
Uses the "rolling ball" algorithm developed by Shrake & Rupley, which uses a
sphere (of equal radius to a solvent molecule) to probe the surface of the
molecule.
Reference: | |
Shrake, A; Rupley, JA. (1973). J Mol Biol | |
"Environment and exposure to solvent of protein atoms. Lysozyme and insulin". | |
""" | |
import collections | |
import math | |
import numpy as np | |
from Bio.PDB.kdtrees import KDTree | |
__all__ = ["ShrakeRupley"] | |
# Rank of each hierarchy level code, from Atom ("A") up to Structure ("S").
# ShrakeRupley.compute() compares these ranks to check that the requested
# level is not above the input entity, and uses the rank as the number of
# ``.parent`` hops needed to climb from an atom to that level.
_ENTITY_HIERARCHY = {
    "A": 0,
    "R": 1,
    "C": 2,
    "M": 3,
    "S": 4,
}
# vdW radii taken from:
# https://en.wikipedia.org/wiki/Atomic_radii_of_the_elements_(data_page)
#
# Radii for CL, K, NA, etc are _not_ ionic radii.
#
# References:
# A. Bondi (1964). "van der Waals Volumes and Radii".
# M. Mantina et al. (2009). J. Phys. Chem. A, 113, 5806.
#
# Keys are upper-case element symbols, values are radii in Angstrom. Any
# element missing from the table falls back to 2.0 through the defaultdict
# factory (note that such a lookup also inserts the key into the mapping).
ATOMIC_RADII = collections.defaultdict(
    lambda: 2.0,
    {
        "H": 1.200,
        "HE": 1.400,
        "C": 1.700,
        "N": 1.550,
        "O": 1.520,
        "F": 1.470,
        "NA": 2.270,
        "MG": 1.730,
        "P": 1.800,
        "S": 1.800,
        "CL": 1.750,
        "K": 2.750,
        "CA": 2.310,
        "NI": 1.630,
        "CU": 1.400,
        "ZN": 1.390,
        "SE": 1.900,
        "BR": 1.850,
        "CD": 1.580,
        "I": 1.980,
        "HG": 1.550,
    },
)
class ShrakeRupley:
    """Calculates SASAs using the Shrake-Rupley algorithm."""

    def __init__(self, probe_radius=1.40, n_points=100, radii_dict=None):
        """Initialize the class.

        :param probe_radius: radius of the probe in A. Default is 1.40, roughly
            the radius of a water molecule.
        :type probe_radius: float

        :param n_points: resolution of the surface of each atom. Default is 100.
            A higher number of points results in more precise measurements, but
            slows down the calculation.
        :type n_points: int

        :param radii_dict: user-provided dictionary of atomic radii to use in
            the calculation. Values will replace/complement those in the
            default ATOMIC_RADII dictionary.
        :type radii_dict: dict

        :raises ValueError: if probe_radius is not strictly positive (this
            includes NaN) or if n_points is smaller than 1.

        >>> sr = ShrakeRupley()
        >>> sr = ShrakeRupley(n_points=960)
        >>> sr = ShrakeRupley(radii_dict={"O": 3.1415})
        """
        # Written as ``not (x > 0)`` rather than ``x <= 0`` so that NaN, for
        # which every comparison is False, is rejected as well.
        if not probe_radius > 0.0:
            raise ValueError(
                f"Probe radius must be a positive number: {probe_radius} <= 0"
            )
        self.probe_radius = float(probe_radius)

        # NOTE: the check accepts n_points == 1 even though the message says
        # "larger than 1"; the message text is kept unchanged for callers
        # that match on it.
        if n_points < 1:
            raise ValueError(
                f"Number of sphere points must be larger than 1: {n_points}"
            )
        self.n_points = n_points

        # Per-instance copy of the default table (the copy keeps the
        # defaultdict fallback), overlaid with any user-provided radii.
        self.radii_dict = ATOMIC_RADII.copy()
        if radii_dict is not None:
            self.radii_dict.update(radii_dict)

        # Pre-compute reference sphere
        self._sphere = self._compute_sphere()

    def _compute_sphere(self):
        """Return the 3D coordinates of n points on a sphere.

        Uses the golden spiral algorithm to place points 'evenly' on the sphere
        surface. We compute this once and then move the sphere to the centroid
        of each atom as we compute the ASAs.

        :return: array of shape (n_points, 3) and dtype float32 holding
            points on the unit sphere centred at the origin.
        """
        n = self.n_points

        dl = np.pi * (3 - 5**0.5)  # golden-angle increment in longitude
        dz = 2.0 / n  # constant step along z, from +1 towards -1

        longitude = 0
        z = 1 - dz / 2  # start half a step below the pole

        coords = np.zeros((n, 3), dtype=np.float32)
        for k in range(n):
            r = (1 - z * z) ** 0.5  # radius of the circle at height z
            coords[k, 0] = math.cos(longitude) * r
            coords[k, 1] = math.sin(longitude) * r
            coords[k, 2] = z
            z -= dz
            longitude += dl

        return coords

    def compute(self, entity, level="A"):
        """Calculate solvent accessible surface area for an entity.

        The resulting atomic surface accessibility values are attached to the
        .sasa attribute of each entity (or atom), depending on the level. For
        example, if level="R", all residues will have a .sasa attribute. Atoms
        will always be assigned a .sasa attribute with their individual values.

        :param entity: input entity.
        :type entity: Bio.PDB.Entity, e.g. Residue, Chain, ...

        :param level: the level at which ASA values are assigned, which can be
            one of "A" (Atom), "R" (Residue), "C" (Chain), "M" (Model), or
            "S" (Structure). The ASA value of an entity is the sum of all ASA
            values of its children. Defaults to "A".
        :type level: str

        :raises ValueError: if entity is not a Residue, Chain, Model or
            Structure, if level is not a known level code, if level is above
            the level of entity, or if entity has no atoms.

        >>> from Bio.PDB import PDBParser
        >>> from Bio.PDB.SASA import ShrakeRupley
        >>> p = PDBParser(QUIET=1)
        >>> # This assumes you have a local copy of 1LCD.pdb in a directory called "PDB"
        >>> struct = p.get_structure("1LCD", "PDB/1LCD.pdb")
        >>> sr = ShrakeRupley()
        >>> sr.compute(struct, level="S")
        >>> print(round(struct.sasa, 2))
        7053.43
        >>> print(round(struct[0]["A"][11]["OE1"].sasa, 2))
        9.64
        """
        is_valid = hasattr(entity, "level") and entity.level in {"R", "C", "M", "S"}
        if not is_valid:
            raise ValueError(
                f"Invalid entity type '{type(entity)}'. "
                "Must be Residue, Chain, Model, or Structure"
            )

        if level not in _ENTITY_HIERARCHY:
            raise ValueError(f"Invalid level '{level}'. Must be A, R, C, M, or S.")
        elif _ENTITY_HIERARCHY[level] > _ENTITY_HIERARCHY[entity.level]:
            raise ValueError(
                f"Level '{level}' must be equal or smaller than input entity: {entity.level}"
            )

        # Get atoms onto list for lookup
        atoms = list(entity.get_atoms())
        n_atoms = len(atoms)
        if not n_atoms:
            raise ValueError("Entity has no child atoms.")

        # Get coordinates as a numpy array
        # We trust DisorderedAtom and friends to pick representatives.
        coords = np.array([a.coord for a in atoms], dtype=np.float64)

        # Pre-compute atom neighbors using KDTree
        kdt = KDTree(coords, 10)

        # Pre-compute radius + probe table (atom radius expanded by the probe)
        radii_dict = self.radii_dict
        radii = np.array([radii_dict[a.element] for a in atoms], dtype=np.float64)
        radii += self.probe_radius
        # Search radius for the neighbor query: no pair of expanded spheres
        # farther apart than this can overlap.
        twice_maxradii = np.max(radii) * 2

        # Calculate ASAs: count, per atom, the sphere points left uncovered
        asa_array = np.zeros((n_atoms, 1), dtype=np.int64)
        ptset = set(range(self.n_points))
        sphere = self._sphere
        for i in range(n_atoms):
            r_i = radii[i]

            # Scale the reference sphere and move it onto atom i. The
            # arithmetic allocates a new array, so the shared reference
            # sphere is never modified and no explicit copy is needed.
            s_on_i = (sphere * r_i) + coords[i]
            available_set = ptset.copy()

            # KDtree for sphere points
            kdt_sphere = KDTree(s_on_i, 10)

            # Iterate over neighbors of atom i
            for jj in kdt.search(coords[i], twice_maxradii):
                j = jj.index
                if i == j:
                    continue

                # jj.radius is used here as the distance between atoms i
                # and j; only overlapping expanded spheres can bury points.
                if jj.radius < (r_i + radii[j]):
                    # Remove overlapping points on sphere from available set
                    available_set -= {
                        pt.index for pt in kdt_sphere.search(coords[j], radii[j])
                    }

            asa_array[i] = len(available_set)  # update counts

        # Convert accessible point count to surface area in A**2: each point
        # stands for an equal share of the expanded sphere's surface.
        f = radii * radii * (4 * np.pi / self.n_points)
        asa_array = asa_array * f[:, np.newaxis]

        # Set atom .sasa
        for i, atom in enumerate(atoms):
            atom.sasa = asa_array[i, 0]

        # Aggregate values per entity level if necessary
        if level != "A":
            # Climb from the atoms to the requested level, one parent hop
            # per hierarchy rank.
            entities = set(atoms)
            target = _ENTITY_HIERARCHY[level]
            for _ in range(target):
                entities = {e.parent for e in entities}

            # Map every atom to its row in asa_array and sum per entity
            atomdict = {a.full_id: idx for idx, a in enumerate(atoms)}
            for e in entities:
                e_atoms = [atomdict[a.full_id] for a in e.get_atoms()]
                e.sasa = asa_array[e_atoms].sum()