Spaces:
No application file
No application file
File size: 6,557 Bytes
b7731cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
# Copyright 2003 Yair Benita. All rights reserved.
# Revisions copyright 2020 by Tianyi Shi. All rights reserved.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Calculate isoelectric points of polypeptides using methods of Bjellqvist.
pK values and the methods are taken from::
* Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F.
The focusing positions of polypeptides in immobilized pH gradients can be
predicted from their amino acid sequences. Electrophoresis 1993, 14,
1023-1031.
* Bjellqvist, B., Basse, B., Olsen, E. and Celis, J.E.
Reference points for comparisons of two-dimensional maps of proteins from
different human cell types defined in a pH scale where isoelectric points
correlate with polypeptide compositions. Electrophoresis 1994, 15, 529-539.
I designed the algorithm according to a note by David L. Tabb, available at:
http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
"""
positive_pKs = {"Nterm": 7.5, "K": 10.0, "R": 12.0, "H": 5.98}
negative_pKs = {"Cterm": 3.55, "D": 4.05, "E": 4.45, "C": 9.0, "Y": 10.0}
pKcterminal = {"D": 4.55, "E": 4.75}
pKnterminal = {
"A": 7.59,
"M": 7.0,
"S": 6.93,
"P": 8.36,
"T": 6.82,
"V": 7.44,
"E": 7.7,
}
charged_aas = ("K", "R", "H", "D", "E", "C", "Y")
class IsoelectricPoint:
"""A class for calculating the IEP or charge at given pH of a protein.
Parameters
----------
:protein_sequence: A ``Bio.Seq`` or string object containing a protein
sequence.
:aa_content: A dictionary with amino acid letters as keys and its
occurrences as integers, e.g. ``{"A": 3, "C": 0, ...}``.
Default: ``None``. If ``None``, the dic will be calculated
from the given sequence.
Methods
-------
:charge_at_pH(pH): Calculates the charge of the protein for a given pH
:pi(): Calculates the isoelectric point
Examples
--------
The methods of this class can either be accessed from the class itself
or from a ``ProtParam.ProteinAnalysis`` object (with partially different
names):
>>> from Bio.SeqUtils.IsoelectricPoint import IsoelectricPoint as IP
>>> protein = IP("INGAR")
>>> print(f"IEP of peptide {protein.sequence} is {protein.pi():.2f}")
IEP of peptide INGAR is 9.75
>>> print(f"Its charge at pH 7 is {protein.charge_at_pH(7.0):.2f}")
Its charge at pH 7 is 0.76
>>> from Bio.SeqUtils.ProtParam import ProteinAnalysis as PA
>>> protein = PA("PETER")
>>> print(f"IEP of {protein.sequence}: {protein.isoelectric_point():.2f}")
IEP of PETER: 4.53
>>> print(f"Charge at pH 4.53: {protein.charge_at_pH(4.53):.2f}")
Charge at pH 4.53: 0.00
"""
def __init__(self, protein_sequence, aa_content=None):
"""Initialize the class."""
self.sequence = protein_sequence.upper()
if not aa_content:
from Bio.SeqUtils.ProtParam import ProteinAnalysis as _PA
aa_content = _PA(self.sequence).count_amino_acids()
self.charged_aas_content = self._select_charged(aa_content)
self.pos_pKs, self.neg_pKs = self._update_pKs_tables()
# This function creates a dictionary with the contents of each charged aa,
# plus Cterm and Nterm.
def _select_charged(self, aa_content):
charged = {}
for aa in charged_aas:
charged[aa] = float(aa_content[aa])
charged["Nterm"] = 1.0
charged["Cterm"] = 1.0
return charged
def _update_pKs_tables(self):
"""Update pKs tables with seq specific values for N- and C-termini."""
pos_pKs = positive_pKs.copy()
neg_pKs = negative_pKs.copy()
nterm, cterm = self.sequence[0], self.sequence[-1]
if nterm in pKnterminal:
pos_pKs["Nterm"] = pKnterminal[nterm]
if cterm in pKcterminal:
neg_pKs["Cterm"] = pKcterminal[cterm]
return pos_pKs, neg_pKs
def charge_at_pH(self, pH):
"""Calculate the charge of a protein at given pH."""
# derivation:
# Henderson Hasselbalch equation: pH = pKa + log([A-]/[HA])
# Rearranging: [HA]/[A-] = 10 ** (pKa - pH)
# partial_charge =
# [A-]/[A]total = [A-]/([A-] + [HA]) = 1 / { ([A-] + [HA])/[A-] } =
# 1 / (1 + [HA]/[A-]) = 1 / (1 + 10 ** (pKa - pH)) for acidic residues;
# 1 / (1 + 10 ** (pH - pKa)) for basic residues
positive_charge = 0.0
for aa, pK in self.pos_pKs.items():
partial_charge = 1.0 / (10 ** (pH - pK) + 1.0)
positive_charge += self.charged_aas_content[aa] * partial_charge
negative_charge = 0.0
for aa, pK in self.neg_pKs.items():
partial_charge = 1.0 / (10 ** (pK - pH) + 1.0)
negative_charge += self.charged_aas_content[aa] * partial_charge
return positive_charge - negative_charge
# This is the action function, it tries different pH until the charge of
# the protein is 0 (or close).
def pi(self, pH=7.775, min_=4.05, max_=12):
r"""Calculate and return the isoelectric point as float.
This is a recursive function that uses bisection method.
Wiki on bisection: https://en.wikipedia.org/wiki/Bisection_method
Arguments:
- pH: the pH at which the current charge of the protein is computed.
This pH lies at the centre of the interval (mean of `min_` and `max_`).
- min\_: the minimum of the interval. Initial value defaults to 4.05,
which is below the theoretical minimum, when the protein is composed
exclusively of aspartate.
- max\_: the maximum of the the interval. Initial value defaults to 12,
which is above the theoretical maximum, when the protein is composed
exclusively of arginine.
"""
charge = self.charge_at_pH(pH)
if max_ - min_ > 0.0001:
if charge > 0.0:
min_ = pH
else:
max_ = pH
next_pH = (min_ + max_) / 2
return self.pi(next_pH, min_, max_)
return pH
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
|