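# NOTE(review): the text "Spaces: No application file" that preceded this
# header appears to be residue from the page this file was copied from; it is
# not part of the module.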
# Copyright 2003 Yair Benita. All rights reserved.
# Revisions copyright 2020 by Tianyi Shi. All rights reserved.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Calculate isoelectric points of polypeptides using methods of Bjellqvist.

pK values and the methods are taken from::

    * Bjellqvist, B., Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
    Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F.
    The focusing positions of polypeptides in immobilized pH gradients can be
    predicted from their amino acid sequences. Electrophoresis 1993, 14,
    1023-1031.

    * Bjellqvist, B., Basse, B., Olsen, E. and Celis, J.E.
    Reference points for comparisons of two-dimensional maps of proteins from
    different human cell types defined in a pH scale where isoelectric points
    correlate with polypeptide compositions. Electrophoresis 1994, 15, 529-539.

I designed the algorithm according to a note by David L. Tabb, available at:
http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
"""
# pK values of the groups treated as positively charged when protonated.
# "Nterm" is the generic N-terminus value; it is replaced per sequence when
# the first residue has an entry in ``pKnterminal``.
positive_pKs = {"Nterm": 7.5, "K": 10.0, "R": 12.0, "H": 5.98}

# pK values of the groups treated as negatively charged when deprotonated.
# "Cterm" is the generic C-terminus value; it is replaced per sequence when
# the last residue has an entry in ``pKcterminal``.
negative_pKs = {"Cterm": 3.55, "D": 4.05, "E": 4.45, "C": 9.0, "Y": 10.0}

# C-terminus pK overrides, keyed by the last residue of the sequence.
pKcterminal = {"D": 4.55, "E": 4.75}

# N-terminus pK overrides, keyed by the first residue of the sequence.
pKnterminal = {
    "A": 7.59,
    "M": 7.0,
    "S": 6.93,
    "P": 8.36,
    "T": 6.82,
    "V": 7.44,
    "E": 7.7,
}

# Residues whose counts are extracted from the amino acid composition; these
# are exactly the residue keys of ``positive_pKs`` and ``negative_pKs``.
charged_aas = ("K", "R", "H", "D", "E", "C", "Y")
class IsoelectricPoint:
    """A class for calculating the IEP or charge at given pH of a protein.

    Parameters
    ----------
    :protein_sequence: A ``Bio.Seq`` or string object containing a protein
                       sequence. It is upper-cased and must be non-empty,
                       because its first and last residues are inspected.
    :aa_content: A dictionary with amino acid letters as keys and its
                 occurrences as integers, e.g. ``{"A": 3, "C": 0, ...}``.
                 Default: ``None``. If ``None`` (or empty), the dictionary is
                 calculated from the given sequence. Charged residues that
                 are missing from a supplied dictionary are counted as zero.

    Methods
    -------
    :charge_at_pH(pH):  Calculates the charge of the protein for a given pH
    :pi():              Calculates the isoelectric point

    Examples
    --------
    The methods of this class can either be accessed from the class itself
    or from a ``ProtParam.ProteinAnalysis`` object (with partially different
    names):

    >>> from Bio.SeqUtils.IsoelectricPoint import IsoelectricPoint as IP
    >>> protein = IP("INGAR")
    >>> print(f"IEP of peptide {protein.sequence} is {protein.pi():.2f}")
    IEP of peptide INGAR is 9.75
    >>> print(f"Its charge at pH 7 is {protein.charge_at_pH(7.0):.2f}")
    Its charge at pH 7 is 0.76

    >>> from Bio.SeqUtils.ProtParam import ProteinAnalysis as PA
    >>> protein = PA("PETER")
    >>> print(f"IEP of {protein.sequence}: {protein.isoelectric_point():.2f}")
    IEP of PETER: 4.53
    >>> print(f"Charge at pH 4.53: {protein.charge_at_pH(4.53):.2f}")
    Charge at pH 4.53: 0.00

    """

    def __init__(self, protein_sequence, aa_content=None):
        """Initialize the class."""
        self.sequence = protein_sequence.upper()
        if not aa_content:
            # Imported here rather than at module level; presumably to avoid
            # a circular import with ProtParam (TODO confirm).
            from Bio.SeqUtils.ProtParam import ProteinAnalysis as _PA

            aa_content = _PA(self.sequence).count_amino_acids()
        self.charged_aas_content = self._select_charged(aa_content)
        self.pos_pKs, self.neg_pKs = self._update_pKs_tables()

    def _select_charged(self, aa_content):
        """Return the counts of the charged residues plus both termini.

        The result maps every letter in ``charged_aas`` to its count as a
        float and adds ``"Nterm"`` and ``"Cterm"`` with a count of 1.0 each.
        Residues absent from ``aa_content`` are counted as 0 instead of
        raising ``KeyError``.
        """
        charged = {aa: float(aa_content.get(aa, 0)) for aa in charged_aas}
        charged["Nterm"] = 1.0
        charged["Cterm"] = 1.0
        return charged

    def _update_pKs_tables(self):
        """Update pKs tables with seq specific values for N- and C-termini.

        Returns a ``(positive, negative)`` pair of fresh copies of the module
        tables, so the shared defaults are never modified.
        """
        pos_pKs = positive_pKs.copy()
        neg_pKs = negative_pKs.copy()
        # Indexing (not slicing) is kept on purpose: an empty sequence raises
        # IndexError here, exactly as before.
        nterm, cterm = self.sequence[0], self.sequence[-1]
        if nterm in pKnterminal:
            pos_pKs["Nterm"] = pKnterminal[nterm]
        if cterm in pKcterminal:
            neg_pKs["Cterm"] = pKcterminal[cterm]
        return pos_pKs, neg_pKs

    def charge_at_pH(self, pH):
        """Calculate the charge of a protein at given pH.

        Returns the summed partial charge of all positive groups minus the
        summed partial charge of all negative groups, as a float.
        """
        # derivation:
        # Henderson Hasselbalch equation: pH = pKa + log([A-]/[HA])
        # Rearranging: [HA]/[A-] = 10 ** (pKa - pH)
        # partial_charge =
        # [A-]/[A]total = [A-]/([A-] + [HA]) = 1 / { ([A-] + [HA])/[A-] } =
        # 1 / (1 + [HA]/[A-]) = 1 / (1 + 10 ** (pKa - pH)) for acidic residues;
        #                       1 / (1 + 10 ** (pH - pKa)) for basic residues
        #
        # Explicit accumulation loops are kept (rather than ``sum``) so the
        # floating point addition order stays exactly as it always was.
        positive_charge = 0.0
        for group, pK in self.pos_pKs.items():
            protonated_fraction = 1.0 / (10 ** (pH - pK) + 1.0)
            positive_charge += self.charged_aas_content[group] * protonated_fraction

        negative_charge = 0.0
        for group, pK in self.neg_pKs.items():
            deprotonated_fraction = 1.0 / (10 ** (pK - pH) + 1.0)
            negative_charge += self.charged_aas_content[group] * deprotonated_fraction

        return positive_charge - negative_charge

    def pi(self, pH=7.775, min_=4.05, max_=12):
        r"""Calculate and return the isoelectric point as float.

        The pH at which the net charge is zero is searched for with the
        bisection method, implemented as a loop.
        Wiki on bisection: https://en.wikipedia.org/wiki/Bisection_method

        Arguments:
         - pH: the pH at which the current charge of the protein is computed.
           This pH lies at the centre of the interval (mean of `min_` and
           `max_`).
         - min\_: the minimum of the interval. Initial value defaults to 4.05,
           the pK used in this module for the aspartate side chain.
         - max\_: the maximum of the interval. Initial value defaults to 12,
           the pK used in this module for the arginine side chain.

        The search stops once the interval is no wider than 0.0001 and the
        current midpoint is returned. No check is made that the net charge
        changes sign inside the interval; if it does not, the result
        converges to one of the interval bounds.
        """
        while max_ - min_ > 0.0001:
            if self.charge_at_pH(pH) > 0.0:
                # Still net positive: the isoelectric point lies above pH.
                min_ = pH
            else:
                # Net zero or negative: the isoelectric point lies at/below pH.
                max_ = pH
            pH = (min_ + max_) / 2
        return pH
# When executed as a script, run the doctests embedded in this module.
if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()