aakash0017's picture
Upload folder using huggingface_hub
b7731cd
# Copyright 2003 Yair Benita. All rights reserved.
# Revisions copyright 2020 by Tianyi Shi. All rights reserved.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Calculate isoelectric points of polypeptides using methods of Bjellqvist.
pK values and the methods are taken from::
* Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F.
The focusing positions of polypeptides in immobilized pH gradients can be
predicted from their amino acid sequences. Electrophoresis 1993, 14,
1023-1031.
* Bjellqvist, B., Basse, B., Olsen, E. and Celis, J.E.
Reference points for comparisons of two-dimensional maps of proteins from
different human cell types defined in a pH scale where isoelectric points
correlate with polypeptide compositions. Electrophoresis 1994, 15, 529-539.
I designed the algorithm according to a note by David L. Tabb, available at:
http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
"""
positive_pKs = {"Nterm": 7.5, "K": 10.0, "R": 12.0, "H": 5.98}
negative_pKs = {"Cterm": 3.55, "D": 4.05, "E": 4.45, "C": 9.0, "Y": 10.0}
pKcterminal = {"D": 4.55, "E": 4.75}
pKnterminal = {
"A": 7.59,
"M": 7.0,
"S": 6.93,
"P": 8.36,
"T": 6.82,
"V": 7.44,
"E": 7.7,
}
charged_aas = ("K", "R", "H", "D", "E", "C", "Y")
class IsoelectricPoint:
"""A class for calculating the IEP or charge at given pH of a protein.
Parameters
----------
:protein_sequence: A ``Bio.Seq`` or string object containing a protein
sequence.
:aa_content: A dictionary with amino acid letters as keys and its
occurrences as integers, e.g. ``{"A": 3, "C": 0, ...}``.
Default: ``None``. If ``None``, the dic will be calculated
from the given sequence.
Methods
-------
:charge_at_pH(pH): Calculates the charge of the protein for a given pH
:pi(): Calculates the isoelectric point
Examples
--------
The methods of this class can either be accessed from the class itself
or from a ``ProtParam.ProteinAnalysis`` object (with partially different
names):
>>> from Bio.SeqUtils.IsoelectricPoint import IsoelectricPoint as IP
>>> protein = IP("INGAR")
>>> print(f"IEP of peptide {protein.sequence} is {protein.pi():.2f}")
IEP of peptide INGAR is 9.75
>>> print(f"Its charge at pH 7 is {protein.charge_at_pH(7.0):.2f}")
Its charge at pH 7 is 0.76
>>> from Bio.SeqUtils.ProtParam import ProteinAnalysis as PA
>>> protein = PA("PETER")
>>> print(f"IEP of {protein.sequence}: {protein.isoelectric_point():.2f}")
IEP of PETER: 4.53
>>> print(f"Charge at pH 4.53: {protein.charge_at_pH(4.53):.2f}")
Charge at pH 4.53: 0.00
"""
def __init__(self, protein_sequence, aa_content=None):
"""Initialize the class."""
self.sequence = protein_sequence.upper()
if not aa_content:
from Bio.SeqUtils.ProtParam import ProteinAnalysis as _PA
aa_content = _PA(self.sequence).count_amino_acids()
self.charged_aas_content = self._select_charged(aa_content)
self.pos_pKs, self.neg_pKs = self._update_pKs_tables()
# This function creates a dictionary with the contents of each charged aa,
# plus Cterm and Nterm.
def _select_charged(self, aa_content):
charged = {}
for aa in charged_aas:
charged[aa] = float(aa_content[aa])
charged["Nterm"] = 1.0
charged["Cterm"] = 1.0
return charged
def _update_pKs_tables(self):
"""Update pKs tables with seq specific values for N- and C-termini."""
pos_pKs = positive_pKs.copy()
neg_pKs = negative_pKs.copy()
nterm, cterm = self.sequence[0], self.sequence[-1]
if nterm in pKnterminal:
pos_pKs["Nterm"] = pKnterminal[nterm]
if cterm in pKcterminal:
neg_pKs["Cterm"] = pKcterminal[cterm]
return pos_pKs, neg_pKs
def charge_at_pH(self, pH):
"""Calculate the charge of a protein at given pH."""
# derivation:
# Henderson Hasselbalch equation: pH = pKa + log([A-]/[HA])
# Rearranging: [HA]/[A-] = 10 ** (pKa - pH)
# partial_charge =
# [A-]/[A]total = [A-]/([A-] + [HA]) = 1 / { ([A-] + [HA])/[A-] } =
# 1 / (1 + [HA]/[A-]) = 1 / (1 + 10 ** (pKa - pH)) for acidic residues;
# 1 / (1 + 10 ** (pH - pKa)) for basic residues
positive_charge = 0.0
for aa, pK in self.pos_pKs.items():
partial_charge = 1.0 / (10 ** (pH - pK) + 1.0)
positive_charge += self.charged_aas_content[aa] * partial_charge
negative_charge = 0.0
for aa, pK in self.neg_pKs.items():
partial_charge = 1.0 / (10 ** (pK - pH) + 1.0)
negative_charge += self.charged_aas_content[aa] * partial_charge
return positive_charge - negative_charge
# This is the action function, it tries different pH until the charge of
# the protein is 0 (or close).
def pi(self, pH=7.775, min_=4.05, max_=12):
r"""Calculate and return the isoelectric point as float.
This is a recursive function that uses bisection method.
Wiki on bisection: https://en.wikipedia.org/wiki/Bisection_method
Arguments:
- pH: the pH at which the current charge of the protein is computed.
This pH lies at the centre of the interval (mean of `min_` and `max_`).
- min\_: the minimum of the interval. Initial value defaults to 4.05,
which is below the theoretical minimum, when the protein is composed
exclusively of aspartate.
- max\_: the maximum of the the interval. Initial value defaults to 12,
which is above the theoretical maximum, when the protein is composed
exclusively of arginine.
"""
charge = self.charge_at_pH(pH)
if max_ - min_ > 0.0001:
if charge > 0.0:
min_ = pH
else:
max_ = pH
next_pH = (min_ + max_) / 2
return self.pi(next_pH, min_, max_)
return pH
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()