Spaces:
No application file
No application file
# Copyright (C) 2002, Thomas Hamelryck ([email protected]) | |
# | |
# This file is part of the Biopython distribution and governed by your | |
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
# Please see the LICENSE file that should have been included as part of this | |
# package. | |
"""Classify protein backbone structure with Kolodny et al's fragment libraries. | |
It can be regarded as a form of objective secondary structure classification. | |
Only fragments of length 5 or 7 are supported (ie. there is a 'central' | |
residue). | |
Full reference: | |
Kolodny R, Koehl P, Guibas L, Levitt M. | |
Small libraries of protein fragments model native protein structures accurately. | |
J Mol Biol. 2002 323(2):297-307. | |
The definition files of the fragments can be obtained from: | |
http://github.com/csblab/fragments/ | |
You need these files to use this module. | |
The following example uses the library with 10 fragments of length 5.
In this example the library files are located in the directory 'PDB'
(the value passed as ``fdir``).
>>> from Bio.PDB.PDBParser import PDBParser | |
>>> from Bio.PDB.FragmentMapper import FragmentMapper | |
>>> parser = PDBParser() | |
>>> structure = parser.get_structure("1a8o", "PDB/1A8O.pdb") | |
>>> model = structure[0] | |
>>> fm = FragmentMapper(model, lsize=10, flength=5, fdir="PDB") | |
>>> chain = model['A'] | |
>>> res152 = chain[152] | |
>>> res157 = chain[157] | |
>>> res152 in fm # is res152 mapped? (fragment of a C-alpha polypeptide) | |
False | |
>>> res157 in fm # is res157 mapped? (fragment of a C-alpha polypeptide) | |
True | |
""" | |
import numpy | |
from Bio.SVDSuperimposer import SVDSuperimposer | |
from Bio.PDB.PDBExceptions import PDBException | |
from Bio.PDB.Polypeptide import PPBuilder | |
# fragment file name template (lib_SIZE_z_LENGTH.txt)
# SIZE=number of fragments in the library
# LENGTH=length of each fragment (4,5,6,7); FragmentMapper itself only accepts 5 or 7
_FRAGMENT_FILE = "lib_%s_z_%s.txt" | |
def _read_fragments(size, length, dir="."):
    """Read a fragment spec file (PRIVATE).

    Read a fragment spec file available from
    http://github.com/csblab/fragments/
    and return a list of Fragment objects.

    Lines starting with "*" and blank (or whitespace-only) lines are skipped.
    A line whose second whitespace-separated field is "------" starts a new
    fragment; every other line must provide the x, y, z coordinates of one
    C-alpha atom in its first three fields.

    :param size: number of fragments in the library
    :type size: int

    :param length: length of the fragments
    :type length: int

    :param dir: directory where the fragment spec files can be found
    :type dir: string

    :return: the fragments, in file order; each fragment id is its rank
    :rtype: [L{Fragment}, L{Fragment}, ...]

    :raises PDBException: if a coordinate line precedes the first fragment
        header, has fewer than three fields, or a fragment holds more than
        ``length`` residues
    """
    import os

    # Interpolate into the file name only: formatting the joined path would
    # misinterpret any "%" character present in the directory name.
    filename = os.path.join(dir, _FRAGMENT_FILE % (size, length))
    flist = []
    current = None
    with open(filename) as fp:
        for line in fp:
            sl = line.split()
            # skip comment and blank lines (including whitespace-only ones)
            if not sl or line[0] == "*":
                continue
            if len(sl) > 1 and sl[1] == "------":
                # Start of fragment definition; ID of fragment=rank in file
                current = Fragment(length, len(flist))
                flist.append(current)
                continue
            if current is None:
                raise PDBException(
                    "Coordinate line found before any fragment header in %r."
                    % filename
                )
            if len(sl) < 3:
                # A short row would otherwise be silently broadcast by numpy
                raise PDBException(
                    "Malformed coordinate line in %r: %r" % (filename, line)
                )
            # Add CA coord to Fragment
            coord = numpy.array([float(x) for x in sl[0:3]])
            # XXX= dummy residue name
            current.add_residue("XXX", coord)
    return flist
class Fragment:
    """Hold the C-alpha coordinates and residue names of one fragment."""

    def __init__(self, length, fid):
        """Set up an empty fragment with room for ``length`` residues.

        :param length: length of the fragment
        :type length: int

        :param fid: id for the fragment
        :type fid: int
        """
        self.fid = fid
        # capacity of the fragment (number of residues it can hold)
        self.length = length
        # number of residues stored so far
        self.counter = 0
        self.resname_list = []
        # one row of x, y, z per residue; rows stay zero until filled
        self.coords_ca = numpy.zeros((length, 3), dtype="d")

    def get_resname_list(self):
        """Return the names of the residues added so far.

        :return: the residue names
        :rtype: [string, string,...]
        """
        return self.resname_list

    def get_id(self):
        """Return the identifier given at construction.

        :return: id for the fragment
        :rtype: int
        """
        return self.fid

    def get_coords(self):
        """Return the internal CA coordinate matrix (not a copy).

        :return: the CA coords in the fragment
        :rtype: numpy (Nx3) array
        """
        return self.coords_ca

    def add_residue(self, resname, ca_coord):
        """Store one more residue in the next free slot.

        :param resname: residue name (eg. GLY).
        :type resname: string

        :param ca_coord: the c-alpha coordinates of the residue
        :type ca_coord: array-like with length 3

        :raises PDBException: if the fragment already holds ``length``
            residues
        """
        slot = self.counter
        if slot >= self.length:
            raise PDBException("Fragment boundary exceeded.")
        self.resname_list.append(resname)
        self.coords_ca[slot] = ca_coord
        self.counter = slot + 1

    def __len__(self):
        """Return the declared length of the fragment."""
        return self.length

    def __sub__(self, other):
        """Return the rmsd reported by SVDSuperimposer for two fragments.

        :return: rmsd between fragments
        :rtype: float

        Examples
        --------
        This is an incomplete but illustrative example::

            rmsd = fragment1 - fragment2

        """
        superimposer = SVDSuperimposer()
        superimposer.set(self.coords_ca, other.coords_ca)
        superimposer.run()
        return superimposer.get_rms()

    def __repr__(self):
        """Return ``<Fragment length=L id=ID>``.

        L is the length of the fragment and ID its identifier
        (rank in the library).
        """
        return "<Fragment length=%i id=%i>" % (self.length, self.fid)
def _make_fragment_list(pp, length):
    """Cut a peptide into overlapping fragments of "length" residues (PRIVATE).

    One fragment is built for every window start position, so consecutive
    fragments are shifted by one residue. Each fragment gets the id -1.

    :param pp: a list of residues (part of one peptide)
    :type pp: [L{Residue}, L{Residue}, ...]

    :param length: fragment length
    :type length: int

    :return: the fragments, ordered by start position in ``pp``
    :rtype: [L{Fragment}, L{Fragment}, ...]

    :raises PDBException: with message "CHAINBREAK" if a residue has no CA
        atom or its CA atom is disordered
    """
    fragments = []
    n_windows = len(pp) - length + 1
    for start in range(n_windows):
        fragment = Fragment(length, -1)
        for offset in range(length):
            residue = pp[start + offset]
            resname = residue.get_resname()
            if not residue.has_id("CA"):
                raise PDBException("CHAINBREAK")
            ca_atom = residue["CA"]
            if ca_atom.is_disordered():
                raise PDBException("CHAINBREAK")
            fragment.add_residue(resname, ca_atom.get_coord())
        fragments.append(fragment)
    return fragments
def _map_fragment_list(flist, reflist): | |
"""Map flist fragments to closest entry in reflist (PRIVATE). | |
Map all frgaments in flist to the closest (in RMSD) fragment in reflist. | |
Returns a list of reflist indices. | |
:param flist: list of protein fragments | |
:type flist: [L{Fragment}, L{Fragment}, ...] | |
:param reflist: list of reference (ie. library) fragments | |
:type reflist: [L{Fragment}, L{Fragment}, ...] | |
""" | |
mapped = [] | |
for f in flist: | |
rank = [] | |
for i in range(0, len(reflist)): | |
rf = reflist[i] | |
rms = f - rf | |
rank.append((rms, rf)) | |
rank.sort() | |
fragment = rank[0][1] | |
mapped.append(fragment) | |
return mapped | |
class FragmentMapper:
    """Map polypeptides in a model to lists of representative fragments."""

    def __init__(self, model, lsize=20, flength=5, fdir="."):
        """Create instance of FragmentMapper.

        The fragment library is read and the model is mapped immediately.

        :param model: the model that will be mapped
        :type model: L{Model}

        :param lsize: number of fragments in the library
        :type lsize: int

        :param flength: length of fragments in the library
        :type flength: int

        :param fdir: directory where the definition files are
                     found (default=".")
        :type fdir: string

        :raises PDBException: if flength is neither 5 nor 7
        """
        # edge = number of residues on each side of the central residue
        if flength == 5:
            self.edge = 2
        elif flength == 7:
            self.edge = 3
        else:
            raise PDBException("Fragment length should be 5 or 7.")
        self.flength = flength
        self.lsize = lsize
        self.reflist = _read_fragments(lsize, flength, fdir)
        self.model = model
        self.fd = self._map(self.model)

    def _map(self, model):
        """Map (PRIVATE).

        Build the polypeptides of the model, cut each one into fragments and
        assign the closest library fragment to the central residue of every
        fragment. The first and last ``edge`` residues of a polypeptide are
        never central and therefore stay unmapped.

        Polypeptides for which fragment construction signals "CHAINBREAK"
        are skipped; any other PDBException is propagated unchanged.

        :param model: the model that will be mapped
        :type model: L{Model}

        :return: mapping from residue to its library fragment
        :rtype: {L{Residue}: L{Fragment}}
        """
        ppb = PPBuilder()
        fd = {}
        for pp in ppb.build_peptides(model):
            try:
                # make fragments
                flist = _make_fragment_list(pp, self.flength)
                # classify fragments
                mflist = _map_fragment_list(flist, self.reflist)
            except PDBException as why:
                # Compare the message, not the exception object: an
                # exception instance is never equal to a string.
                if str(why) == "CHAINBREAK":
                    # Funny polypeptide - skip
                    continue
                raise
            # mflist[k] belongs to the window starting at residue k, whose
            # central residue is pp[k + edge]
            for position, fragment in enumerate(mflist, start=self.edge):
                fd[pp[position]] = fragment
        return fd

    def __contains__(self, res):
        """Check if the given residue is in any of the mapped fragments.

        :type res: L{Residue}
        """
        return res in self.fd

    def __getitem__(self, res):
        """Get an entry.

        :type res: L{Residue}

        :return: fragment classification
        :rtype: L{Fragment}

        :raises KeyError: if the residue was not mapped
        """
        return self.fd[res]