Spaces:
No application file
No application file
# Copyright 2001 by Gavin E. Crooks.  All rights reserved.
# Revisions copyright 2010 Jeffrey Finkelstein. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Handle the SCOP CLAssification file, which describes SCOP domains.

The file format is described in the SCOP release notes:
http://scop.mrc-lmb.cam.ac.uk/scop/release-notes.html

The latest CLA file can be found elsewhere at SCOP:
http://scop.mrc-lmb.cam.ac.uk/scop/parse/

Release 1.73 (July 2008):
http://scop.mrc-lmb.cam.ac.uk/scop/parse/dir.cla.scop.txt_1.73
"""
from . import Residues | |
class Record:
    """Represent a single SCOP domain entry from a CLA file.

    Attributes:
     - sid - SCOP identifier, e.g. d1danl2
     - residues - Residues object giving the domain definition
     - sccs - SCOP concise classification string, e.g. b.1.2.1
     - sunid - SCOP unique identifier of the domain
     - hierarchy - Dictionary mapping nodetype to sunid that places the
       domain in the SCOP hierarchy (see the Scop module for the
       nodetypes). Older Biopython versions used a list of (key, value)
       tuples here (see Bug 3109).

    """

    def __init__(self, line=None):
        """Create an empty record, filling it from ``line`` when one is given."""
        self.sid = ""
        self.residues = None
        self.sccs = ""
        self.sunid = ""
        self.hierarchy = {}
        if not line:
            return
        self._process(line)

    def _process(self, line):
        """Populate the record from one tab-separated CLA line.

        Raises ValueError when the line does not hold exactly six columns.
        """
        stripped = line.rstrip()  # drop the newline and any trailing blanks
        fields = stripped.split("\t")
        if len(fields) != 6:
            raise ValueError(f"I don't understand the format of {stripped}")

        sid, pdbid, residue_spec, sccs, sunid_text, hierarchy_text = fields
        self.sid = sid
        self.sccs = sccs
        self.sunid = sunid_text

        # The PDB identifier has its own column, so it is attached to the
        # Residues object after construction.
        self.residues = Residues.Residues(residue_spec)
        self.residues.pdbid = pdbid
        self.sunid = int(sunid_text)

        # The last column is a comma-separated list of "nodetype=sunid" pairs.
        for pair in hierarchy_text.split(","):
            nodetype, node_sunid = pair.split("=")
            self.hierarchy[nodetype] = int(node_sunid)

    def __str__(self):
        """Return the record as one tab-separated, newline-terminated line."""
        fields = [
            self.sid,
            # str(residues) is split on spaces so each part gets its own column.
            *str(self.residues).split(" "),
            self.sccs,
            self.sunid,
            ",".join(
                "=".join([nodetype, str(node_sunid)])
                for nodetype, node_sunid in self.hierarchy.items()
            ),
        ]
        return "\t".join(str(field) for field in fields) + "\n"
def parse(handle):
    """Yield one Record per data line of a CLA file.

    Lines that begin with "#" are treated as comments and skipped.

    Arguments:
     - handle - file-like object (any iterable of lines).

    """
    for entry in handle:
        if not entry.startswith("#"):
            yield Record(entry)
class Index(dict):
    """A CLA file indexed by SCOP identifiers for rapid random access.

    The underlying dictionary maps each SCOP identifier (sid) to the file
    offset at which its line starts. Looking a key up with ``index[sid]``
    re-opens the file, seeks to that offset and returns the parsed Record.
    """

    def __init__(self, filename):
        """Create CLA index.

        Arguments:
         - filename - The file to index

        """
        dict.__init__(self)
        self.filename = filename
        with open(self.filename) as f:
            while True:
                # Capture the offset *before* reading each line. Updating it
                # only after data lines would leave it pointing at a skipped
                # comment line, so a record following comments would be
                # indexed at the comment's offset.
                position = f.tell()
                line = f.readline()
                if not line:
                    break  # end of file
                if line.startswith("#"):
                    continue  # comment lines are not indexed
                record = Record(line)
                key = record.sid
                if key is not None:
                    self[key] = position

    def __getitem__(self, key):
        """Return an item from the indexed file.

        Raises KeyError if ``key`` was not found when the index was built.
        """
        position = dict.__getitem__(self, key)
        with open(self.filename) as f:
            f.seek(position)
            line = f.readline()
            record = Record(line)
        return record