Spaces:
No application file
No application file
# Copyright 2001 by Gavin E. Crooks. All rights reserved. | |
# This file is part of the Biopython distribution and governed by your | |
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
# Please see the LICENSE file that should have been included as part of this | |
# package. | |
"""Handle the SCOP HIErarchy files. | |
The SCOP Hierarchy files describe the SCOP hierarchy in terms of SCOP | |
unique identifiers (sunid). | |
The file format is described in the SCOP `release notes | |
<http://scop.berkeley.edu/release-notes-1.55.html>`_. | |
The latest HIE file can be found `elsewhere at SCOP | |
<http://scop.mrc-lmb.cam.ac.uk/scop/parse/>`_. | |
`Release 1.55 <http://scop.berkeley.edu/parse/dir.hie.scop.txt_1.55>`_ | |
(July 2001). | |
""" | |
# TODO - Update the above URLs | |
class Record:
    """Hold the information for one node in the SCOP hierarchy.

    Attributes:
     - sunid - SCOP unique identifier of this node (an int once parsed,
       "" while unset or when the field was "-")
     - parent - Parent's sunid (an int once parsed, "" while unset or
       when the field was "-")
     - children - List of children sunids (empty for a node without
       children)

    """

    def __init__(self, line=None):
        """Initialize the record, optionally from one line of a HIE file.

        Arguments:
         - line - optional HIE record line. When given and non-empty it is
           parsed immediately; a malformed line raises ValueError.

        """
        self.sunid = ""
        self.parent = ""
        self.children = []
        if line:
            self._process(line)

    def _process(self, line):
        """Parse HIE records (PRIVATE).

        Records consist of 3 tab delimited fields; node's sunid,
        parent's sunid, and a list of children's sunids. A field holding
        only "-" means "no value".

        Raises ValueError if the line does not split into exactly three
        tab separated fields, or if a sunid is not an integer.
        """
        # For example (fields are separated by single tabs) ::
        #
        # 0       -       46456,48724,51349,53931,56572,56835,56992,57942
        # 21953   49268   -
        # 49267   49266   49268,49269
        line = line.rstrip()  # no trailing whitespace
        columns = line.split("\t")  # separate the tab-delineated cols
        if len(columns) != 3:
            raise ValueError(f"I don't understand the format of {line}")

        sunid, parent, children = columns

        self.sunid = "" if sunid == "-" else int(sunid)
        self.parent = "" if parent == "-" else int(parent)

        # Always store a list (never a tuple) so a leaf record has the same
        # type as every other record and can still be appended to.
        if children == "-":
            self.children = []
        else:
            self.children = [int(x) for x in children.split(",")]

    def __str__(self):
        """Represent the SCOP hierarchy record as a string.

        The result is one tab separated HIE line terminated by a newline.
        """
        fields = [str(self.sunid)]

        if self.parent:
            fields.append(str(self.parent))
        elif self.sunid != 0:
            # A falsy parent is either the integer 0 or the unset value "";
            # both are written as "0" for any node other than the root.
            fields.append("0")
        else:
            # The root node (sunid 0) has no parent.
            fields.append("-")

        if self.children:
            fields.append(",".join(str(x) for x in self.children))
        else:
            fields.append("-")

        return "\t".join(fields) + "\n"
def parse(handle):
    """Iterate over a HIE file, yielding one Record per data line.

    Lines that start with "#" are treated as comments and skipped; every
    other line is handed to Record, which raises ValueError if it cannot
    parse the line.

    Arguments:
     - handle - file-like object.

    """
    data_lines = (text for text in handle if not text.startswith("#"))
    yield from map(Record, data_lines)