File size: 2,963 Bytes
b7731cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Copyright 2001 by Gavin E. Crooks.  All rights reserved.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Handle the SCOP HIErarchy files.

The SCOP Hierarchy files describe the SCOP hierarchy in terms of SCOP
unique identifiers (sunid).

The file format is described in the SCOP `release notes
<http://scop.berkeley.edu/release-notes-1.55.html>`_.

The latest HIE file can be found `elsewhere at SCOP
<http://scop.mrc-lmb.cam.ac.uk/scop/parse/>`_.

`Release 1.55 <http://scop.berkeley.edu/parse/dir.hie.scop.txt_1.55>`_
(July 2001).
"""
# TODO - Update the above URLs


class Record:
    """Holds information for one node in the SCOP hierarchy.

    Attributes:
     - sunid - SCOP unique identifier of this node
     - parent - Parent's sunid
     - children - List of children sunids

    A freshly created record holds empty strings for sunid and parent and an
    empty list of children. After parsing a line, sunid and parent are
    integers, or "" where the field was "-".

    """

    def __init__(self, line=None):
        """Initialize the class.

        Arguments:
         - line - optional HIE record line; if given and non-empty it is
           parsed to fill in the attributes.

        """
        self.sunid = ""
        self.parent = ""
        self.children = []
        if line:
            self._process(line)

    def _process(self, line):
        """Parse HIE records (PRIVATE).

        Records consist of 3 tab delimited fields; node's sunid,
        parent's sunid, and a list of children's sunids.

        Raises ValueError if the line does not hold exactly three fields,
        or if a field other than "-" cannot be converted to an integer.
        """
        # For example ::
        #
        # 0       -       46456,48724,51349,53931,56572,56835,56992,57942
        # 21953   49268   -
        # 49267   49266   49268,49269
        line = line.rstrip()  # no trailing whitespace
        columns = line.split("\t")  # separate the tab-delimited cols
        if len(columns) != 3:
            raise ValueError(f"I don't understand the format of {line}")

        sunid, parent, children = columns

        # A "-" field means "no value" and is stored as an empty string.
        self.sunid = "" if sunid == "-" else int(sunid)
        self.parent = "" if parent == "-" else int(parent)

        # Always store a fresh list (never a tuple) so that leaf records
        # behave like every other record, e.g. children.append() works.
        if children == "-":
            self.children = []
        else:
            self.children = [int(x) for x in children.split(",")]

    def __str__(self):
        """Represent the SCOP hierarchy record as a string.

        Returns the three tab separated fields followed by a newline.
        """
        if self.parent:
            parent_field = str(self.parent)
        elif self.sunid != 0:
            # An integer parent of 0 is falsy, so it ends up here together
            # with an unset parent; every node except sunid 0 is written
            # with parent "0".
            parent_field = "0"
        else:
            parent_field = "-"

        if self.children:
            children_field = ",".join(str(x) for x in self.children)
        else:
            children_field = "-"

        return "\t".join([str(self.sunid), parent_field, children_field]) + "\n"


def parse(handle):
    """Yield a HIE Record for every data line read from a handle.

    Arguments:
     - handle - file-like object, iterated line by line.

    Lines starting with "#" are skipped; each remaining line is handed to
    Record, one at a time as the iterator is consumed.
    """
    data_lines = (entry for entry in handle if not entry.startswith("#"))
    yield from (Record(entry) for entry in data_lines)