File size: 6,195 Bytes
b7731cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# Copyright 2013 by Leighton Pritchard.  All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.

"""Classes and functions to parse a KGML pathway map.

The KGML pathway map is parsed into the object structure defined in
KGML_pathway.py in this package.

Classes:
 - KGMLParser - Parses KGML file

Functions:
 - read - Returns a single Pathway object, using KGMLParser internally
 - parse - Iterates over Pathway objects, using KGMLParser internally

"""

from xml.etree import ElementTree

from io import StringIO

from Bio.KEGG.KGML.KGML_pathway import Component, Entry, Graphics
from Bio.KEGG.KGML.KGML_pathway import Pathway, Reaction, Relation


def read(handle):
    """Return the one and only KEGG Pathway found in ``handle``.

    Arguments:
     - handle - passed unchanged to ``parse``; a file handle to a KGML
       file, or a KGML string

    A well-formed file holds exactly one pathway.  Anything else is
    reported as an error rather than silently picking one:

    Raises ValueError if no pathway is found, or if more than one
    pathway is found.
    """
    pathway_iter = parse(handle)
    try:
        only_pathway = next(pathway_iter)
    except StopIteration:
        raise ValueError("No pathways found in handle") from None
    # Probe for a second pathway; the private sentinel marks exhaustion so
    # that no yielded value can be mistaken for "nothing left".
    exhausted = object()
    if next(pathway_iter, exhausted) is not exhausted:
        raise ValueError("More than one pathway found in handle")
    return only_pathway


def parse(handle):
    """Return an iterator over Pathway elements.

    Arguments:
     - handle - file handle to a KGML file for parsing, or a KGML string

    This is a generator for the return of multiple Pathway objects: one
    Pathway is yielded each time a ``pathway`` element is completed in the
    XML stream.

    Because this is a generator, nothing is checked until the first item
    is requested.  At that point a TypeError is raised if ``handle`` has no
    ``read`` method and cannot be wrapped in a StringIO either.

    """
    # Check handle: anything lacking a read() method is treated as a string
    # holding the XML itself.
    try:
        handle.read(0)
    except AttributeError:
        try:
            handle = StringIO(handle)
        except TypeError:
            raise TypeError(
                "An XML-containing handle or an XML string must be provided"
            ) from None
    # Parse XML and return each Pathway.  Only "end" events are requested:
    # an element's children are complete only once its closing tag has been
    # seen, and "start" events would be generated just to be discarded.
    for _event, elem in ElementTree.iterparse(handle, events=("end",)):
        if elem.tag == "pathway":
            yield KGMLParser(elem).parse()
            # Drop the element's children/attributes once it has been
            # converted, so the XML subtree is not kept around.
            elem.clear()


class KGMLParser:
    """Build a Pathway object from a KGML XML ``pathway`` element.

    The element is supplied at construction time and converted by calling
    ``parse()``.

    Example: Read and parse large metabolism file

    >>> from Bio.KEGG.KGML.KGML_parser import read
    >>> pathway = read(open('KEGG/ko01100.xml', 'r'))
    >>> print(len(pathway.entries))
    3628
    >>> print(len(pathway.reactions))
    1672
    >>> print(len(pathway.maps))
    149

    >>> pathway = read(open('KEGG/ko00010.xml', 'r'))
    >>> print(pathway) #doctest: +NORMALIZE_WHITESPACE
    Pathway: Glycolysis / Gluconeogenesis
    KEGG ID: path:ko00010
    Image file: http://www.kegg.jp/kegg/pathway/ko/ko00010.png
    Organism: ko
    Entries: 99
    Entry types:
        ortholog: 61
        compound: 31
        map: 7

    """

    def __init__(self, elem):
        """Store the XML element that ``parse()`` will convert."""
        self.entry = elem

    def parse(self):
        """Convert the stored element into a Pathway and return it.

        The new Pathway is also kept on ``self.pathway``.  Its attributes
        are copied from the element's XML attributes; ``entry``,
        ``reaction`` and ``relation`` children are converted and added to
        it.  Any other child tag triggers a BiopythonParserWarning and is
        otherwise skipped.
        """

        def _copy_attributes(attrib, target):
            # Mirror every XML attribute onto the target object.
            for key, value in attrib.items():
                setattr(target, key, value)

        def _build_entry(node):
            entry = Entry()
            _copy_attributes(node.attrib, entry)
            for child in node:
                if child.tag == "graphics":
                    graphics = Graphics(entry)
                    _copy_attributes(child.attrib, graphics)
                    entry.add_graphics(graphics)
                elif child.tag == "component":
                    component = Component(entry)
                    _copy_attributes(child.attrib, component)
                    entry.add_component(component)
            self.pathway.add_entry(entry)

        def _build_reaction(node):
            reaction = Reaction()
            _copy_attributes(node.attrib, reaction)
            for child in node:
                if child.tag == "substrate":
                    reaction.add_substrate(int(child.attrib["id"]))
                elif child.tag == "product":
                    reaction.add_product(int(child.attrib["id"]))
            self.pathway.add_reaction(reaction)

        def _build_relation(node):
            relation = Relation()
            relation.entry1 = int(node.attrib["entry1"])
            relation.entry2 = int(node.attrib["entry2"])
            relation.type = node.attrib["type"]
            for child in node:
                name, value = child.attrib["name"], child.attrib["value"]
                # Compound subtypes carry a numeric value; every other
                # subtype value is kept as the raw string.
                relation.subtypes.append(
                    (
                        name,
                        int(value)
                        if name in ("compound", "hidden compound")
                        else value,
                    )
                )
            self.pathway.add_relation(relation)

        # Tag name -> builder for the child elements we know how to convert.
        builders = {
            "entry": _build_entry,
            "reaction": _build_reaction,
            "relation": _build_relation,
        }

        # Start from an empty Pathway and fill in its own attributes first.
        self.pathway = Pathway()
        _copy_attributes(self.entry.attrib, self.pathway)

        for child in self.entry:
            builder = builders.get(child.tag)
            if builder is not None:
                builder(child)
                continue
            # Parsing of some elements is not implemented (no examples
            # yet); warn so that unimplemented tags do not go unnoticed.
            import warnings
            from Bio import BiopythonParserWarning

            warnings.warn(
                f"Warning: tag {child.tag} not implemented in parser",
                BiopythonParserWarning,
            )
        return self.pathway


if __name__ == "__main__":
    # Only reached when this file is executed as a script: hand off to
    # Biopython's run_doctest helper, called with verbose=0.
    from Bio import _utils

    _utils.run_doctest(verbose=0)