Spaces:
No application file
No application file
File size: 6,195 Bytes
b7731cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
# Copyright 2013 by Leighton Pritchard. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Classes and functions to parse a KGML pathway map.
The KGML pathway map is parsed into the object structure defined in
KGML_Pathway.py in this module.
Classes:
- KGMLParser - Parses KGML file
Functions:
- read - Returns a single Pathway object, using KGMLParser internally
"""
from xml.etree import ElementTree
from io import StringIO
from Bio.KEGG.KGML.KGML_pathway import Component, Entry, Graphics
from Bio.KEGG.KGML.KGML_pathway import Pathway, Reaction, Relation
def read(handle):
    """Parse exactly one KEGG Pathway from the given handle.

    The handle is passed unchanged to parse(), and the single Pathway it
    yields is returned. A file is expected to hold one and only one
    pathway, so any other count is reported as an error.

    Raises ValueError if parse() yields no pathway, or more than one.
    """
    # A private sentinel distinguishes "iterator exhausted" from any value
    # the iterator could legitimately yield.
    exhausted = object()
    found = parse(handle)

    first = next(found, exhausted)
    if first is exhausted:
        raise ValueError("No pathways found in handle")

    # Pull once more purely to detect a second pathway.
    if next(found, exhausted) is not exhausted:
        raise ValueError("More than one pathway found in handle")

    return first
def parse(handle):
    """Return an iterator over Pathway elements.

    Arguments:
     - handle - file handle to a KGML file for parsing, or a KGML string

    This is a generator for the return of multiple Pathway objects: one
    Pathway is yielded for every ``pathway`` element found in the XML.

    Raises TypeError if handle has no ``read`` method and cannot be wrapped
    in a StringIO either. Because this is a generator, that check (like
    the parsing itself) only runs once iteration starts.
    """
    # Anything without a read() method is assumed to be the KGML text
    # itself and is wrapped so iterparse can consume it like a file.
    try:
        handle.read(0)
    except AttributeError:
        try:
            handle = StringIO(handle)
        except TypeError:
            raise TypeError(
                "An XML-containing handle or an XML string must be provided"
            ) from None
    # Only "end" events are requested: an element is complete (attributes
    # and children available) only when its end tag has been seen, and
    # "start" events were never acted upon.
    for _event, elem in ElementTree.iterparse(handle, events=("end",)):
        if elem.tag == "pathway":
            yield KGMLParser(elem).parse()
            # Drop the element's contents once the caller resumes iteration.
            elem.clear()
class KGMLParser:
    """Parses a KGML XML Pathway entry into a Pathway object.

    Example: Read and parse large metabolism file

    >>> from Bio.KEGG.KGML.KGML_parser import read
    >>> pathway = read(open('KEGG/ko01100.xml', 'r'))
    >>> print(len(pathway.entries))
    3628
    >>> print(len(pathway.reactions))
    1672
    >>> print(len(pathway.maps))
    149

    >>> pathway = read(open('KEGG/ko00010.xml', 'r'))
    >>> print(pathway) #doctest: +NORMALIZE_WHITESPACE
    Pathway: Glycolysis / Gluconeogenesis
    KEGG ID: path:ko00010
    Image file: http://www.kegg.jp/kegg/pathway/ko/ko00010.png
    Organism: ko
    Entries: 99
    Entry types:
        ortholog: 61
        compound: 31
        map: 7

    """

    def __init__(self, elem):
        """Initialize the class.

        Arguments:
         - elem - the XML element to parse; its attributes describe the
           pathway and its children are the entry, reaction and relation
           elements.

        """
        self.entry = elem

    def parse(self):
        """Parse the input elements.

        Builds a new Pathway, stores it on ``self.pathway`` and returns it.
        Child elements are handled in document order; any child whose tag
        is not ``entry``, ``reaction`` or ``relation`` triggers a
        BiopythonParserWarning and is otherwise skipped.
        """
        # Initialize Pathway
        self.pathway = Pathway()
        # Get information about the pathway itself
        self._copy_attributes(self.entry, self.pathway)

        handlers = {
            "entry": self._parse_entry,
            "reaction": self._parse_reaction,
            "relation": self._parse_relation,
        }
        for element in self.entry:
            handler = handlers.get(element.tag)
            if handler is not None:
                handler(element)
            # Parsing of some elements not implemented - no examples yet
            else:
                # This should warn us of any unimplemented tags
                import warnings
                from Bio import BiopythonParserWarning

                warnings.warn(
                    f"Warning: tag {element.tag} not implemented in parser",
                    BiopythonParserWarning,
                )
        return self.pathway

    @staticmethod
    def _copy_attributes(element, target):
        """Set each XML attribute of element as an attribute on target."""
        for name, value in element.attrib.items():
            setattr(target, name, value)

    def _parse_entry(self, element):
        """Build an Entry (with graphics/components) and add it to the pathway."""
        new_entry = Entry()
        self._copy_attributes(element, new_entry)
        for subelement in element:
            if subelement.tag == "graphics":
                new_graphics = Graphics(new_entry)
                self._copy_attributes(subelement, new_graphics)
                new_entry.add_graphics(new_graphics)
            elif subelement.tag == "component":
                new_component = Component(new_entry)
                self._copy_attributes(subelement, new_component)
                new_entry.add_component(new_component)
        # The entry is registered only after it has been fully populated.
        self.pathway.add_entry(new_entry)

    def _parse_reaction(self, element):
        """Build a Reaction with its substrates/products and add it to the pathway."""
        new_reaction = Reaction()
        self._copy_attributes(element, new_reaction)
        for subelement in element:
            # Substrates and products are recorded by integer entry ID.
            if subelement.tag == "substrate":
                new_reaction.add_substrate(int(subelement.attrib["id"]))
            elif subelement.tag == "product":
                new_reaction.add_product(int(subelement.attrib["id"]))
        self.pathway.add_reaction(new_reaction)

    def _parse_relation(self, element):
        """Build a Relation with its subtypes and add it to the pathway."""
        new_relation = Relation()
        new_relation.entry1 = int(element.attrib["entry1"])
        new_relation.entry2 = int(element.attrib["entry2"])
        new_relation.type = element.attrib["type"]
        for subtype in element:
            name, value = subtype.attrib["name"], subtype.attrib["value"]
            # Compound subtypes carry an integer value; every other
            # subtype value is kept as the raw string.
            if name in ("compound", "hidden compound"):
                value = int(value)
            new_relation.subtypes.append((name, value))
        self.pathway.add_relation(new_relation)
if __name__ == "__main__":
    # Executed as a script: run this module's doctests quietly.
    from Bio import _utils

    _utils.run_doctest(verbose=0)
|