aakash0017's picture
Upload folder using huggingface_hub
b7731cd
# Copyright 2013 by Leighton Pritchard. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Classes and functions to parse a KGML pathway map.
The KGML pathway map is parsed into the object structure defined in
KGML_pathway.py in this package.
Classes:
- KGMLParser - Parses KGML file
Functions:
- read - Returns a single Pathway object, using KGMLParser internally
"""
from xml.etree import ElementTree
from io import StringIO
from Bio.KEGG.KGML.KGML_pathway import Component, Entry, Graphics
from Bio.KEGG.KGML.KGML_pathway import Pathway, Reaction, Relation
def read(handle):
    """Parse exactly one KEGG Pathway from the given file handle.

    The handle is passed to ``parse()`` and the resulting iterator is
    required to produce one, and only one, Pathway object, which is
    returned. Pathological files may break that expectation.

    Raises ValueError if the handle yields no pathway at all, or if it
    yields more than one.
    """
    candidates = parse(handle)

    # Pull the first (and hopefully only) pathway.
    try:
        only_pathway = next(candidates)
    except StopIteration:
        raise ValueError("No pathways found in handle") from None

    # Probe for a second pathway; running dry here is the success path.
    try:
        next(candidates)
    except StopIteration:
        return only_pathway
    raise ValueError("More than one pathway found in handle")
def parse(handle):
    """Return an iterator over Pathway elements.

    Arguments:
     - handle - file handle to a KGML file for parsing, or a KGML string

    This is a generator for the return of multiple Pathway objects: one
    Pathway is yielded for every completed ``pathway`` element found in
    the XML input.

    Raises TypeError if ``handle`` has no ``read`` method and cannot be
    wrapped in a StringIO either. Because this is a generator, the
    check (and therefore the exception) only happens once iteration
    starts, not when ``parse()`` is called.
    """
    # Check handle: anything with a read() method is used as-is, anything
    # else is assumed to be an XML string and wrapped in a StringIO.
    try:
        handle.read(0)
    except AttributeError:
        try:
            handle = StringIO(handle)
        except TypeError:
            raise TypeError(
                "An XML-containing handle or an XML string must be provided"
            ) from None
    # Parse XML and return each Pathway. Only "end" events are requested:
    # an element is fully populated (attributes and children) only once it
    # has been closed, and "start" events were never acted upon.
    for _event, elem in ElementTree.iterparse(handle, events=("end",)):
        if elem.tag == "pathway":
            yield KGMLParser(elem).parse()
            # Drop the subtree that has just been converted so it does not
            # stay in memory while the rest of the input is read.
            elem.clear()
class KGMLParser:
    """Parses a KGML XML Pathway entry into a Pathway object.

    The parser wraps a single ``pathway`` XML element. Calling ``parse()``
    builds a Pathway from that element's attributes and from its ``entry``,
    ``reaction`` and ``relation`` children.

    Example: Read and parse large metabolism file

    >>> from Bio.KEGG.KGML.KGML_parser import read
    >>> pathway = read(open('KEGG/ko01100.xml', 'r'))
    >>> print(len(pathway.entries))
    3628
    >>> print(len(pathway.reactions))
    1672
    >>> print(len(pathway.maps))
    149

    >>> pathway = read(open('KEGG/ko00010.xml', 'r'))
    >>> print(pathway) #doctest: +NORMALIZE_WHITESPACE
    Pathway: Glycolysis / Gluconeogenesis
    KEGG ID: path:ko00010
    Image file: http://www.kegg.jp/kegg/pathway/ko/ko00010.png
    Organism: ko
    Entries: 99
    Entry types:
        ortholog: 61
        compound: 31
        map: 7

    """

    # Relation subtype names whose value is converted to an integer.
    _INTEGER_SUBTYPES = ("compound", "hidden compound")

    def __init__(self, elem):
        """Initialize the class."""
        self.entry = elem

    @staticmethod
    def _copy_attributes(element, target):
        """Set every XML attribute of element as an attribute on target."""
        for key, value in element.attrib.items():
            setattr(target, key, value)

    def _parse_entry(self, element):
        """Build an Entry, with its graphics/components, and add it."""
        entry = Entry()
        self._copy_attributes(element, entry)
        for child in element:
            if child.tag == "graphics":
                self._parse_graphics(child, entry)
            elif child.tag == "component":
                self._parse_component(child, entry)
        self.pathway.add_entry(entry)

    def _parse_graphics(self, element, entry):
        """Build a Graphics object for entry and attach it."""
        graphics = Graphics(entry)
        self._copy_attributes(element, graphics)
        entry.add_graphics(graphics)

    def _parse_component(self, element, entry):
        """Build a Component object for entry and attach it."""
        component = Component(entry)
        self._copy_attributes(element, component)
        entry.add_component(component)

    def _parse_reaction(self, element):
        """Build a Reaction, with substrate/product ids, and add it."""
        reaction = Reaction()
        self._copy_attributes(element, reaction)
        for child in element:
            if child.tag == "substrate":
                reaction.add_substrate(int(child.attrib["id"]))
            elif child.tag == "product":
                reaction.add_product(int(child.attrib["id"]))
        self.pathway.add_reaction(reaction)

    def _parse_relation(self, element):
        """Build a Relation, with its subtypes, and add it."""
        relation = Relation()
        relation.entry1 = int(element.attrib["entry1"])
        relation.entry2 = int(element.attrib["entry2"])
        relation.type = element.attrib["type"]
        for child in element:
            name = child.attrib["name"]
            value = child.attrib["value"]
            if name in self._INTEGER_SUBTYPES:
                value = int(value)
            relation.subtypes.append((name, value))
        self.pathway.add_relation(relation)

    def parse(self):
        """Parse the input elements.

        Creates a fresh Pathway, stores it on ``self.pathway``, fills it
        from the wrapped element and returns it. Child elements with a
        tag other than ``entry``, ``reaction`` or ``relation`` trigger a
        BiopythonParserWarning and are otherwise skipped.
        """
        # Initialize Pathway and copy over the pathway-level attributes
        self.pathway = Pathway()
        self._copy_attributes(self.entry, self.pathway)

        handlers = {
            "entry": self._parse_entry,
            "reaction": self._parse_reaction,
            "relation": self._parse_relation,
        }
        for child in self.entry:
            handler = handlers.get(child.tag)
            if handler is not None:
                handler(child)
                continue
            # Parsing of some elements not implemented - no examples yet.
            # This should warn us of any unimplemented tags
            import warnings
            from Bio import BiopythonParserWarning

            warnings.warn(
                f"Warning: tag {child.tag} not implemented in parser",
                BiopythonParserWarning,
            )
        return self.pathway
if __name__ == "__main__":
    # Script entry point: this branch only runs when the file is executed
    # directly, not when it is imported as a module.
    from Bio._utils import run_doctest

    # NOTE(review): presumably runs this module's doctests (such as the
    # KGMLParser example) with quiet output -- confirm against Bio._utils.
    run_doctest(verbose=0)