# Copyright 2008 by Bartek Wilczynski.
# Revisions copyright 2019 by Victor Lin.
# Adapted from Bio.MEME.Parser by Jason A. Hackney.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Module for the support of MEME motif format."""

import xml.etree.ElementTree as ET

from Bio import Seq
from Bio import motifs


def read(handle):
    """Parse the XML output of the MEME program into a meme.Record object.

    ``handle`` is passed straight to ``xml.etree.ElementTree.parse``.
    Raises ValueError if the input cannot be parsed as XML.

    Examples
    --------
    >>> from Bio.motifs import meme
    >>> with open("motifs/meme.INO_up800.classic.oops.xml") as f:
    ...     record = meme.read(f)
    >>> for motif in record:
    ...     for instance in motif.instances:
    ...         print(instance.motif_name, instance.sequence_name, instance.sequence_id, instance.strand, instance.pvalue)
    GSKGCATGTGAAA INO1 sequence_5 + 1.21e-08
    GSKGCATGTGAAA FAS1 sequence_2 - 1.87e-08
    GSKGCATGTGAAA ACC1 sequence_4 - 6.62e-08
    GSKGCATGTGAAA CHO2 sequence_1 - 1.05e-07
    GSKGCATGTGAAA CHO1 sequence_0 - 1.69e-07
    GSKGCATGTGAAA FAS2 sequence_3 - 5.62e-07
    GSKGCATGTGAAA OPI3 sequence_6 + 1.08e-06
    TTGACWCYTGCYCWG CHO2 sequence_1 + 7.2e-10
    TTGACWCYTGCYCWG OPI3 sequence_6 - 2.56e-08
    TTGACWCYTGCYCWG ACC1 sequence_4 - 1.59e-07
    TTGACWCYTGCYCWG CHO1 sequence_0 + 2.05e-07
    TTGACWCYTGCYCWG FAS1 sequence_2 + 3.85e-07
    TTGACWCYTGCYCWG FAS2 sequence_3 - 5.11e-07
    TTGACWCYTGCYCWG INO1 sequence_5 + 8.01e-07

    """
    record = Record()
    try:
        xml_tree = ET.parse(handle)
    except ET.ParseError as err:
        # NOTE(review): the tail of this message (from "<MEME" onwards) was
        # missing from the reviewed text; confirm against version control.
        raise ValueError(
            "Improper MEME XML input file. XML root tag should start with <MEME version= ..."
        ) from err
    # NOTE(review): the statements from here to the return were missing from
    # the reviewed text and are reconstructed from the helpers' signatures
    # and the Record attributes; confirm against version control.
    __read_metadata(record, xml_tree)
    __read_alphabet(record, xml_tree)
    sequence_id_name_map = __get_sequence_id_name_map(xml_tree)
    record.sequences = list(sequence_id_name_map.values())
    __read_motifs(record, xml_tree, sequence_id_name_map)
    return record


# NOTE(review): the Motif and Instance definitions below were missing from the
# reviewed text.  They are reconstructed from how __read_motifs uses them (the
# constructor arguments and the attributes it assigns); the base classes and
# default values are assumptions - confirm against version control.
class Motif(motifs.Motif):
    """A subclass of Motif used in parsing MEME output.

    Carries the MEME-specific attributes assigned by the parser: id, name,
    alt_id, num_occurrences and evalue.
    """

    def __init__(self, alphabet=None, instances=None):
        """Initialize the class."""
        motifs.Motif.__init__(self, alphabet, instances)
        self.evalue = 0.0
        self.num_occurrences = 0
        self.name = None
        self.id = None
        self.alt_id = None


class Instance(Seq.Seq):
    """A single site (instance) of a MEME motif and its associated data."""

    def __init__(self, *args, **kwds):
        """Initialize the class."""
        Seq.Seq.__init__(self, *args, **kwds)
        self.sequence_name = ""
        self.sequence_id = ""
        self.start = 0
        self.pvalue = 1.0
        self.strand = 0
        self.length = 0
        self.motif_name = ""


class Record(list):
    """Hold the results of a MEME run: a list of motifs plus run metadata.

    Motifs can be accessed by their index, or looked up by name:

    >>> from Bio import motifs
    >>> with open("motifs/meme.INO_up800.classic.oops.xml") as f:
    ...     record = motifs.parse(f, 'MEME')
    >>> motif = record[0]
    >>> print(motif.name)
    GSKGCATGTGAAA
    >>> motif = record['GSKGCATGTGAAA']
    >>> print(motif.name)
    GSKGCATGTGAAA

    """

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        self.version = ""
        self.datafile = ""
        self.command = ""
        self.alphabet = ""
        self.sequences = []

    def __getitem__(self, key):
        """Return the motif of index key.

        A string key is matched against each motif's ``name``; the first
        match is returned, and None is returned when no motif has that name.
        Any other key (integer or slice) is handled as for a plain list.
        """
        if not isinstance(key, str):
            return list.__getitem__(self, key)
        for motif in self:
            if motif.name == key:
                return motif
        # Kept for backward compatibility: an unknown name yields None rather
        # than raising.
        return None


# Everything below is private


def __read_metadata(record, xml_tree):
    """Copy the version, data file and command line from the XML tree into record."""
    record.version = xml_tree.getroot().get("version")
    record.datafile = xml_tree.find("training_set").get("primary_sequences")
    record.command = xml_tree.find("model").find("command_line").text
    # TODO - background_frequencies, other metadata under model


def __read_alphabet(record, xml_tree):
    """Append the letter_id of every alphabet_array value to record.alphabet."""
    alphabet_tree = (
        xml_tree.find("training_set").find("letter_frequencies").find("alphabet_array")
    )
    record.alphabet += "".join(
        value.get("letter_id") for value in alphabet_tree.findall("value")
    )


def __get_sequence_id_name_map(xml_tree):
    """Return a dict mapping each training-set sequence id to its name."""
    return {
        sequence_tree.get("id"): sequence_tree.get("name")
        for sequence_tree in xml_tree.find("training_set").findall("sequence")
    }


def __read_motifs(record, xml_tree, sequence_id_name_map):
    """Build a Motif for every motif element in the XML tree and append it to record.

    Each contributing site becomes an Instance whose sequence is the
    concatenation of the site's letter_ref letter_id values.  A site whose
    sequence_id is not in sequence_id_name_map raises KeyError.
    """
    for motif_tree in xml_tree.find("motifs").findall("motif"):
        # The motif name is the same for every site, so read it once.
        motif_name = motif_tree.get("name")
        instances = []
        for site_tree in motif_tree.find("contributing_sites").findall(
            "contributing_site"
        ):
            sequence = "".join(
                letter_ref.get("letter_id")
                for letter_ref in site_tree.find("site").findall("letter_ref")
            )
            instance = Instance(sequence)
            instance.motif_name = motif_name
            instance.sequence_id = site_tree.get("sequence_id")
            instance.sequence_name = sequence_id_name_map[instance.sequence_id]
            # TODO - left flank, right flank
            # Stored as position + 1 (presumably converting a 0-based offset
            # in the XML to a 1-based start - confirm against the MEME docs).
            instance.start = int(site_tree.get("position")) + 1
            instance.pvalue = float(site_tree.get("pvalue"))
            instance.strand = __convert_strand(site_tree.get("strand"))
            instance.length = len(sequence)
            instances.append(instance)
        instances = motifs.Instances(instances, record.alphabet)
        motif = Motif(record.alphabet, instances)
        motif.id = motif_tree.get("id")
        motif.name = motif_name
        motif.alt_id = motif_tree.get("alt")
        motif.length = int(motif_tree.get("width"))
        motif.num_occurrences = int(motif_tree.get("sites"))
        motif.evalue = float(motif_tree.get("e_value"))
        # TODO - ic, re, llr, pvalue, bayes_threshold, elapsed_time
        record.append(motif)


def __convert_strand(strand):
    """Convert the strand attribute from the XML into "+" or "-".

    "minus" maps to "-".  "plus", "none", a missing attribute (None) and any
    other value map to the default, "+".
    """
    return "-" if strand == "minus" else "+"


if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()