Spaces:
No application file
No application file
# Copyright 2018 by Ariel Aptekmann.
# All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Module for the support of MEME minimal motif format.""" | |
from Bio import motifs | |
def read(handle):
    """Parse a MEME minimal-format motif file into a Record object.

    The version, alphabet and background sections are read first (in that
    order); every following ``MOTIF`` section is then turned into a
    ``motifs.Motif`` and appended to the returned record.

    Examples
    --------
    >>> from Bio.motifs import minimal
    >>> with open("motifs/meme.out") as f:
    ...     record = minimal.read(f)
    ...
    >>> for motif in record:
    ...     print(motif.name, motif.evalue)
    ...
    1 1.1e-22

    You can access individual motifs in the record by their index or find a motif
    by its name:

    >>> from Bio import motifs
    >>> with open("motifs/minimal_test.meme") as f:
    ...     record = motifs.parse(f, 'minimal')
    ...
    >>> motif = record[0]
    >>> print(motif.name)
    KRP
    >>> motif = record['IFXA']
    >>> print(motif.name)
    IFXA

    This function won't retrieve instances, as there are none in minimal meme format.

    """
    record = Record()
    # The header sections are consumed from the stream in file order.
    _read_version(record, handle)
    _read_alphabet(record, handle)
    _read_background(record, handle)

    parsed = 0
    while True:
        # Advance to the next "MOTIF" header; no more headers means we are done.
        header = next((row for row in handle if row.startswith("MOTIF")), None)
        if header is None:
            return record
        # The motif identifier is the second whitespace-separated token.
        name = header.split()[1]
        parsed += 1
        length, num_occurrences, evalue = _read_motif_statistics(handle)
        counts = _read_lpm(handle, num_occurrences)
        # {'A': 0.25, 'C': 0.25, 'T': 0.25, 'G': 0.25}
        motif = motifs.Motif(alphabet=record.alphabet, counts=counts)
        motif.background = record.background
        motif.length = length
        motif.num_occurrences = num_occurrences
        motif.evalue = evalue
        motif.name = name
        record.append(motif)
        # Internal sanity check: exactly one motif stored per header seen.
        assert len(record) == parsed
class Record(list):
    """Container for the motifs and header data of a minimal MEME file.

    Behaves like a list of motifs, with the extra ability to look a motif
    up by its name using a string key.
    """

    def __init__(self):
        """Set every header attribute to its empty default."""
        self.version = ""
        self.datafile = ""
        self.command = ""
        self.alphabet = None
        self.background = {}
        self.sequences = []

    def __getitem__(self, key):
        """Return the motif at position ``key`` or the motif named ``key``.

        Non-string keys are handled by ``list.__getitem__``. For a string
        key the first motif whose ``name`` equals the key is returned, or
        None when no motif carries that name.
        """
        if not isinstance(key, str):
            return list.__getitem__(self, key)
        return next((entry for entry in self if entry.name == key), None)
# Everything below is private
def _read_background(record, handle):
    """Read background letter frequencies (PRIVATE).

    Skips lines until one starting with "Background letter frequencies" is
    found, then parses the line that follows it. That line is expected to
    hold four letter/frequency pairs, e.g.::

        A 0.303 C 0.183 G 0.209 T 0.306

    The 2nd, 4th, 6th and 8th tokens are stored in ``record.background``
    under the keys "A", "C", "G" and "T" respectively.

    Raises ValueError if the header line is missing, if the stream ends
    right after the header, or if the frequency line is too short or holds
    non-numeric values.
    """
    for line in handle:
        if line.startswith("Background letter frequencies"):
            break
    else:
        raise ValueError(
            "Improper input file. File should contain a line starting background frequencies."
        )
    try:
        line = next(handle)
    except StopIteration:
        # ``from None``: the StopIteration adds nothing useful to the traceback.
        raise ValueError(
            "Unexpected end of stream: Expected to find line starting background frequencies."
        ) from None
    ls = line.split()
    # Four letter/frequency pairs need at least eight tokens; report a short
    # line as a parse error instead of leaking an IndexError.
    if len(ls) < 8:
        raise ValueError(
            "Improper input file. Expected four background letter frequencies, found:\n%s"
            % line
        )
    A, C, G, T = float(ls[1]), float(ls[3]), float(ls[5]), float(ls[7])
    record.background = {"A": A, "C": C, "G": G, "T": T}
def _read_version(record, handle):
    """Read MEME version (PRIVATE).

    Skips lines until one starting with "MEME version" is found and stores
    the third whitespace-separated token of that line (the version number)
    in ``record.version``.

    Raises ValueError if no such line exists or if the line carries no
    version token.
    """
    for line in handle:
        if line.startswith("MEME version"):
            break
    else:
        raise ValueError(
            "Improper input file. File should contain a line starting MEME version."
        )
    ls = line.split()
    # "MEME version" with nothing after it would otherwise raise IndexError.
    if len(ls) < 3:
        raise ValueError(
            "Improper input file. 'MEME version' line lacks a version number:\n%s" % line
        )
    record.version = ls[2]
def _read_alphabet(record, handle):
    """Read alphabet (PRIVATE).

    Skips lines until one starting with "ALPHABET" is found. When the text
    after the "ALPHABET= " prefix is exactly "ACGT", ``record.alphabet`` is
    set to "ACGT"; any other value selects the 20-letter string
    "ACDEFGHIKLMNPQRSTVWY".

    Raises ValueError if no such line exists or if it does not begin with
    "ALPHABET= ".
    """
    header = next((row for row in handle if row.startswith("ALPHABET")), None)
    if header is None:
        raise ValueError(
            "Unexpected end of stream: Expected to find line starting with 'ALPHABET'"
        )
    if not header.startswith("ALPHABET= "):
        raise ValueError("Line does not start with 'ALPHABET':\n%s" % header)
    letters = header.strip().replace("ALPHABET= ", "")
    record.alphabet = "ACGT" if letters == "ACGT" else "ACDEFGHIKLMNPQRSTVWY"
def _read_lpm(handle, num_occurrences):
    """Read letter probability matrix (PRIVATE).

    Consumes rows from ``handle`` until a line that does not split into
    exactly four whitespace-separated fields (that line is consumed too).
    Each field is converted to a float, multiplied by ``num_occurrences``
    and rounded. Returns a dict mapping "A", "C", "G" and "T" to the list
    of rounded values for that column, one entry per row.
    """
    letters = "ACGT"
    columns = {letter: [] for letter in letters}
    for row in handle:
        fields = row.split()
        if len(fields) != len(letters):
            break
        for letter, field in zip(letters, fields):
            columns[letter].append(round(float(field) * num_occurrences))
    return columns
def _read_motif_statistics(handle):
    """Read motif statistics (PRIVATE).

    Skips lines until one starting with "letter-probability matrix:" is
    found, e.g.::

        letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009

    and returns the tuple ``(length, num_occurrences, evalue)`` taken from
    the ``w=``, ``nsites=`` and ``E=`` fields of that line.

    Raises ValueError if the stream ends before such a line is found, or
    if one of the three fields is missing or not numeric.
    """

    def field(tag):
        """Return the first token following ``tag`` on the matrix line."""
        pieces = line.split(tag)
        tokens = pieces[1].split() if len(pieces) > 1 else []
        if not tokens:
            raise ValueError(
                "Improper input file. Missing '%s' value in line:\n%s" % (tag, line)
            )
        return tokens[0]

    for line in handle:
        if line.startswith("letter-probability matrix:"):
            break
    else:
        # Without this branch an exhausted stream fell through to the parsing
        # code and failed with UnboundLocalError or IndexError.
        raise ValueError(
            "Unexpected end of stream: Expected to find line starting with "
            "'letter-probability matrix:'"
        )
    num_occurrences = int(field("nsites="))
    length = int(field("w="))
    evalue = float(field("E="))
    return length, num_occurrences, evalue
def _read_motif_name(handle):
    """Read motif name (PRIVATE).

    Skips lines until one containing "sorted by position p-value" is found
    and returns the first two whitespace-separated words of that line,
    joined by a single space.

    Raises ValueError if the stream ends before such a line is found.
    """
    marker = "sorted by position p-value"
    hit = next((row for row in handle if marker in row), None)
    if hit is None:
        raise ValueError("Unexpected end of stream: Failed to find motif name")
    return " ".join(hit.strip().split()[:2])
if __name__ == "__main__":
    # Executed as a script: run the doctests through Biopython's
    # run_doctest helper.
    from Bio._utils import run_doctest

    run_doctest()