Spaces:
No application file
No application file
File size: 5,855 Bytes
b7731cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
# Copyright 2018 by Ariel Aptekmann.
# All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Module for the support of MEME minimal motif format."""
from Bio import motifs
def read(handle):
"""Parse the text output of the MEME program into a meme.Record object.
Examples
--------
>>> from Bio.motifs import minimal
>>> with open("motifs/meme.out") as f:
... record = minimal.read(f)
...
>>> for motif in record:
... print(motif.name, motif.evalue)
...
1 1.1e-22
You can access individual motifs in the record by their index or find a motif
by its name:
>>> from Bio import motifs
>>> with open("motifs/minimal_test.meme") as f:
... record = motifs.parse(f, 'minimal')
...
>>> motif = record[0]
>>> print(motif.name)
KRP
>>> motif = record['IFXA']
>>> print(motif.name)
IFXA
This function won't retrieve instances, as there are none in minimal meme format.
"""
motif_number = 0
record = Record()
_read_version(record, handle)
_read_alphabet(record, handle)
_read_background(record, handle)
while True:
for line in handle:
if line.startswith("MOTIF"):
break
else:
return record
name = line.split()[1]
motif_number += 1
length, num_occurrences, evalue = _read_motif_statistics(handle)
counts = _read_lpm(handle, num_occurrences)
# {'A': 0.25, 'C': 0.25, 'T': 0.25, 'G': 0.25}
motif = motifs.Motif(alphabet=record.alphabet, counts=counts)
motif.background = record.background
motif.length = length
motif.num_occurrences = num_occurrences
motif.evalue = evalue
motif.name = name
record.append(motif)
assert len(record) == motif_number
return record
class Record(list):
"""Class for holding the results of a minimal MEME run."""
def __init__(self):
"""Initialize record class values."""
self.version = ""
self.datafile = ""
self.command = ""
self.alphabet = None
self.background = {}
self.sequences = []
def __getitem__(self, key):
"""Return the motif of index key."""
if isinstance(key, str):
for motif in self:
if motif.name == key:
return motif
else:
return list.__getitem__(self, key)
# Everything below is private
def _read_background(record, handle):
"""Read background letter frequencies (PRIVATE)."""
for line in handle:
if line.startswith("Background letter frequencies"):
break
else:
raise ValueError(
"Improper input file. File should contain a line starting background frequencies."
)
try:
line = next(handle)
except StopIteration:
raise ValueError(
"Unexpected end of stream: Expected to find line starting background frequencies."
)
line = line.strip()
ls = line.split()
A, C, G, T = float(ls[1]), float(ls[3]), float(ls[5]), float(ls[7])
record.background = {"A": A, "C": C, "G": G, "T": T}
def _read_version(record, handle):
"""Read MEME version (PRIVATE)."""
for line in handle:
if line.startswith("MEME version"):
break
else:
raise ValueError(
"Improper input file. File should contain a line starting MEME version."
)
line = line.strip()
ls = line.split()
record.version = ls[2]
def _read_alphabet(record, handle):
"""Read alphabet (PRIVATE)."""
for line in handle:
if line.startswith("ALPHABET"):
break
else:
raise ValueError(
"Unexpected end of stream: Expected to find line starting with 'ALPHABET'"
)
if not line.startswith("ALPHABET= "):
raise ValueError("Line does not start with 'ALPHABET':\n%s" % line)
line = line.strip().replace("ALPHABET= ", "")
if line == "ACGT":
al = "ACGT"
else:
al = "ACDEFGHIKLMNPQRSTVWY"
record.alphabet = al
def _read_lpm(handle, num_occurrences):
"""Read letter probability matrix (PRIVATE)."""
counts = [[], [], [], []]
for line in handle:
freqs = line.split()
if len(freqs) != 4:
break
counts[0].append(round(float(freqs[0]) * num_occurrences))
counts[1].append(round(float(freqs[1]) * num_occurrences))
counts[2].append(round(float(freqs[2]) * num_occurrences))
counts[3].append(round(float(freqs[3]) * num_occurrences))
c = {}
c["A"] = counts[0]
c["C"] = counts[1]
c["G"] = counts[2]
c["T"] = counts[3]
return c
def _read_motif_statistics(handle):
"""Read motif statistics (PRIVATE)."""
# minimal :
# letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009
for line in handle:
if line.startswith("letter-probability matrix:"):
break
num_occurrences = int(line.split("nsites=")[1].split()[0])
length = int(line.split("w=")[1].split()[0])
evalue = float(line.split("E=")[1].split()[0])
return length, num_occurrences, evalue
def _read_motif_name(handle):
"""Read motif name (PRIVATE)."""
for line in handle:
if "sorted by position p-value" in line:
break
else:
raise ValueError("Unexpected end of stream: Failed to find motif name")
line = line.strip()
words = line.split()
name = " ".join(words[0:2])
return name
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
|