Spaces:
No application file
No application file
File size: 2,609 Bytes
b7731cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# Copyright 2015 by Gert Hulselmans. All rights reserved.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Parse Cluster Buster position frequency matrix files."""
from Bio import motifs
class Record(list):
    """Container for the motifs read from a Cluster Buster matrix file.

    A Record is a plain list subclass whose items are the individual motifs.
    """

    def __str__(self):
        """Return the string form of every motif, one per line."""
        return "\n".join(map(str, self))
def _build_motif(name, counts):
    """Create a motif from per-nucleotide count columns and attach its name."""
    motif = motifs.Motif(alphabet="GATC", counts=counts)
    motif.name = name
    return motif


def read(handle):
    """Read motifs in Cluster Buster position frequency matrix format from a file handle.

    Cluster Buster motif format: http://zlab.bu.edu/cluster-buster/help/cis-format.html

    Lines are stripped of surrounding whitespace before being interpreted:

    - blank lines and lines starting with "#" are skipped;
    - a line starting with ">" opens a new motif, the remainder of the line
      (stripped) being its name;
    - any other line that splits into exactly four whitespace-separated
      fields is taken as the A, C, G and T counts (in that order) of one
      motif position; lines with a different number of fields are ignored.

    Matrix rows that appear before the first ">" header are dropped as soon
    as a header is met.  The motif under construction is always appended
    once the handle is exhausted, so a handle without any ">" line still
    yields a record holding one motif with an empty name.

    Arguments:
     - handle - iterable yielding the lines of the file as strings.

    Returns a Record (a list) of the motifs in file order.

    Raises ValueError if a field of a four-column row cannot be converted
    with float().
    """
    record = Record()
    motif_name = ""
    nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
    seen_header = False

    for line in handle:
        line = line.strip()
        if not line or line.startswith("#"):
            continue

        if line.startswith(">"):
            # A new header closes the previous motif; before the first
            # header there is no motif to close.
            if seen_header:
                record.append(_build_motif(motif_name, nucleotide_counts))
            motif_name = line[1:].strip()
            nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
            seen_header = True
            continue

        matrix_columns = line.split()
        if len(matrix_columns) == 4:
            # Plain loop: the appends are side effects, not a list to build.
            for nucleotide, nucleotide_count in zip("ACGT", matrix_columns):
                nucleotide_counts[nucleotide].append(float(nucleotide_count))

    # The last motif has no following header to trigger its append.
    record.append(_build_motif(motif_name, nucleotide_counts))
    return record
def write(motifs):
    """Return the motifs as text in Cluster Buster position frequency matrix format.

    Every motif contributes a ">name" header line followed by one
    tab-separated row of A, C, G and T counts per position, each count
    formatted without decimals.
    """
    row_template = "{:0.0f}\t{:0.0f}\t{:0.0f}\t{:0.0f}\n"
    chunks = []
    for motif in motifs:
        chunks.append(f">{motif.name}\n")
        # Walk the four count columns in lockstep, one position at a time.
        columns = zip(
            motif.counts["A"],
            motif.counts["C"],
            motif.counts["G"],
            motif.counts["T"],
        )
        chunks.extend(row_template.format(*position) for position in columns)
    return "".join(chunks)
|