Spaces:
No application file
No application file
# Copyright 2022 by Michiel de Hoon.  All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Bio.Align support for A2M files.

A2M files are alignment files created by align2model or hmmscore in the SAM
Sequence Alignment and Modeling Software System.
"""
from Bio.Align import Alignment | |
from Bio.Align import interfaces | |
from Bio.Seq import Seq | |
from Bio.SeqRecord import SeqRecord | |
class AlignmentWriter(interfaces.AlignmentWriter):
    """Alignment file writer for the A2M file format."""

    fmt = "A2M"

    def format_alignment(self, alignment):
        """Return a string with the alignment in the A2M file format.

        The per-column state string is read from
        ``alignment.column_annotations["state"]``.  For every sequence a
        header line (``>name`` or ``>name description``) is emitted, followed
        by one line with the aligned letters:

        - in a column with state ``"D"`` the letter is written in upper case;
        - in a column with state ``"I"`` the letter is written in lower case,
          and a gap (``-``) is written as a period;
        - columns with any other state are left out.

        Raises TypeError if ``alignment`` is not an ``Alignment`` object.
        """
        if not isinstance(alignment, Alignment):
            raise TypeError("Expected an Alignment object")
        states = alignment.column_annotations["state"]
        output = []
        for record, row in zip(alignment.sequences, alignment):
            # Plain sequences may lack an id or a description; use "" then.
            name = getattr(record, "id", "")
            description = getattr(record, "description", "")
            header = f">{name} {description}" if description else f">{name}"
            output.append(header)
            letters = []
            for letter, flag in zip(row, states):
                if flag == "D":
                    letters.append(letter.upper())
                elif flag == "I":
                    letters.append("." if letter == "-" else letter.lower())
            output.append("".join(letters))
        return "\n".join(output) + "\n"

    write_alignments = interfaces.AlignmentWriter.write_single_alignment
class AlignmentIterator(interfaces.AlignmentIterator):
    """Alignment iterator for files in the A2M file format.

    An A2M file contains one multiple alignment. Matches are represented by
    upper case letters and deletions by dashes in alignment columns containing
    matches or deletions only. Insertions are represented by lower case letters,
    with gaps aligned to the insertion shown as periods.  Header lines start
    with '>' followed by the name of the sequence, and optionally a description.
    """

    fmt = "A2M"

    def _read_next_alignment(self, stream):
        """Read the A2M records from ``stream`` and return one Alignment.

        Each header line (starting with ``>``) opens a new record; all other
        lines are stripped and appended to the current record.  The first
        record defines the state of every column: ``"D"`` (match/deletion)
        for a dash or an upper case letter, ``"I"`` (insertion) for a period
        or a lower case letter.  Every other record must have the same length
        and agree with that state column by column.

        The state string is stored on the returned alignment as
        ``column_annotations["state"]``.

        Raises:
            ValueError: if the stream holds no records, if sequence data
                appears before the first header line, if a record's length
                differs from the first record's, or if a letter is not
                consistent with the column state.
        """
        names = []
        descriptions = []
        chunks = []  # per record: the pieces of its (possibly wrapped) row
        for line in stream:
            if line.startswith(">"):
                parts = line[1:].rstrip().split(None, 1)
                names.append(parts[0] if parts else "")
                descriptions.append(parts[1] if len(parts) > 1 else "")
                chunks.append([])
                continue
            text = line.strip()
            if not chunks:
                # No header seen yet: blank lines are harmless, data is not.
                if text:
                    raise ValueError(
                        "Sequence data found before the first '>' header line"
                    )
                continue
            chunks[-1].append(text)
        if not chunks:
            raise ValueError("Empty file.")
        lines = ["".join(pieces) for pieces in chunks]

        # The first record determines the state of each alignment column.
        flags = []
        for c in lines[0]:
            if c == "-" or c.isupper():
                flags.append("D")  # Match/deletion state
            elif c == "." or c.islower():
                flags.append("I")  # Insertion state
            else:
                raise ValueError(f"Unexpected letter '{c}' in alignment")
        state = "".join(flags)

        # Explicit checks (not assert) so they survive running with -O.
        for name, line in zip(names[1:], lines[1:]):
            if len(line) != len(state):
                raise ValueError(
                    f"Sequence '{name}' has {len(line)} alignment columns; "
                    f"expected {len(state)}"
                )
            for c, m in zip(line, state):
                if m == "D":  # Match/deletion state
                    consistent = c == "-" or c.isupper()
                else:  # Insertion state
                    consistent = c == "." or c.islower()
                if not consistent:
                    raise ValueError(
                        f"Letter '{c}' of sequence '{name}' is inconsistent "
                        f"with column state '{m}'"
                    )

        # Normalize to upper case letters with dashes for all gaps.
        lines = [line.upper().replace(".", "-") for line in lines]
        coordinates = Alignment.infer_coordinates(lines)
        records = [
            SeqRecord(Seq(line.replace("-", "")), name, description=description)
            for name, description, line in zip(names, descriptions, lines)
        ]
        alignment = Alignment(records, coordinates)
        alignment.column_annotations = {"state": state}
        self._close()  # a2m files contain only one alignment
        return alignment