File size: 4,531 Bytes
b7731cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Copyright 2022 by Michiel de Hoon.  All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Bio.Align support for A2M files.

A2M files are alignment files created by align2model or hmmscore in the SAM
Sequence Alignment and Modeling Software System.
"""
from Bio.Align import Alignment
from Bio.Align import interfaces
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord


class AlignmentWriter(interfaces.AlignmentWriter):
    """Alignment file writer for the A2M file format."""

    fmt = "A2M"

    def format_alignment(self, alignment):
        """Return a string with the alignment in the A2M file format.

        Each sequence is written as a header line (">" followed by the
        sequence id and, if it is not empty, the description), followed by
        one line with the aligned sequence.  The alignment must carry a
        "state" column annotation with one character per alignment column,
        which decides how each column is written:

        - "D" (match/deletion column): the letter in upper case; a gap
          stays "-".
        - "I" (insertion column): the letter in lower case; a gap is
          written as ".".

        Raises a TypeError if the argument is not an Alignment object, and
        a ValueError if the state annotation does not have exactly one
        character per alignment column or contains a character other than
        "D" or "I".
        """
        if not isinstance(alignment, Alignment):
            raise TypeError("Expected an Alignment object")
        state = alignment.column_annotations["state"]
        lines = []
        for sequence, row in zip(alignment.sequences, alignment):
            # Plain sequences (as opposed to SeqRecord objects) have neither
            # an id nor a description; fall back to empty strings for those.
            name = getattr(sequence, "id", "")
            description = getattr(sequence, "description", "")
            if description:
                lines.append(f">{name} {description}")
            else:
                lines.append(f">{name}")
            # zip() would silently truncate to the shorter of the two, and
            # thereby write an incomplete row without any warning.
            if len(row) != len(state):
                raise ValueError(
                    "Length of the state column annotation (%d) does not "
                    "match the number of alignment columns (%d)"
                    % (len(state), len(row))
                )
            letters = []
            for c, m in zip(row, state):
                if m == "D":
                    letters.append(c.upper())
                elif m == "I":
                    letters.append("." if c == "-" else c.lower())
                else:
                    # Skipping the column instead would silently corrupt
                    # the alignment written to the file.
                    raise ValueError(
                        "Unexpected state '%s' in column annotations "
                        "(expected 'D' or 'I')" % m
                    )
            lines.append("".join(letters))
        return "\n".join(lines) + "\n"

    write_alignments = interfaces.AlignmentWriter.write_single_alignment


class AlignmentIterator(interfaces.AlignmentIterator):
    """Alignment iterator for files in the A2M file format.

    An A2M file contains one multiple alignment. Matches are represented by
    upper case letters and deletions by dashes in alignment columns containing
    matches or deletions only. Insertions are represented by lower case letters,
    with gaps aligned to the insertion shown as periods.  Header lines start
    with '>' followed by the name of the sequence, and optionally a description.
    """

    fmt = "A2M"

    def _read_next_alignment(self, stream):
        """Read the alignment from the stream and return it as an Alignment.

        The state of each alignment column ("D" for a match/deletion column,
        "I" for an insertion column) is derived from the first sequence and
        stored in the "state" column annotation of the returned alignment;
        all other sequences are checked against it.  In the returned
        alignment, all letters are upper case and all gaps are dashes.

        Raises a ValueError if the stream contains no sequences, if sequence
        data appear before the first header line, if the aligned sequences
        differ in length, or if a character is inconsistent with the state
        of its alignment column.
        """
        names = []
        descriptions = []
        chunks = []  # for each sequence, the pieces of its aligned text
        for line in stream:
            if line.startswith(">"):
                parts = line[1:].rstrip().split(None, 1)
                # Pad with empty strings if the name or description is absent.
                parts += [""] * (2 - len(parts))
                name, description = parts
                names.append(name)
                descriptions.append(description)
                chunks.append([])
            else:
                text = line.strip()
                if not text:
                    continue  # blank lines carry no sequence data
                if not chunks:
                    raise ValueError(
                        "Expected a header line starting with '>' before "
                        "the sequence data."
                    )
                chunks[-1].append(text)
        if not chunks:
            raise ValueError("Empty file.")
        lines = ["".join(pieces) for pieces in chunks]
        # The first sequence defines the state of each alignment column.
        states = []
        for c in lines[0]:
            if c == "-" or c.isupper():
                states.append("D")  # Match/deletion state
            elif c == "." or c.islower():
                states.append("I")  # Insertion state
            else:
                raise ValueError("Unexpected letter '%s' in alignment" % c)
        state = "".join(states)
        # Explicit checks instead of assert statements, as those are removed
        # when Python is run with the -O option.
        for name, line in zip(names[1:], lines[1:]):
            if len(line) != len(state):
                raise ValueError(
                    "Aligned sequence '%s' has length %d (expected %d)"
                    % (name, len(line), len(state))
                )
            for c, m in zip(line, state):
                if m == "D":  # Match/deletion state
                    consistent = c == "-" or c.isupper()
                else:  # Insertion state
                    consistent = c == "." or c.islower()
                if not consistent:
                    raise ValueError(
                        "Unexpected letter '%s' in aligned sequence '%s' "
                        "for an alignment column in state '%s'" % (c, name, m)
                    )
        # From here on, the state string alone distinguishes matches from
        # insertions.
        lines = [line.upper().replace(".", "-") for line in lines]
        coordinates = Alignment.infer_coordinates(lines)
        records = []
        for name, description, line in zip(names, descriptions, lines):
            sequence = Seq(line.replace("-", ""))
            record = SeqRecord(sequence, name, description=description)
            records.append(record)
        alignment = Alignment(records, coordinates)
        alignment.column_annotations = {"state": state}
        self._close()  # a2m files contain only one alignment
        return alignment