Spaces:
No application file
No application file
# Copyright 2022 by Michiel de Hoon. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Bio.Align support for aligned FASTA files.

Aligned FASTA files are FASTA files in which alignment gaps in a sequence are
represented by dashes. Each sequence line in an aligned FASTA should have the
same length.
"""
from Bio.Align import Alignment | |
from Bio.Align import interfaces | |
from Bio.Seq import Seq | |
from Bio.SeqRecord import SeqRecord | |
class AlignmentWriter(interfaces.AlignmentWriter):
    """Alignment file writer for the aligned FASTA file format."""

    fmt = "FASTA"

    def format_alignment(self, alignment):
        """Return a string with the alignment in aligned FASTA format.

        Each sequence contributes a header line starting with '>' followed
        by one line holding its aligned (gapped) row. A sequence that has an
        ``id`` attribute is written as ``>id`` or, when its description is
        non-empty, ``>id description``; a sequence without an ``id``
        attribute gets a bare ``>`` header.

        Raises:
            TypeError: if ``alignment`` is not an ``Alignment`` instance.
        """
        if not isinstance(alignment, Alignment):
            raise TypeError("Expected an Alignment object")
        lines = []
        # Iterating over the alignment supplies the row text that is written
        # below the header of the corresponding entry in alignment.sequences.
        for sequence, line in zip(alignment.sequences, alignment):
            try:
                name = sequence.id
            except AttributeError:  # Seq or plain string
                lines.append(">")
            else:  # SeqRecord
                if sequence.description:
                    lines.append(f">{name} {sequence.description}")
                else:
                    lines.append(f">{name}")
            lines.append(line)
        return "\n".join(lines) + "\n"
class AlignmentIterator(interfaces.AlignmentIterator):
    """Alignment iterator for aligned FASTA files.

    An aligned FASTA file contains one multiple alignment. Alignment gaps are
    represented by dashes in the sequence lines. Header lines start with '>'
    followed by the name of the sequence, and optionally a description.
    """

    fmt = "FASTA"

    def _read_next_alignment(self, stream):
        """Parse ``stream`` and return the alignment it contains.

        Every header line (starting with '>') opens a new record; the text
        after '>' is split on the first run of whitespace into the record
        name and an optional description. All following lines up to the next
        header are stripped of surrounding whitespace and concatenated to
        form the gapped row of that record. Blank lines are ignored.

        Raises:
            ValueError: if the stream contains no header line at all
                ("Empty file."), or if sequence data appears before the
                first header line.
        """
        names = []
        descriptions = []
        # One list of line fragments per record; joining once at the end
        # avoids re-copying a growing string for every sequence line.
        rows = []
        for line in stream:
            if line.startswith(">"):
                parts = line[1:].rstrip().split(None, 1)
                names.append(parts[0] if parts else "")
                descriptions.append(parts[1] if len(parts) == 2 else "")
                rows.append([])
                continue
            fragment = line.strip()
            if rows:
                rows[-1].append(fragment)
            elif fragment:
                # Sequence data without a preceding header cannot be
                # attributed to any record.
                raise ValueError(
                    "Expected a header line starting with '>' before sequence data."
                )
        if not rows:
            raise ValueError("Empty file.")
        lines = ["".join(fragments) for fragments in rows]
        # The coordinates are derived from the gapped rows, so this must run
        # before the dashes are removed below.
        coordinates = Alignment.infer_coordinates(lines)
        records = [
            SeqRecord(Seq(line.replace("-", "")), id=name, description=description)
            for name, description, line in zip(names, descriptions, lines)
        ]
        alignment = Alignment(records, coordinates)
        # NOTE(review): _close() is inherited; presumably it releases the
        # stream because the file holds a single alignment - confirm.
        self._close()
        return alignment