File size: 2,134 Bytes
44459bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
"""Helpers for handling fasta files."""
from __future__ import annotations
from pathlib import Path
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from folding_studio_data_models.exceptions import FastaValidationError
def validate_fasta(
    fasta_input: str | Path,
    allow_multimer: bool = True,
    max_aa_length: int | None = None,
    str_output: bool = False,
) -> SeqRecord | list[SeqRecord] | str:
    """Parse a FASTA input and check it against a few structural rules.

    The input is handed unchanged to ``SeqIO.parse(..., "fasta")`` and every
    record is loaded in memory before the checks run. The checks are applied
    in the order listed under ``Raises``; the first one that fails raises.

    NOTE(review): Biopython documents a plain ``str`` argument as a file
    name, not as raw FASTA text -- confirm callers pass a path or a handle.

    Args:
        fasta_input (str | Path): Input fasta, forwarded to ``SeqIO.parse``.
        allow_multimer (bool, optional): Accept more than one record in the
            input. Defaults to True.
        max_aa_length (int | None, optional): Maximum number of residues
            allowed for each individual record. ``None`` disables the check.
            Defaults to None.
        str_output (bool, optional): Return the records re-serialized as a
            FASTA string instead of ``SeqRecord`` objects. Takes precedence
            over ``allow_multimer`` for the return type. Defaults to False.

    Raises:
        FastaValidationError: If no sequence is found in the FASTA content.
        FastaValidationError: If one or more sequences have 0 residues.
        FastaValidationError: If only monomers are supported but several
            records were given.
        FastaValidationError: If at least one sequence is longer than
            ``max_aa_length``.

    Returns:
        SeqRecord | list[SeqRecord] | str: When ``str_output`` is set, a
        string with one ``>description`` line followed by one sequence line
        per record, joined by newlines (no trailing newline). Otherwise the
        list of records if ``allow_multimer`` is set, else the single first
        record.
    """
    entries: list[SeqRecord] = list(SeqIO.parse(fasta_input, "fasta"))
    if not entries:
        raise FastaValidationError("No sequence found in the FASTA content.")

    # Residue count of each record, reused by the emptiness and size checks.
    residue_counts = [len(entry.seq) for entry in entries]

    if 0 in residue_counts:
        raise FastaValidationError("One or more sequence has 0 residue.")

    if len(entries) > 1 and not allow_multimer:
        raise FastaValidationError(
            "Only monomer are supported but a multimer was given."
        )

    # `entries` is non-empty here, so max() is safe; the longest record is the
    # only one that needs comparing against the limit.
    if max_aa_length is not None and max(residue_counts) > max_aa_length:
        raise FastaValidationError(
            "Unuspported sequence lenght in FASTA content. "
            f"Max supported sequence lenght is {max_aa_length}AA."
        )

    if str_output:
        fasta_blocks = [
            f">{entry.description}\n{str(entry.seq)}" for entry in entries
        ]
        return "\n".join(fasta_blocks)

    return entries if allow_multimer else entries[0]
|