File size: 1,992 Bytes
8896a5f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
def parse(f, comment="#"):
    """
    Parse a file in ``.fasta`` format.

    Both text and binary streams are accepted. The stream kind is detected
    from the type of the first line read rather than from ``f.mode``, so
    objects without a ``mode`` attribute (e.g. ``io.StringIO`` or
    ``io.BytesIO``) work as well. For a binary stream the returned names and
    sequences are ``bytes``; for a text stream they are ``str``.

    Lines starting with ``comment`` are skipped. A line starting with ``>``
    opens a new record whose name is everything after the ``>``; all other
    lines are upper-cased and concatenated into the current record's
    sequence. Sequence lines that appear before the first header are dropped.

    :param f: Input file object (any iterable of ``str`` or ``bytes`` lines)
    :type f: _io.TextIOWrapper
    :param comment: Prefix used for comment lines; an empty value disables
        comment skipping
    :type comment: str
    :return: names, sequence
    :rtype: list[str], list[str]
    """
    starter = ">"
    empty = ""
    markers_ready = False
    names = []
    sequences = []
    name = None
    sequence = []
    for line in f:
        if not markers_ready:
            # Match the marker types to what the stream actually yields, so
            # ``startswith``/``join`` never mix ``str`` with ``bytes``.
            if isinstance(line, bytes):
                starter = b">"
                empty = b""
                if isinstance(comment, str):
                    comment = comment.encode("utf-8")
            elif isinstance(comment, bytes):
                comment = comment.decode("utf-8")
            markers_ready = True
        # An empty prefix would match every line, so treat it as "no comments".
        if comment and line.startswith(comment):
            continue
        line = line.strip()
        if line.startswith(starter):
            # A new header closes the record collected so far, if any.
            if name is not None:
                names.append(name)
                sequences.append(empty.join(sequence))
            name = line[1:]
            sequence = []
        else:
            sequence.append(line.upper())
    # Flush the last record, which has no following header to close it.
    if name is not None:
        names.append(name)
        sequences.append(empty.join(sequence))
    return names, sequences
def parse_directory(directory, extension=".seq"):
    """
    Parse all files in a directory ending with ``extension``.

    Each matching file is opened in binary mode and handed to ``parse``.
    Only the first record of each file is kept; its name and sequence are
    decoded as UTF-8 and stripped of surrounding whitespace. Files that
    contain no record are skipped. Entries are processed in sorted order so
    the result does not depend on the file system's listing order.

    :param directory: Input directory
    :type directory: str
    :param extension: Extension of all files to read in
    :type extension: str
    :return: names, sequence
    :rtype: list[str], list[str]
    """
    # Imported locally so the function does not rely on a module-level import.
    import os

    names = []
    sequences = []
    for seq_path in sorted(os.listdir(directory)):
        if not seq_path.endswith(extension):
            continue
        # The context manager closes the handle even if parsing raises.
        with open(os.path.join(directory, seq_path), "rb") as handle:
            n, s = parse(handle)
        if not n:
            # No header line in this file, so there is nothing to take.
            continue
        names.append(n[0].decode("utf-8").strip())
        sequences.append(s[0].decode("utf-8").strip())
    return names, sequences
def write(nam, seq, f):
    """
    Emit name/sequence pairs to ``f`` as ``.fasta`` records.

    Every pair produces a ``>name`` header line followed by one line holding
    the whole sequence. Names and sequences are paired positionally, and
    pairing stops at the end of the shorter list.

    :param nam: List of names
    :type nam: list[str]
    :param seq: List of sequences
    :type seq: list[str]
    :param f: Output file object
    :type f: _io.TextIOWrapper
    """
    for header, residues in zip(nam, seq):
        # Header and sequence go out as two separate writes per record.
        f.write(f">{header}\n")
        f.write(f"{residues}\n")
|