File size: 1,992 Bytes
8896a5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def parse(f, comment="#"):
    """
    Parse a file in ``.fasta`` format.

    Lines starting with ``comment`` are skipped, a line starting with ``>``
    opens a new record (the text after ``>`` becomes its name), and every
    other line is stripped, upper-cased and appended to the sequence of the
    current record.  Lines that appear before the first ``>`` header are
    discarded.

    Text versus binary input is detected from the type of the first line the
    file yields, so any iterable of ``str`` or ``bytes`` lines is accepted
    (regular files, ``io.StringIO``, ``io.BytesIO``, ...).  For binary input
    the returned names and sequences are ``bytes``.

    :param f: Input file object, opened in text or binary mode
    :type f: _io.TextIOWrapper
    :param comment: Prefix marking comment lines; it is converted to match
        the type of the lines, and an empty value disables comment skipping
    :type comment: str

    :return: names, sequence
    :rtype: list[str], list[str]
    """
    starter = ">"
    empty = ""
    names = []
    sequences = []
    name = None
    sequence = []
    flavour_known = False
    for line in f:
        if not flavour_known:
            # Decide str/bytes from the data itself rather than ``f.mode``,
            # which is missing or not a string on several file-like objects.
            flavour_known = True
            if isinstance(line, bytes):
                starter = b">"
                empty = b""
                if isinstance(comment, str):
                    comment = comment.encode("utf-8")
            elif isinstance(comment, bytes):
                comment = comment.decode("utf-8")
        # The comment test runs on the raw line, so a marker preceded by
        # whitespace is not treated as a comment.
        if comment and line.startswith(comment):
            continue
        line = line.strip()
        if line.startswith(starter):
            # A new header closes the record collected so far, if any.
            if name is not None:
                names.append(name)
                sequences.append(empty.join(sequence))
            name = line[1:]
            sequence = []
        else:
            sequence.append(line.upper())
    # Flush the last record, which has no following header to close it.
    if name is not None:
        names.append(name)
        sequences.append(empty.join(sequence))

    return names, sequences


def parse_directory(directory, extension=".seq"):
    """
    Parse all files in a directory ending with ``extension``.

    Each matching file is read in binary mode with :func:`parse`; only the
    first record of a file is kept, decoded as UTF-8 and stripped.  Files
    that contain no record are skipped.  Files are visited in sorted name
    order so the result is deterministic.

    :param directory: Input directory
    :type directory: str
    :param extension: Extension of all files to read in
    :type extension: str

    :return: names, sequence
    :rtype: list[str], list[str]
    """
    # Imported locally so the function does not rely on a module-level
    # ``import os`` being present.
    import os

    names = []
    sequences = []

    for seq_path in sorted(os.listdir(directory)):
        if not seq_path.endswith(extension):
            continue
        # The context manager closes the handle even if parsing fails.
        with open(os.path.join(directory, seq_path), "rb") as handle:
            n, s = parse(handle)
        if not n:
            # No ``>`` header in this file, so there is nothing to collect.
            continue
        names.append(n[0].decode("utf-8").strip())
        sequences.append(s[0].decode("utf-8").strip())
    return names, sequences


def write(nam, seq, f):
    """
    Write records to a file in ``.fasta`` format.

    Names and sequences are paired positionally; each pair is emitted as a
    ``>name`` header line followed by one line holding the whole sequence.
    Pairing stops at the end of the shorter list.

    :param nam: List of names
    :type nam: list[str]
    :param seq: List of sequences
    :type seq: list[str]
    :param f: Output file object
    :type f: _io.TextIOWrapper
    """
    for record in zip(nam, seq):
        header, residues = record
        f.write(f">{header}\n")
        f.write(f"{residues}\n")