File size: 4,403 Bytes
b7731cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Copyright 2003 Iddo Friedberg. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""A parser for the NCBI blastpgp version 2.2.5 output format.

Currently only supports the '-m 9' option (table w/ annotations).
BlastTableReader returns one BlastTableRec instance per record.
"""

import warnings

from Bio import BiopythonDeprecationWarning


# Runs at import time: importing this module issues a
# BiopythonDeprecationWarning that points users to the Bio.Align
# "tabular" parser as the replacement.
warnings.warn(
    "Bio.Blast.ParseBlastTable.py has been deprecated, and will be removed "
    "in a future version of Biopython.  To parse tabular output from BLAST "
    "programs, please use the Bio.Align module:\n"
    "\n"
    "from Bio import Align\n"
    "alignments = Align.parse('myblastoutputfile.txt', 'tabular')\n"
    "\n"
    "and iterate over the alignments.",
    BiopythonDeprecationWarning,
)


class BlastTableEntry:
    """One table line of BLAST output, converted to typed attributes.

    The line is split on whitespace and the first twelve columns are stored
    as follows (any further columns are ignored):

    - ``qid``, ``sid``: columns 0 and 1, each split on ``"|"`` into a list.
    - ``pid``: column 2 as a float.
    - ``ali_len``, ``mis``, ``gaps``: columns 3, 4 and 5 as ints.
    - ``q_bounds``: columns 6 and 7 as an ``(int, int)`` tuple.
    - ``s_bounds``: columns 8 and 9 as an ``(int, int)`` tuple.
    - ``e_value``, ``bit_score``: columns 10 and 11 as floats.

    NOTE(review): the attribute names suggest the standard BLAST tabular
    column order (query id, subject id, % identity, ...) - confirm against
    the BLAST documentation.
    """

    def __init__(self, in_rec):
        """Parse one whitespace-separated table line.

        Raises IndexError when the line has fewer than twelve columns and
        ValueError when a numeric column cannot be converted.
        """
        columns = in_rec.split()

        # Identifiers are kept as lists of their "|"-separated parts.
        query_id, subject_id = columns[0], columns[1]
        self.qid = query_id.split("|")
        self.sid = subject_id.split("|")

        self.pid = float(columns[2])
        self.ali_len, self.mis, self.gaps = (int(columns[i]) for i in (3, 4, 5))

        # Start/end pairs for the query and the subject, in that order.
        query_start = int(columns[6])
        query_end = int(columns[7])
        self.q_bounds = (query_start, query_end)
        subject_start = int(columns[8])
        subject_end = int(columns[9])
        self.s_bounds = (subject_start, subject_end)

        self.e_value, self.bit_score = (float(columns[i]) for i in (10, 11))


class BlastTableRec:
    """One BLAST table record: header values plus its table entries.

    The header attributes (``program``, ``version``, ``date``, ``iteration``,
    ``query``, ``database``) start out as ``None``; ``entries`` starts out as
    an empty list and grows through add_entry().
    """

    def __init__(self):
        """Create an empty record with no header values and no entries."""
        self.program = self.version = self.date = None
        self.iteration = self.query = self.database = None
        self.entries = []

    def add_entry(self, entry):
        """Append ``entry`` to the end of this record's ``entries`` list."""
        self.entries.append(entry)


class BlastTableReader:
    """Reader for the output of blastpgp.

    Wraps an open text handle and returns one BlastTableRec per record. A
    record is a run of ``#`` header lines followed by its table lines; the
    next ``#`` line seen after table lines starts the following record.

    Call ``next(reader)`` repeatedly: once the handle is exhausted it returns
    ``None`` (it does not raise StopIteration). ``for record in reader`` is
    also supported and stops at that ``None``.
    """

    # Maps the tag that opens a header line to the suffix of the
    # ``_parse_<suffix>`` method that handles that line.
    reader_keywords = {
        "BLASTP": "version",
        "Iteration": "iteration",
        "Query": "query",
        "Database": "database",
        "Fields": "fields",
    }

    def __init__(self, handle):
        """Store the handle and skip ahead to the first record.

        Lines are read and discarded until one containing "BLASTP" is found;
        that line is kept as the look-ahead for the first __next__() call.
        """
        self.handle = handle
        inline = self.handle.readline()
        # zip forward to start of record
        while inline and "BLASTP" not in inline:
            inline = self.handle.readline()
        self._lookahead = inline
        self._n = 0
        self._in_header = 1

    def __iter__(self):
        """Iterate over the remaining records, stopping at end of input.

        __next__() signals exhaustion by returning None, so it is used here
        as the sentinel that ends the iteration.
        """
        return iter(self.__next__, None)

    def __next__(self):
        """Return the next record, or None when the input is exhausted."""
        self.table_record = BlastTableRec()
        self._n += 1
        inline = self._lookahead
        if not inline:
            return None
        while inline:
            if inline[0] == "#":
                if not self._in_header:
                    # A header line after table lines belongs to the next
                    # record; keep it as the look-ahead and stop here.
                    break
                self._in_header = self._consume_header(inline)
            elif inline.strip():
                self._consume_entry(inline)
                self._in_header = 0
            # Whitespace-only lines hold no fields and are skipped.

            inline = self.handle.readline()
        self._lookahead = inline
        self._in_header = 1
        return self.table_record

    def _consume_entry(self, inline):
        """Parse one table line and add it to the current record."""
        current_entry = BlastTableEntry(inline)
        self.table_record.add_entry(current_entry)

    def _consume_header(self, inline):
        """Dispatch one ``#`` header line to its ``_parse_*`` method.

        The keyword must open the text that follows the leading ``#``.
        Matching it anywhere in the line would misroute, for example, a
        "# Fields: Query id, ..." line to the query parser.

        Returns 1 while the header continues and 0 once it is finished.
        Unrecognized header lines are ignored and leave the header open.
        """
        tag = inline.lstrip("#").lstrip()
        for keyword, suffix in self.reader_keywords.items():
            if tag.startswith(keyword):
                return self._Parse(f"_parse_{suffix}", inline)
        return 1

    def _parse_version(self, inline):
        """Store program, version and date from the three words after ``#``."""
        program, version, date = inline.split()[1:]
        self.table_record.program = program
        self.table_record.version = version
        self.table_record.date = date
        return 1

    def _parse_iteration(self, inline):
        """Store the third word of the line as the integer iteration."""
        self.table_record.iteration = int(inline.split()[2])
        return 1

    def _parse_query(self, inline):
        """Store every word from the third onwards as the query (a list)."""
        self.table_record.query = inline.split()[2:]
        return 1

    def _parse_database(self, inline):
        """Store the third word of the line as the database."""
        self.table_record.database = inline.split()[2]
        return 1

    def _parse_fields(self, inline):
        """Mark the end of the header; the line's content is not stored."""
        return 0

    def _Parse(self, method_name, inline):
        """Call the method named ``method_name`` on ``inline``."""
        return getattr(self, method_name)(inline)