File size: 5,146 Bytes
b7731cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Copyright 2009 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""Code for calling and parsing ScanProsite from ExPASy."""

# urlopen and urlencode are imported for this module's own use; they are not
# intended to be re-used from here.
from urllib.request import urlopen
from urllib.parse import urlencode

from xml.sax import handler
from xml.sax.expatreader import ExpatParser


class Record(list):
    """Represents search results returned by ScanProsite.

    This record is a list containing the search results returned by
    ScanProsite. In addition to the list items, the record carries the
    data members n_match, n_seq, capped, and warning, all of which are
    None on a freshly created record.
    """

    def __init__(self):
        """Initialize an empty record with all data members set to None."""
        # Initialize the underlying list explicitly so the class behaves
        # correctly under cooperative (multiple) inheritance.
        super().__init__()
        # n_match and n_seq are filled in by this module's ContentHandler
        # from the attributes of the <matchset> element.
        self.n_match = None
        self.n_seq = None
        # NOTE(review): capped and warning are not assigned anywhere in this
        # module; presumably they are reserved for server-reported flags --
        # confirm before relying on them.
        self.capped = None
        self.warning = None


# On October 28th 2020 it was noticed that, at some point between October 10th
# and October 28th 2020, the main URL of PROSITE had changed from
# https://www.expasy.org to https://prosite.expasy.org. The default mirror was
# therefore changed from https://www.expasy.org to https://prosite.expasy.org.
def scan(seq="", mirror="https://prosite.expasy.org", output="xml", **keywords):
    """Submit a ScanProsite search and return a handle to the results.

    Arguments:
     - seq:      The query sequence, or UniProtKB (Swiss-Prot,
                 TrEMBL) accession
     - mirror:   The ScanProsite mirror to be used
                 (default: https://prosite.expasy.org).
     - output:   Format of the search results
                 (default: xml)

    Any additional keyword arguments are sent along as further search
    parameters; keywords whose value is None are omitted from the request.
    The parameters understood by the server are described in the
    documentation for programmatic access to ScanProsite at
    https://prosite.expasy.org/scanprosite/scanprosite_doc.html

    The return value is the handle obtained by opening the search URL.
    Results requested in the XML format can be turned into a Python
    object with the read function of this module.

    """
    query = {"seq": seq, "output": output}
    # Options explicitly passed as None are left out of the request.
    query.update(
        {name: setting for name, setting in keywords.items() if setting is not None}
    )
    encoded = urlencode(query)
    return urlopen(f"{mirror}/cgi-bin/prosite/PSScan.cgi?{encoded}")


def read(handle):
    """Parse search results returned by ScanProsite into a Python object.

    The handle is run through this module's SAX parser, and the record
    collected by the content handler while parsing is returned.
    """
    parser = Parser()
    collector = ContentHandler()
    parser.setContentHandler(collector)
    parser.parse(handle)
    return collector.record


# The classes below are considered private


class Parser(ExpatParser):
    """Process the result from a ScanProsite search (PRIVATE).

    An Expat-based SAX parser that checks the first chunk of data it is
    fed and rejects anything that does not start with an XML declaration.
    """

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        # True until the first chunk of data has been checked by feed().
        self.firsttime = True

    def feed(self, data, isFinal=0):
        """Raise an Error if plain text is received in the data.

        This is to show the Error messages returned by ScanProsite.

        Arguments:
         - data:    The next chunk of the document; bytes when parsing a
                    binary handle, str when parsing a text handle.
         - isFinal: Passed on unchanged to ExpatParser.feed.

        Raises ValueError, with the offending chunk as its argument, if the
        first chunk fed to the parser does not start with "<?xml".
        """
        # Error messages returned by the ScanProsite server are formatted
        # as plain text instead of an XML document. To catch such error
        # messages, we override the feed method of the Expat parser.
        # The error message is (hopefully) contained in the data that was just
        # fed to the parser.
        if self.firsttime:
            # Compare against a marker of the same type as the data instead
            # of decoding the slice: decoding fails for str chunks (text
            # handles) and for byte slices that cut a multi-byte character,
            # which would hide the server's message.
            marker = "<?xml" if isinstance(data, str) else b"<?xml"
            if data[:5] != marker:
                raise ValueError(data)
        self.firsttime = False
        return ExpatParser.feed(self, data, isFinal)


class ContentHandler(handler.ContentHandler):
    """Process and fill in the records, results of the search (PRIVATE).

    SAX content handler that builds the search record while the XML is
    parsed: the <matchset> root element creates the record, every <match>
    element directly below it appends a new dictionary to the record, and
    every element directly below a <match> is stored in that dictionary
    under the element name.
    """

    # Names of match fields whose text is converted with int().
    integers = ("start", "stop")
    # Names of match fields that are known to be stored as text.
    strings = (
        "sequence_ac",
        "sequence_id",
        "sequence_db",
        "signature_ac",
        "level",
        "level_tag",
    )

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        # Stack of the names of the elements that are currently open.
        self.element = []
        # Text collected since the most recent start tag.
        self.content = ""

    def startElement(self, name, attrs):
        """Define the beginning of a record and stores the search record."""
        self.element.append(name)
        # Each start tag begins a fresh text buffer.
        self.content = ""
        if self.element == ["matchset"]:
            self.record = Record()
            self.record.n_match = int(attrs["n_match"])
            self.record.n_seq = int(attrs["n_seq"])
        elif self.element == ["matchset", "match"]:
            match = {}
            self.record.append(match)

    def endElement(self, name):
        """Define the end of the search record."""
        # Pop outside of the assert statement: assert statements are removed
        # when Python runs with -O, and the stack must be unwound regardless.
        closed = self.element.pop()
        assert name == closed
        # After the pop, this path means the element that just ended was a
        # direct child of the current <match>.
        if self.element == ["matchset", "match"]:
            match = self.record[-1]
            if name in ContentHandler.integers:
                match[name] = int(self.content)
            else:
                # Fields listed in ``strings`` and fields of unknown type
                # are both stored as text.
                match[name] = self.content

    def characters(self, content):
        """Store the record content."""
        # The parser may deliver the text of one element in several pieces.
        self.content += content