Spaces:
No application file
No application file
File size: 5,146 Bytes
b7731cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# Copyright 2009 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Code for calling and parsing ScanProsite from ExPASy."""
# These names are imported for internal use only; they are not intended for reuse
from urllib.request import urlopen
from urllib.parse import urlencode
from xml.sax import handler
from xml.sax.expatreader import ExpatParser
class Record(list):
    """Hold the outcome of one ScanProsite search.

    A Record is a plain list whose items are the individual search results.
    In addition it carries four data members describing the search as a
    whole: n_match, n_seq, capped, and warning. All four start out as None.
    """

    def __init__(self):
        """Create an empty record with every data member unset."""
        # Assigned left to right, so the attribute order matches the docstring.
        self.n_match = self.n_seq = self.capped = self.warning = None
# Between October 10th and October 28th 2020 the main URL of PROSITE changed
# from https://www.expasy.org to https://prosite.expasy.org (this was noticed
# on October 28th 2020). The default mirror was therefore changed from
# https://www.expasy.org to https://prosite.expasy.org.
def scan(seq="", mirror="https://prosite.expasy.org", output="xml", **keywords):
    """Run a ScanProsite search and return a handle to the raw results.

    Arguments:
     - seq: The query sequence, or UniProtKB (Swiss-Prot,
       TrEMBL) accession
     - mirror: The ScanProsite mirror to be used
       (default: https://prosite.expasy.org).
     - output: Format of the search results
       (default: xml)

    Any additional keyword arguments are sent to the server as further
    search parameters; keywords whose value is None are left out of the
    request. See the documentation for programmatic access to ScanProsite at
    https://prosite.expasy.org/scanprosite/scanprosite_doc.html
    for a description of such parameters.

    The return value is the handle obtained by opening the search URL.
    Results requested in the XML format can be turned into a Python object
    with the Bio.ExPASy.ScanProsite.read function.
    """
    fields = {"seq": seq, "output": output}
    # Only forward the optional search parameters that were actually given.
    fields.update(
        (name, setting) for name, setting in keywords.items() if setting is not None
    )
    query_string = urlencode(fields)
    return urlopen(f"{mirror}/cgi-bin/prosite/PSScan.cgi?{query_string}")
def read(handle):
    """Parse search results returned by ScanProsite into a Python object.

    The handle is run through the module's SAX parser, and the record
    assembled by the content handler is returned.
    """
    xml_parser = Parser()
    collector = ContentHandler()
    xml_parser.setContentHandler(collector)
    xml_parser.parse(handle)
    return collector.record
# The classes below are considered private
class Parser(ExpatParser):
    """Process the result from a ScanProsite search (PRIVATE).

    This is an Expat SAX parser that additionally inspects the first chunk
    of data it is fed and refuses anything that does not start with an XML
    declaration.
    """

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        # True until the first chunk of data has been checked by feed().
        self.firsttime = True

    def feed(self, data, isFinal=0):
        """Raise an Error if plain text is received in the data.

        This is to show the Error messages returned by ScanProsite.

        Arguments:
         - data: The next chunk of the document, either bytes (binary
           handle) or str (text handle).
         - isFinal: Passed through unchanged to ExpatParser.feed.

        Raises ValueError, with the offending chunk as its argument, if the
        first chunk does not start with ``<?xml``.
        """
        # Error messages returned by the ScanProsite server are formatted
        # as plain text instead of an XML document. To catch such error
        # messages, we override the feed method of the Expat parser.
        # The error message is (hopefully) contained in the data that was just
        # fed to the parser.
        if self.firsttime:
            # Compare against a marker of the same type as the chunk. This
            # avoids decoding a byte slice that may cut a multi-byte
            # character in half, and it also works for text-mode handles,
            # for which the SAX machinery feeds str rather than bytes.
            marker = "<?xml" if isinstance(data, str) else b"<?xml"
            if not data.startswith(marker):
                raise ValueError(data)
            self.firsttime = False
        return super().feed(data, isFinal)
class ContentHandler(handler.ContentHandler):
    """Process and fill in the records, results of the search (PRIVATE).

    The handler tracks the stack of currently open elements. A ``matchset``
    root element creates the ``record`` attribute (a Record); each ``match``
    element directly below it appends a new dict to that record, and every
    child element of a ``match`` is stored in that dict under its tag name.
    The ``record`` attribute does not exist until a ``matchset`` root element
    has been seen.
    """

    # Children of <match> whose text is converted to int.
    integers = ("start", "stop")
    # Children of <match> known to hold text. Any tag not listed in
    # ``integers`` is stored as a string, so this tuple is informational.
    strings = (
        "sequence_ac",
        "sequence_id",
        "sequence_db",
        "signature_ac",
        "level",
        "level_tag",
    )

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        # Stack of the tag names of the elements that are currently open.
        self.element = []
        # Text collected for the element being read; reset on every start tag.
        self.content = ""

    def startElement(self, name, attrs):
        """Define the beginning of a record and stores the search record."""
        self.element.append(name)
        self.content = ""
        if self.element == ["matchset"]:
            self.record = Record()
            self.record.n_match = int(attrs["n_match"])
            self.record.n_seq = int(attrs["n_seq"])
        elif self.element == ["matchset", "match"]:
            self.record.append({})

    def endElement(self, name):
        """Define the end of the search record."""
        # Pop outside of any assert statement: asserts are removed when
        # Python runs with -O, and the stack must shrink regardless.
        closed = self.element.pop()
        if closed != name:
            raise AssertionError(
                f"end tag {name!r} does not match open element {closed!r}"
            )
        if self.element == ["matchset", "match"]:
            # We just closed a direct child of <match>; store its text.
            match = self.record[-1]
            if name in self.integers:
                match[name] = int(self.content)
            else:
                # Known string fields and unknown tags alike are kept as text.
                match[name] = self.content

    def characters(self, content):
        """Store the record content."""
        # The parser may deliver the text of one element in several pieces.
        self.content += content
|