# aakash0017's picture
# Upload folder using huggingface_hub
# b7731cd
# Copyright 2009 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Code for calling and parsing ScanProsite from ExPASy."""
# The helpers imported below are implementation details of this module and
# are not intended for reuse by callers.
from urllib.request import urlopen
from urllib.parse import urlencode
from xml.sax import handler
from xml.sax.expatreader import ExpatParser
class Record(list):
    """Container for the search results returned by ScanProsite.

    The record is itself a list holding the individual search results. In
    addition it carries the data members n_match, n_seq, capped, and
    warning, all of which start out as None.
    """

    def __init__(self):
        """Create an empty record with every summary field unset."""
        # All four summary fields share the same "not yet known" value.
        self.n_match = self.n_seq = self.capped = self.warning = None
# Between October 10th and October 28th 2020 the main URL of PROSITE changed
# from https://www.expasy.org to https://prosite.expasy.org (noticed on
# October 28th 2020). The default mirror was therefore changed from
# https://www.expasy.org to https://prosite.expasy.org.
def scan(seq="", mirror="https://prosite.expasy.org", output="xml", **keywords):
    """Run a ScanProsite search and return a handle to the results.

    Arguments:
     - mirror:   The ScanProsite mirror to be used
                 (default: https://prosite.expasy.org).
     - seq:      The query sequence, or UniProtKB (Swiss-Prot,
                 TrEMBL) accession
     - output:   Format of the search results
                 (default: xml)

    Any additional search parameters may be supplied as keyword arguments;
    keywords whose value is None are left out of the request. See the
    documentation for programmatic access to ScanProsite at
    https://prosite.expasy.org/scanprosite/scanprosite_doc.html
    for a description of such parameters.

    The returned object is the handle produced by opening the search URL.
    Results in the XML format can be turned into a Python object with the
    Bio.ExPASy.ScanProsite.read function.
    """
    query = {"seq": seq, "output": output}
    # Only forward the optional parameters that were actually given a value.
    query.update(
        (name, setting) for name, setting in keywords.items() if setting is not None
    )
    return urlopen(f"{mirror}/cgi-bin/prosite/PSScan.cgi?{urlencode(query)}")
def read(handle):
    """Parse search results returned by ScanProsite into a Python object.

    The handle is run through the module's SAX parser, and the record
    collected by the content handler is returned.
    """
    collector = ContentHandler()
    reader = Parser()
    reader.setContentHandler(collector)
    reader.parse(handle)
    # The content handler builds the record while the document is parsed.
    return collector.record
# The classes below are considered private
class Parser(ExpatParser):
    """Process the result from a ScanProsite search (PRIVATE).

    An Expat SAX parser that inspects the first chunk of data it is fed and
    rejects it unless it starts with an XML declaration.
    """

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        # True until the first chunk of data has been checked by feed().
        self.firsttime = True

    def feed(self, data, isFinal=0):
        """Raise an Error if plain text is received in the data.

        This is to show the Error messages returned by ScanProsite.

        Arguments:
         - data:    The next chunk of the document, either bytes (binary
                    handle) or str (text handle).
         - isFinal: Passed through unchanged to ExpatParser.feed.

        Raises ValueError, carrying the offending data, if the very first
        chunk does not start with ``<?xml``.
        """
        # Error messages returned by the ScanProsite server are formatted as
        # plain text instead of an XML document. To catch such error
        # messages, we override the feed method of the Expat parser.
        # The error message is (hopefully) contained in the data that was just
        # fed to the parser.
        if self.firsttime:
            # Compare in the data's own type rather than decoding: text
            # handles deliver str (which has no .decode), and slicing bytes
            # at 5 may split a multi-byte character, which would turn the
            # server's message into an unrelated UnicodeDecodeError.
            declaration = "<?xml" if isinstance(data, str) else b"<?xml"
            if data[:5] != declaration:
                raise ValueError(data)
            self.firsttime = False
        return super().feed(data, isFinal)
class ContentHandler(handler.ContentHandler):
    """Process and fill in the records, results of the search (PRIVATE).

    SAX callbacks build a Record when the ``matchset`` root element opens
    and append one dictionary per ``match`` element; each direct child of a
    ``match`` becomes a key/value pair in that dictionary.
    """

    # Child elements of <match> whose text is converted to int.
    integers = ("start", "stop")
    # Child elements of <match> known to hold text. Elements that appear in
    # neither tuple are stored as text as well.
    strings = (
        "sequence_ac",
        "sequence_id",
        "sequence_db",
        "signature_ac",
        "level",
        "level_tag",
    )

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        # Stack of the names of the currently open elements, outermost first.
        self.element = []
        # Text collected since the most recent start tag.
        self.content = ""

    def startElement(self, name, attrs):
        """Define the beginning of a record and stores the search record."""
        self.element.append(name)
        # Each start tag begins a fresh run of character data.
        self.content = ""
        if self.element == ["matchset"]:
            self.record = Record()
            self.record.n_match = int(attrs["n_match"])
            self.record.n_seq = int(attrs["n_seq"])
        elif self.element == ["matchset", "match"]:
            match = {}
            self.record.append(match)

    def endElement(self, name):
        """Define the end of the search record."""
        # Pop outside of the assert: an assert statement is removed entirely
        # under ``python -O``, which would leave the stack unpopped and stop
        # every match field from being stored.
        closed = self.element.pop()
        assert name == closed
        if self.element == ["matchset", "match"]:
            match = self.record[-1]
            if name in ContentHandler.integers:
                match[name] = int(self.content)
            else:
                # Known string fields and unknown fields alike are stored
                # as plain text.
                match[name] = self.content

    def characters(self, content):
        """Store the record content."""
        # Character data may arrive in several pieces; accumulate them.
        self.content += content