|
""" |
|
This module contains the core classes of version 2.0 of SAX for Python. |
|
This file provides only default classes with absolutely minimum |
|
functionality, from which drivers and applications can be subclassed. |
|
|
|
Many of these classes are empty and are included only as documentation |
|
of the interfaces. |
|
|
|
$Id$ |
|
""" |
|
|
|
# Version identifier of the SAX for Python classes defined in this module.
version = "2.0beta"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ErrorHandler:
    """Default callbacks for problems reported by a SAX parser.

    Register an object implementing this interface with an XMLReader
    and the parser routes every warning and error through the methods
    below.  Three severities exist: warnings, (possibly) recoverable
    errors, and unrecoverable errors.  Each method receives a
    SAXParseException as its only argument.
    """

    def warning(self, exception):
        """Handle a warning: the default prints it and carries on."""
        print(exception)

    def error(self, exception):
        """Handle a recoverable error: the default raises it."""
        raise exception

    def fatalError(self, exception):
        """Handle a non-recoverable error: the default raises it."""
        raise exception
|
|
|
|
|
|
|
|
|
class ContentHandler:
    """Callback interface for the logical content of a document.

    Of all SAX interfaces this is the central one and the one that
    matters most to applications.  Events arrive in the same order as
    the corresponding information appears in the document.
    """

    def __init__(self):
        # No locator is known until the parser hands one over.
        self._locator = None

    def setDocumentLocator(self, locator):
        """Accept the locator the parser offers for tracing event origins.

        Parsers are strongly encouraged, though not strictly required,
        to provide a locator.  A parser that does so must pass it to
        the application through this method before it calls any other
        method of this interface.

        With the locator an application can find out the end position
        of any document-related event, even when the parser reports no
        error.  The usual purpose is reporting the application's own
        errors (for instance character content that violates one of its
        business rules).  The positions are probably not good enough
        for use with a search engine.

        The locator is only guaranteed to answer correctly while one of
        this interface's event methods is running; applications should
        not query it at any other time.
        """
        self._locator = locator

    def startDocument(self):
        """Be notified that a document begins.

        The parser makes this call exactly once, ahead of every other
        method of this interface and of DTDHandler, with
        setDocumentLocator as the sole exception.
        """

    def endDocument(self):
        """Be notified that a document has ended.

        The parser makes this call exactly once, as the very last call
        of the parse.  It must not do so before it has either given up
        parsing (because of an unrecoverable error) or consumed all of
        the input.
        """

    def startPrefixMapping(self, prefix, uri):
        """Open the scope of a prefix-to-URI Namespace mapping.

        Ordinary Namespace processing does not need this event: when
        the http://xml.org/sax/features/namespaces feature is true
        (the default), the SAX XML reader substitutes prefixes in
        element and attribute names on its own.

        Some applications, however, use prefixes inside character data
        or attribute values, where automatic expansion is not safe.
        The start/endPrefixMapping events give such applications what
        they need to expand those prefixes themselves when required.

        start/endPrefixMapping events are not guaranteed to nest
        properly relative to each other.  Every startPrefixMapping
        event precedes the corresponding startElement event and every
        endPrefixMapping event follows the corresponding endElement
        event, but beyond that their order is not guaranteed.
        """

    def endPrefixMapping(self, prefix):
        """Close the scope of a prefix-to-URI mapping.

        Details are given under startPrefixMapping.  The event always
        follows the corresponding endElement event; apart from that,
        the order of endPrefixMapping events is not guaranteed.
        """

    def startElement(self, name, attrs):
        """Report that an element starts (non-namespace mode).

        name is the raw XML 1.0 name of the element type as a string;
        attrs is an instance of the Attributes class holding the
        element's attributes.
        """

    def endElement(self, name):
        """Report that an element ends (non-namespace mode).

        name is the name of the element type, exactly as passed to the
        startElement event.
        """

    def startElementNS(self, name, qname, attrs):
        """Report that an element starts (namespace mode).

        name is the element type as a (uri, localname) tuple, qname is
        the raw XML 1.0 name used in the source document, and attrs is
        an instance of the Attributes class holding the element's
        attributes.

        For an element without a namespace the uri part of the name
        tuple is None.
        """

    def endElementNS(self, name, qname):
        """Report that an element ends (namespace mode).

        name is the name of the element type, exactly as passed to the
        startElementNS event.
        """

    def characters(self, content):
        """Be notified of character data.

        The parser calls this method for each chunk of character data.
        Contiguous character data may be delivered as a single chunk
        or split into several, but all characters of one event must
        come from the same external entity so that the Locator gives
        useful information.
        """

    def ignorableWhitespace(self, whitespace):
        """Be notified of ignorable whitespace in element content.

        Validating parsers must report every chunk of ignorable
        whitespace through this method (see the W3C XML 1.0
        recommendation, section 2.10).  Non-validating parsers may
        use it as well if they can parse and use content models.

        Contiguous whitespace may be delivered as a single chunk or
        split into several, but all characters of one event must come
        from the same external entity so that the Locator gives useful
        information.
        """

    def processingInstruction(self, target, data):
        """Be notified of a processing instruction.

        The parser calls this method once per processing instruction
        it finds; processing instructions may appear before or after
        the main document element.

        An XML declaration (XML 1.0, section 2.8) or a text
        declaration (XML 1.0, section 4.3.1) should never be reported
        through this method.
        """

    def skippedEntity(self, name):
        """Be notified of an entity the parser skipped.

        The parser calls this method once per skipped entity.
        Non-validating processors may skip entities whose declarations
        they have not seen (because, for example, the entity was
        declared in an external DTD subset).  Any processor may skip
        external entities, depending on the values of the
        http://xml.org/sax/features/external-general-entities and the
        http://xml.org/sax/features/external-parameter-entities
        properties.
        """
|
|
|
|
|
|
|
|
|
class DTDHandler:
    """Receiver for DTD events.

    Only the DTD events that basic parsing requires are part of this
    interface (unparsed entities and attributes).
    """

    def notationDecl(self, name, publicId, systemId):
        """Handle a notation declaration event."""

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Handle an unparsed entity declaration event."""
|
|
|
|
|
|
|
|
|
class EntityResolver:
    """Basic interface for resolving entities.

    Register an object implementing this interface with your Parser
    and the parser calls its method to resolve every external entity.
    Note that DefaultHandler implements this interface with the
    default behaviour.
    """

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity.

        The result is either the system identifier to read from, as a
        string, or an InputSource to read from.  This default hands
        systemId back unchanged.
        """
        return systemId
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Identifiers of the core SAX2 features.  Each constant is the URI that
# names the feature when it is queried or set on an XMLReader.

# Namespace processing (true by default).
feature_namespaces = "http://xml.org/sax/features/namespaces"

# Reporting of original prefixed names and of the attributes used for
# Namespace declarations.
feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"

# Interning of element names, prefixes, attribute names, Namespace URIs
# and local names.
feature_string_interning = "http://xml.org/sax/features/string-interning"

# Reporting of validation errors.
feature_validation = "http://xml.org/sax/features/validation"

# Inclusion of external general (text) entities.
feature_external_ges = "http://xml.org/sax/features/external-general-entities"

# Inclusion of external parameter entities.
feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"

# Every feature identifier defined above, in declaration order.
all_features = [
    feature_namespaces,
    feature_namespace_prefixes,
    feature_string_interning,
    feature_validation,
    feature_external_ges,
    feature_external_pes,
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Identifiers of the core SAX2 properties.  Each constant is the URI
# that names the property when it is read or set on an XMLReader.

# Optional extension handler for lexical events such as comments.
property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"

# Optional extension handler for DTD-related events other than
# notations and unparsed entities.
property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"

# DOM node associated with the parse when the reader is a DOM iterator.
property_dom_node = "http://xml.org/sax/properties/dom-node"

# Literal string of characters that was the source of the current event.
property_xml_string = "http://xml.org/sax/properties/xml-string"

# Python-specific: name of the encoding to assume for input data.
property_encoding = "http://www.python.org/sax/properties/encoding"

# Python-specific: dictionary used to intern common strings.
property_interning_dict = "http://www.python.org/sax/properties/interning-dict"

# Every property identifier defined above.  The order of this list is
# kept exactly as it has always been (dom-node before
# declaration-handler), which differs from the declaration order.
all_properties = [
    property_lexical_handler,
    property_dom_node,
    property_declaration_handler,
    property_xml_string,
    property_encoding,
    property_interning_dict,
]
|
|
|
|
|
class LexicalHandler:
    """Optional SAX2 handler for lexical events.

    A lexical handler receives information about how an XML document
    was encoded, such as comments and the boundaries of CDATA marked
    sections, as opposed to what the document contains, which is
    reported to the ContentHandler.

    The LexicalHandler of an XMLReader is set through the setProperty
    method, using the property identifier
    'http://xml.org/sax/properties/lexical-handler'.
    """

    def comment(self, content):
        """Report a comment found anywhere in the document.

        This includes comments in the DTD and outside the document
        element.

        content is a string holding the contents of the comment.
        """

    def startDTD(self, name, public_id, system_id):
        """Report the start of the DTD declarations, if any.

        The event is only reported when the document has an associated
        DTD.

        A startEntity event is reported before declaration events from
        the external DTD subset are reported; this can be used to
        infer from which subset DTD declarations derive.

        name is the name of the document element type, public_id is
        the public identifier of the DTD (None if none was supplied)
        and system_id is the system identifier of the external subset
        (None if none was supplied).
        """

    def endDTD(self):
        """Signal the end of the DTD declarations."""

    def startCDATA(self):
        """Report the beginning of a CDATA marked section.

        The contents of the CDATA marked section are reported through
        the characters event.
        """

    def endCDATA(self):
        """Report the end of a CDATA marked section."""
|
|