File size: 4,640 Bytes
d1ceb73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
# defusedxml
#
# Copyright (c) 2013 by Christian Heimes <[email protected]>
# Licensed to PSF under a Contributor Agreement.
# See https://www.python.org/psf/license for licensing details.
"""Defused xml.etree.ElementTree facade
"""
from __future__ import print_function, absolute_import
import sys
import warnings
from xml.etree.ElementTree import ParseError
from xml.etree.ElementTree import TreeBuilder as _TreeBuilder
from xml.etree.ElementTree import parse as _parse
from xml.etree.ElementTree import tostring
from .common import PY3
if PY3:
import importlib
else:
from xml.etree.ElementTree import XMLParser as _XMLParser
from xml.etree.ElementTree import iterparse as _iterparse
from .common import (
DTDForbidden,
EntitiesForbidden,
ExternalReferenceForbidden,
_generate_etree_functions,
)
__origin__ = "xml.etree.ElementTree"
def _get_py3_cls():
"""Python 3.3 hides the pure Python code but defusedxml requires it.
The code is based on test.support.import_fresh_module().
"""
pymodname = "xml.etree.ElementTree"
cmodname = "_elementtree"
pymod = sys.modules.pop(pymodname, None)
cmod = sys.modules.pop(cmodname, None)
sys.modules[cmodname] = None
try:
pure_pymod = importlib.import_module(pymodname)
finally:
# restore module
sys.modules[pymodname] = pymod
if cmod is not None:
sys.modules[cmodname] = cmod
else:
sys.modules.pop(cmodname, None)
# restore attribute on original package
etree_pkg = sys.modules["xml.etree"]
if pymod is not None:
etree_pkg.ElementTree = pymod
elif hasattr(etree_pkg, "ElementTree"):
del etree_pkg.ElementTree
_XMLParser = pure_pymod.XMLParser
_iterparse = pure_pymod.iterparse
# patch pure module to use ParseError from C extension
pure_pymod.ParseError = ParseError
return _XMLParser, _iterparse
if PY3:
_XMLParser, _iterparse = _get_py3_cls()
_sentinel = object()
class DefusedXMLParser(_XMLParser):
def __init__(
self,
html=_sentinel,
target=None,
encoding=None,
forbid_dtd=False,
forbid_entities=True,
forbid_external=True,
):
# Python 2.x old style class
_XMLParser.__init__(self, target=target, encoding=encoding)
if html is not _sentinel:
# the 'html' argument has been deprecated and ignored in all
# supported versions of Python. Python 3.8 finally removed it.
if html:
raise TypeError("'html=True' is no longer supported.")
else:
warnings.warn(
"'html' keyword argument is no longer supported. Pass "
"in arguments as keyword arguments.",
category=DeprecationWarning,
)
self.forbid_dtd = forbid_dtd
self.forbid_entities = forbid_entities
self.forbid_external = forbid_external
if PY3:
parser = self.parser
else:
parser = self._parser
if self.forbid_dtd:
parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl
if self.forbid_entities:
parser.EntityDeclHandler = self.defused_entity_decl
parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl
if self.forbid_external:
parser.ExternalEntityRefHandler = self.defused_external_entity_ref_handler
def defused_start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
raise DTDForbidden(name, sysid, pubid)
def defused_entity_decl(
self, name, is_parameter_entity, value, base, sysid, pubid, notation_name
):
raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name)
def defused_unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
# expat 1.2
raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name) # pragma: no cover
def defused_external_entity_ref_handler(self, context, base, sysid, pubid):
raise ExternalReferenceForbidden(context, base, sysid, pubid)
# aliases
# XMLParse is a typo, keep it for backwards compatibility
XMLTreeBuilder = XMLParse = XMLParser = DefusedXMLParser
parse, iterparse, fromstring = _generate_etree_functions(
DefusedXMLParser, _TreeBuilder, _parse, _iterparse
)
XML = fromstring
__all__ = [
"ParseError",
"XML",
"XMLParse",
"XMLParser",
"XMLTreeBuilder",
"fromstring",
"iterparse",
"parse",
"tostring",
]
|