Spaces:
Running
Running
File size: 7,174 Bytes
122d3ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# Add x/html serialization to `Elementree`
# Taken from ElementTree 1.3 preview with slight modifications
#
# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
#
# [email protected]
# https://www.pythonware.com/
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2007 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
"""
Python-Markdown provides two serializers which render [`ElementTree.Element`][xml.etree.ElementTree.Element]
objects to a string of HTML. Both functions wrap the same underlying code with only a few minor
differences as outlined below:
1. Empty (self-closing) tags are rendered as `<tag>` for HTML and as `<tag />` for XHTML.
2. Boolean attributes are rendered as `attrname` for HTML and as `attrname="attrname"` for XHTML.
"""
from __future__ import annotations
from xml.etree.ElementTree import ProcessingInstruction
from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY
import re
from typing import Callable, Literal, NoReturn
__all__ = ['to_html_string', 'to_xhtml_string']
RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)', re.I)
def _raise_serialization_error(text: str) -> NoReturn: # pragma: no cover
raise TypeError(
"cannot serialize {!r} (type {})".format(text, type(text).__name__)
)
def _escape_cdata(text) -> str:
# escape character data
try:
# it's worth avoiding do-nothing calls for strings that are
# shorter than 500 character, or so. assume that's, by far,
# the most common case in most applications.
if "&" in text:
# Only replace & when not part of an entity
text = RE_AMP.sub('&', text)
if "<" in text:
text = text.replace("<", "<")
if ">" in text:
text = text.replace(">", ">")
return text
except (TypeError, AttributeError): # pragma: no cover
_raise_serialization_error(text)
def _escape_attrib(text: str) -> str:
# escape attribute value
try:
if "&" in text:
# Only replace & when not part of an entity
text = RE_AMP.sub('&', text)
if "<" in text:
text = text.replace("<", "<")
if ">" in text:
text = text.replace(">", ">")
if "\"" in text:
text = text.replace("\"", """)
if "\n" in text:
text = text.replace("\n", " ")
return text
except (TypeError, AttributeError): # pragma: no cover
_raise_serialization_error(text)
def _escape_attrib_html(text: str) -> str:
# escape attribute value
try:
if "&" in text:
# Only replace & when not part of an entity
text = RE_AMP.sub('&', text)
if "<" in text:
text = text.replace("<", "<")
if ">" in text:
text = text.replace(">", ">")
if "\"" in text:
text = text.replace("\"", """)
return text
except (TypeError, AttributeError): # pragma: no cover
_raise_serialization_error(text)
def _serialize_html(write: Callable[[str], None], elem: Element, format: Literal["html", "xhtml"]) -> None:
tag = elem.tag
text = elem.text
if tag is Comment:
write("<!--%s-->" % _escape_cdata(text))
elif tag is ProcessingInstruction:
write("<?%s?>" % _escape_cdata(text))
elif tag is None:
if text:
write(_escape_cdata(text))
for e in elem:
_serialize_html(write, e, format)
else:
namespace_uri = None
if isinstance(tag, QName):
# `QNAME` objects store their data as a string: `{uri}tag`
if tag.text[:1] == "{":
namespace_uri, tag = tag.text[1:].split("}", 1)
else:
raise ValueError('QName objects must define a tag.')
write("<" + tag)
items = elem.items()
if items:
items = sorted(items) # lexical order
for k, v in items:
if isinstance(k, QName):
# Assume a text only `QName`
k = k.text
if isinstance(v, QName):
# Assume a text only `QName`
v = v.text
else:
v = _escape_attrib_html(v)
if k == v and format == 'html':
# handle boolean attributes
write(" %s" % v)
else:
write(' {}="{}"'.format(k, v))
if namespace_uri:
write(' xmlns="%s"' % (_escape_attrib(namespace_uri)))
if format == "xhtml" and tag.lower() in HTML_EMPTY:
write(" />")
else:
write(">")
if text:
if tag.lower() in ["script", "style"]:
write(text)
else:
write(_escape_cdata(text))
for e in elem:
_serialize_html(write, e, format)
if tag.lower() not in HTML_EMPTY:
write("</" + tag + ">")
if elem.tail:
write(_escape_cdata(elem.tail))
def _write_html(root: Element, format: Literal["html", "xhtml"] = "html") -> str:
assert root is not None
data: list[str] = []
write = data.append
_serialize_html(write, root, format)
return "".join(data)
# --------------------------------------------------------------------
# public functions
def to_html_string(element: Element) -> str:
""" Serialize element and its children to a string of HTML5. """
return _write_html(ElementTree(element).getroot(), format="html")
def to_xhtml_string(element: Element) -> str:
""" Serialize element and its children to a string of XHTML. """
return _write_html(ElementTree(element).getroot(), format="xhtml")
|