Spaces:
Running
Running
File size: 7,820 Bytes
122d3ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
# Attribute List Extension for Python-Markdown
# ============================================
# Adds attribute list syntax. Inspired by
# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
# feature of the same name.
# See https://Python-Markdown.github.io/extensions/attr_list
# for documentation.
# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).
# All changes Copyright 2011-2014 The Python Markdown Project
# License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
Adds attribute list syntax. Inspired by
[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
feature of the same name.
See the [documentation](https://Python-Markdown.github.io/extensions/attr_list)
for details.
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from . import Extension
from ..treeprocessors import Treeprocessor
import re
if TYPE_CHECKING: # pragma: no cover
from xml.etree.ElementTree import Element
def _handle_double_quote(s, t):
    """ Scanner action for `key="value"` tokens: return `(key, value)` without the double quotes.

    `s` is the scanner instance (unused) and `t` is the matched token text.
    """
    # Split at the first `=` only; `index` raises `ValueError` if it is missing.
    eq = t.index('=')
    key = t[:eq]
    quoted = t[eq + 1:]
    return key, quoted.strip('"')
def _handle_single_quote(s, t):
    """ Scanner action for `key='value'` tokens: return `(key, value)` without the single quotes.

    `s` is the scanner instance (unused) and `t` is the matched token text.
    """
    # Split at the first `=` only; `index` raises `ValueError` if it is missing.
    eq = t.index('=')
    key = t[:eq]
    quoted = t[eq + 1:]
    return key, quoted.strip("'")
def _handle_key_value(s, t):
    """ Scanner action for unquoted `key=value` tokens: return `(key, value)`.

    `s` is the scanner instance (unused) and `t` is the matched token text.
    The pair is returned as a tuple, like the other scanner actions, so the
    parsed attribute list is uniformly a list of 2-tuples.
    """
    # Split at the first `=` only; a token without `=` raises `ValueError`.
    key, value = t.split('=', 1)
    return key, value
def _handle_word(s, t):
    """ Scanner action for a bare word.

    `.name` yields `('.', name)` (a class to add), `#name` yields
    `('id', name)`, and any other word yields `(word, word)`.
    `s` is the scanner instance (unused) and `t` is the matched token text.
    """
    marker, rest = t[:1], t[1:]
    if marker == '.':
        return '.', rest
    if marker == '#':
        return 'id', rest
    return t, t
# Tokenizer for the text found inside an attribute list. The rules are listed
# from most to least specific; their order matters, because a quoted
# `key="value"` token would otherwise also be consumed by one of the looser
# rules below it. Scanning stops at the first position where no rule matches,
# and the unscanned rest is returned by `scan()` alongside the parsed results.
_scanner = re.Scanner([
    # key="value" (the value may contain spaces, `=` and `}`)
    (r'[^ =}]+=".*?"', _handle_double_quote),
    # key='value'
    (r"[^ =}]+='.*?'", _handle_single_quote),
    # key=value (unquoted)
    (r'[^ =}]+=[^ =}]+', _handle_key_value),
    # bare word: `.class`, `#id`, or a plain word
    (r'[^ =}]+', _handle_word),
    # a single space separates tokens; a `None` action produces no result
    (r' ', None)
])
def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
    """ Parse an attribute list string into a list of `(key, value)` pairs.

    The second item of the returned tuple is the text starting at the first
    curly brace left unparsed by the scanner, or an empty string if there is
    none. In typical cases a non-empty remainder means that the input does
    not match the intended attribute list syntax.
    """
    attrs, unparsed = _scanner.scan(attrs_string)
    # To keep historic behavior, discard all unparsable text prior to '}'.
    # With no '}' present, `partition` yields two empty strings here.
    _, brace, after = unparsed.partition('}')
    return attrs, brace + after
def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
    """ Soft-deprecated. Prefer `get_attrs_and_remainder`.

    Return only the parsed `(key, value)` pairs, dropping any remainder.
    """
    attrs, _ = get_attrs_and_remainder(str)
    return attrs
def isheader(elem: Element) -> bool:
    """ Return `True` if `elem` is an `h1` through `h6` element. """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in heading_tags
class AttrListTreeprocessor(Treeprocessor):
    """ Find attribute lists in the text of a parsed document and apply them to elements. """

    # Attribute list syntax: `{` or `{:`, the attribute text (captured), then `}`.
    # The capture is greedy up to the last `}` on the line, so it may over-match;
    # `assign_attrs` reports the over-matched text as a remainder.
    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
    # Attribute list at the end of the text, preceded by at least one space.
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    # Attribute list on its own final line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    # Runs of characters that `sanitize_name` replaces in attribute names.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc: Element) -> None:
        """ Walk `doc` and move every recognized attribute list onto its element.

        Block-level elements take their attribute list from the end of their
        content; any other element takes it from the start of its tail.
        """
        for elem in doc.iter():
            if not self.md.is_block_level(elem.tag):
                # inline: check for `attrs` at start of tail
                tail = elem.tail
                if tail:
                    m = self.INLINE_RE.match(tail)
                    if m:
                        leftover = self.assign_attrs(elem, m.group(1))
                        # Put back whatever was over-matched after the real `}`.
                        elem.tail = tail[m.end():] + leftover
                continue

            # Block level: check for `attrs` on last line of text, except for a
            # header, def-term, or table cell, where they sit at end of element.
            heading = isheader(elem)
            if heading or elem.tag in ('dt', 'td', 'th'):
                pattern = self.HEADER_RE
            else:
                pattern = self.BLOCK_RE

            # Decide which node and which of its fields holds the candidate text.
            owner, field = None, None
            if len(elem) and elem.tag == 'li':
                # special case list items. children may include a `ul` or `ol`.
                nested = next(
                    (i for i, child in enumerate(elem) if child.tag in ('ul', 'ol')),
                    None
                )
                if nested is None and elem[-1].tail:
                    # no `ul` or `ol`: tail of last child
                    owner, field = elem[-1], 'tail'
                elif nested is not None and nested > 0 and elem[nested - 1].tail:
                    # tail of last child before the `ul` or `ol`
                    owner, field = elem[nested - 1], 'tail'
                elif elem.text:
                    # fall back to the item's own text
                    owner, field = elem, 'text'
            elif len(elem) and elem[-1].tail:
                # has children: tail of last child
                owner, field = elem[-1], 'tail'
            elif elem.text:
                # no children: own text
                owner, field = elem, 'text'

            if owner is None:
                continue

            content = getattr(owner, field)
            m = pattern.search(content)
            # In strict mode a non-empty return means the attributes were
            # rejected, in which case the text is left untouched.
            if m and not self.assign_attrs(elem, m.group(1), strict=True):
                content = content[:m.start()]
                if heading:
                    # clean up trailing #s
                    content = content.rstrip('#').rstrip()
                setattr(owner, field, content)

    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
        """ Parse `attrs_string` and set the resulting attributes on `elem`.

        If `attrs_string` has an extra closing curly brace, the text from that
        brace onward is returned; otherwise an empty string is returned.
        With `strict` set, nothing is assigned when such a remainder exists.
        """
        parsed, leftover = get_attrs_and_remainder(attrs_string)
        if leftover and strict:
            return leftover
        for key, value in parsed:
            if key != '.':
                # assign attribute `key` with `value`
                elem.set(self.sanitize_name(key), value)
                continue
            # add to class
            current = elem.get('class')
            elem.set('class', '{} {}'.format(current, value) if current else value)
        # The text that we initially over-matched will be put back.
        return leftover

    def sanitize_name(self, name: str) -> str:
        """
        Sanitize name as 'an XML Name, minus the `:`.'
        See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.

        Each run of characters matched by `NAME_RE` becomes a single `_`.
        """
        return self.NAME_RE.sub('_', name)
class AttrListExtension(Extension):
    """ Attribute List extension for Python-Markdown. """

    def extendMarkdown(self, md):
        """ Register the attribute list tree processor and this extension on `md`. """
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.register(processor, 'attr_list', 8)
        md.registerExtension(self)
def makeExtension(**kwargs):  # pragma: no cover
    """ Build an `AttrListExtension`, forwarding all keyword arguments to it. """
    extension = AttrListExtension(**kwargs)
    return extension
|