File size: 7,820 Bytes
122d3ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# Attribute List Extension for Python-Markdown
# ============================================

# Adds attribute list syntax. Inspired by
# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
# feature of the same name.

# See https://Python-Markdown.github.io/extensions/attr_list
# for documentation.

# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).

# All changes Copyright 2011-2014 The Python Markdown Project

# License: [BSD](https://opensource.org/licenses/bsd-license.php)

"""
 Adds attribute list syntax. Inspired by
[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
feature of the same name.

See the [documentation](https://Python-Markdown.github.io/extensions/attr_list)
for details.
"""

from __future__ import annotations
from typing import TYPE_CHECKING

from . import Extension
from ..treeprocessors import Treeprocessor
import re

if TYPE_CHECKING:  # pragma: no cover
    from xml.etree.ElementTree import Element


def _handle_double_quote(s, t):
    """ Scanner action for a `key="value"` token.

    `s` is the first argument supplied to the action and is not used; `t` is the
    matched token text. Returns `(key, value)` with the double quotes stripped
    from both ends of the value.
    """
    name, quoted = t.split('=', maxsplit=1)
    value = quoted.strip('"')
    return (name, value)


def _handle_single_quote(s, t):
    """ Scanner action for a `key='value'` token.

    `s` is the first argument supplied to the action and is not used; `t` is the
    matched token text. Returns `(key, value)` with the single quotes stripped
    from both ends of the value.
    """
    name, quoted = t.split('=', maxsplit=1)
    value = quoted.strip("'")
    return (name, value)


def _handle_key_value(s, t):
    """ Scanner action for an unquoted `key=value` token.

    `s` is not used; `t` is the matched token text. The token is split on its
    first `=` only and the resulting list (not a tuple) is returned as-is.
    """
    pair = t.split('=', maxsplit=1)
    return pair


def _handle_word(s, t):
    """ Scanner action for a bare word token.

    `s` is not used; `t` is the matched token text.

    - `.name` yields `('.', 'name')` (a class to add),
    - `#name` yields `('id', 'name')`,
    - anything else yields `(t, t)`, i.e. the word is both key and value.
    """
    key = {'.': '.', '#': 'id'}.get(t[:1])
    return (t, t) if key is None else (key, t[1:])


# Tokenizer for the text inside an attribute list. `re.Scanner` tries the rules in the
# order listed, so the quoted `key="..."` / `key='...'` forms must come before the bare
# `key=value` rule, which in turn must come before the single-word rule.
# No rule matches `}` or a lone `=`, so scanning stops there and the rest of the string
# is handed back as the unparsed remainder.
_scanner = re.Scanner([
    # key="value": the value runs non-greedily to the next double quote
    (r'[^ =}]+=".*?"', _handle_double_quote),
    # key='value': the value runs non-greedily to the next single quote
    (r"[^ =}]+='.*?'", _handle_single_quote),
    # key=value: unquoted value without spaces, `=` or `}`
    (r'[^ =}]+=[^ =}]+', _handle_key_value),
    # bare word: `.class`, `#id`, or a plain word
    (r'[^ =}]+', _handle_word),
    # a single space separates tokens; `None` means no token is emitted for it
    (r' ', None)
])


def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
    """ Tokenize an attribute list string into key/value pairs.

    Returns a pair `(attrs, remainder)`. `attrs` is the list of parsed attributes.
    `remainder` is the unparsed text starting at the first `}` the scanner could not
    consume, or an empty string when no such `}` exists. A non-empty remainder usually
    means the input was not really a well-formed attribute list.
    """
    parsed, unparsed = _scanner.scan(attrs_string)
    # Historic behavior: unparsable text that precedes a '}' is silently dropped, and
    # when there is no '}' at all the whole unparsed tail is dropped.
    brace_at = unparsed.find('}')
    if brace_at == -1:
        return parsed, ''
    return parsed, unparsed[brace_at:]


def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
    """ Soft-deprecated. Prefer `get_attrs_and_remainder`.

    Returns only the parsed attributes; any unparsed remainder is discarded.
    """
    attrs, _remainder = get_attrs_and_remainder(str)
    return attrs


def isheader(elem: Element) -> bool:
    """ Return `True` when the tag of `elem` is one of `h1` through `h6`. """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in heading_tags


class AttrListTreeprocessor(Treeprocessor):
    """ Tree processor that finds attribute lists in element text and applies them. """

    # Core pattern: `{`, an optional `:`, optional spaces, the attribute text (group 1),
    # optional spaces and a closing `}`. Group 1 may not start with `}`, a newline or a
    # space. It is greedy up to the end of the line, so the match extends to the *last*
    # `}` on the line; any over-match is detected later by `get_attrs_and_remainder`.
    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
    # Attribute list at the very end of the text, preceded by at least one space.
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    # Attribute list on a line of its own at the very end of the text.
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    # Runs of characters outside the listed ranges; `sanitize_name` replaces each run.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc: Element) -> None:
        """ Apply attribute lists to every element of `doc`, in place.

        For elements that `self.md.is_block_level` reports as block level, the attribute
        list is looked for at the end of the element's content and is only applied when it
        parses completely (strict mode); the matched text is then removed. For all other
        elements, the attribute list is looked for at the start of the element's `tail`.
        """
        for elem in doc.iter():
            if self.md.is_block_level(elem.tag):
                # Block level: check for `attrs` on last line of text
                RE = self.BLOCK_RE
                if isheader(elem) or elem.tag in ['dt', 'td', 'th']:
                    # header, def-term, or table cell: check for attributes at end of element
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a `ul` or `ol`.
                    pos = None
                    # find the `ul` or `ol` position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no `ul` or `ol`.
                        m = RE.search(elem[-1].tail)
                        if m:
                            # An empty return value means the attributes were assigned.
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before `ul` or `ol`
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. `ul` is first child.
                        m = RE.search(elem.text)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem.text = elem.text[:m.start()]
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        if not self.assign_attrs(elem, m.group(1), strict=True):
                            elem[-1].tail = elem[-1].tail[:m.start()]
                            if isheader(elem):
                                # clean up trailing #s
                                elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if m:
                        if not self.assign_attrs(elem, m.group(1), strict=True):
                            elem.text = elem.text[:m.start()]
                            if isheader(elem):
                                # clean up trailing #s
                                elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for `attrs` at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        remainder = self.assign_attrs(elem, m.group(1))
                        if remainder:
                            # The greedy pattern over-matched up to a later `}`. The first
                            # character of `remainder` is the `}` that really closed the
                            # attribute list; everything after it is ordinary text. Put it
                            # back in its original order, followed by the rest of the tail
                            # from the end of group 1 (this includes the later `}`).
                            elem.tail = remainder[1:] + elem.tail[m.end(1):]
                        else:
                            elem.tail = elem.tail[m.end():]

    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
        """ Assign `attrs` to element.

        If the `attrs_string` has an extra closing curly brace, the remaining text is returned.
        Otherwise an empty string is returned.

        The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.
        With `strict=True` and a non-empty remainder, the element is left untouched.
        """
        attrs, remainder = get_attrs_and_remainder(attrs_string)
        if strict and remainder:
            return remainder

        for k, v in attrs:
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '{} {}'.format(cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attribute `k` with `v`
                elem.set(self.sanitize_name(k), v)
        # The text that we initially over-matched will be put back.
        return remainder

    def sanitize_name(self, name: str) -> str:
        """
        Sanitize name as 'an XML Name, minus the `:`.'
        See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.

        Each run of characters matched by `NAME_RE` is replaced with a single `_`.
        """
        return self.NAME_RE.sub('_', name)


class AttrListExtension(Extension):
    """ Attribute List extension for Python-Markdown """
    def extendMarkdown(self, md):
        """ Register the tree processor under the name `attr_list` with priority 8. """
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.register(processor, 'attr_list', 8)
        md.registerExtension(self)


def makeExtension(**kwargs):  # pragma: no cover
    """ Build an `AttrListExtension`, forwarding all keyword arguments to it. """
    extension = AttrListExtension(**kwargs)
    return extension