File size: 7,171 Bytes
d1ceb73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
import re
from .core import BlockState
from .util import (
strip_end,
expand_tab,
expand_leading_tab,
)
# List parsing is involved enough that it lives in its own module.

# Pattern for the first line of a list item, exposed as three named groups.
LIST_PATTERN = (
    # list_1: indentation in front of the marker, at most three spaces
    r'^(?P<list_1> {0,3})'
    # list_2: a bullet character, or 1-9 digits followed by "." or ")"
    + r'(?P<list_2>[\*\+-]|\d{1,9}[.)])'
    # list_3: rest of the line -- only spaces/tabs, or one space/tab then content
    + r'(?P<list_3>[ \t]*|[ \t].+)$'
)

# Captures the run of spaces that precedes the first non-whitespace character.
_LINE_HAS_TEXT = re.compile(r'( *)\S')
def parse_list(block, m: re.Match, state: BlockState) -> int:
    """Build a ``list`` token from the matched first item and the lines after it.

    :param block: block parser; ``max_nested_level`` and ``list_rules`` are
        read here and it is handed on to the item parser.
    :param m: match exposing the ``list_1`` (indent), ``list_2`` (marker) and
        ``list_3`` (rest of the line) groups.
    :param state: parsing state; its cursor and token list are updated.
    :return: the truthy position reported by ``state.append_paragraph()`` or
        by a block parsed while reading an item, otherwise ``state.cursor``
        after the list token has been appended.
    """
    text = m.group('list_3')
    marker = m.group('list_2')
    delimiter = marker[-1]
    # bullets are a single character; ordered markers are digits + delimiter
    is_ordered = len(marker) > 1

    if not text.strip():
        # Example 285
        # an empty list item cannot interrupt a paragraph
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

    depth = state.depth()
    attrs = {
        'depth': depth,
        'ordered': is_ordered,
    }
    token = {
        'type': 'list',
        'children': [],
        'tight': True,
        'bullet': delimiter,
        'attrs': attrs,
    }

    if is_ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            end_pos = state.append_paragraph()
            if end_pos:
                return end_pos
            # the start number is recorded only when it differs from 1
            attrs['start'] = start

    state.cursor = m.end() + 1

    rules = block.list_rules
    if depth >= block.max_nested_level - 1:
        # nesting limit reached: parse item content without the 'list' rule,
        # working on a copy so block.list_rules itself stays untouched
        rules = list(rules)
        rules.remove('list')

    bullet = _get_list_bullet(delimiter)
    groups = (m.group('list_1'), marker, text)
    # each call consumes one item and hands back the groups of the next one
    while groups:
        groups = _parse_list_item(block, bullet, groups, token, state, rules)

    end_pos = token.pop('_end_pos', None)
    _transform_tight_list(token)
    if not end_pos:
        state.append_token(token)
        return state.cursor

    # A block parsed while reading an item reported an end position; put the
    # list token at the token index recorded before that block was parsed.
    state.tokens.insert(token.pop('_tok_index'), token)
    return end_pos
def _transform_tight_list(token):
    """Rename the paragraphs of a tight list to ``block_text``, in place.

    Nested ``list`` tokens found directly inside the items are processed
    recursively, each according to its own ``tight`` flag. A token whose
    ``tight`` flag is falsy is left untouched, including its nested lists.

    :param token: a list token with ``tight`` and ``children`` keys.
    """
    if not token['tight']:
        return

    for item in token['children']:
        for child in item['children']:
            kind = child['type']
            if kind == 'paragraph':
                child['type'] = 'block_text'
            elif kind == 'list':
                _transform_tight_list(child)
def _parse_list_item(block, bullet, groups, token, state, rules):
    """Consume one list item from ``state`` and append it to ``token['children']``.

    :param block: block parser supplying ``specification``, ``BLANK_LINE``,
        ``parse_method`` and ``parse``.
    :param bullet: regex fragment that the markers of sibling items must match.
    :param groups: ``(leading spaces, marker, rest of line)`` of this item.
    :param token: list token under construction; ``tight`` may be cleared and
        ``_tok_index`` / ``_end_pos`` are stored when a block parsed from
        within the item reports an end position.
    :param state: parsing state; its cursor moves over the consumed lines.
    :param rules: rule names used to parse the item's content.
    :return: the groups of the next sibling item, or ``None`` when no sibling
        item was found.
    """
    spaces, marker, text = groups

    leading_width = len(spaces) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    indent = ' ' * continue_width

    # Block patterns that are checked against lines not indented enough to
    # continue the item.
    spec = block.specification
    names = (
        'thematic_break',
        'fenced_code',
        'axt_heading',
        'block_quote',
        'block_html',
        'list',
    )
    candidates = [(name, spec[name]) for name in names]
    if leading_width < 3:
        # swap the first '3' of every pattern for the narrower leading width
        width = str(leading_width)
        candidates = [
            (name, pattern.replace('3', width, 1))
            for name, pattern in candidates
        ]
    sibling_pattern = _compile_list_item_pattern(bullet, leading_width)
    candidates.insert(1, ('list_item', sibling_pattern))
    scanner = re.compile(
        '|'.join(r'(?P<%s>(?<=\n)%s)' % pair for pair in candidates),
        re.M,
    )

    src = ''
    next_group = None
    after_blank = False
    pos = state.cursor

    while pos < state.cursor_max:
        pos = state.find_line_end()
        line = state.get_text(pos)

        if block.BLANK_LINE.match(line):
            src += '\n'
            after_blank = True
            state.cursor = pos
            continue

        line = expand_leading_tab(line)
        if line.startswith(indent):
            if after_blank and not (text or src.strip()):
                # Example 280
                # A list item can begin with at most one blank line
                break
            src += line
            after_blank = False
            state.cursor = pos
            continue

        # From here on the line is known not to start with the item indent.
        found = scanner.match(state.src, state.cursor)
        if found:
            kind = found.lastgroup
            if kind == 'list_item':
                # a blank line between sibling items makes the list loose
                if after_blank:
                    token['tight'] = False
                next_group = found.group(
                    'listitem_1', 'listitem_2', 'listitem_3',
                )
                state.cursor = found.end() + 1
                break

            if kind == 'list':
                break

            # remember the token index from before the block is parsed;
            # parse_list inserts the list token there
            tok_index = len(state.tokens)
            end_pos = block.parse_method(found, state)
            if end_pos:
                token['_tok_index'] = tok_index
                token['_end_pos'] = end_pos
                break

        if after_blank:
            # not a continue line, and previous line is blank
            break

        src += line
        state.cursor = pos

    text += _clean_list_item_text(src, continue_width)
    child = state.child_state(strip_end(text))
    block.parse(child, rules)

    if token['tight'] and _is_loose_list(child.tokens):
        token['tight'] = False

    token['children'].append({
        'type': 'list_item',
        'children': child.tokens,
    })
    return next_group
def _get_list_bullet(c):
    """Return the regex fragment that sibling item markers must match.

    Ordered delimiters (``.`` and ``)``) require 1-9 digits in front of the
    delimiter, the same digit rule ``LIST_PATTERN`` applies to the first
    item. Previously ``\\d{0,9}`` was used, which accepted a bare ``.`` or
    ``)`` as an ordered marker.

    :param c: last character of the first item's marker.
    :return: a regex fragment; any character other than ``.``, ``)``, ``*``
        and ``+`` yields ``'-'``.
    """
    fragments = {
        '.': r'\d{1,9}\.',
        ')': r'\d{1,9}\)',
        '*': r'\*',
        '+': r'\+',
    }
    return fragments.get(c, '-')
def _compile_list_item_pattern(bullet, leading_width):
    """Assemble the pattern string that recognises a sibling list item.

    :param bullet: regex fragment for the marker.
    :param leading_width: width of the current item's indent plus marker;
        values above 3 are clamped to 3 for the allowed indentation.
    :return: a pattern string (not compiled) with the groups ``listitem_1``
        (indent), ``listitem_2`` (marker) and ``listitem_3`` (rest of line).
    """
    max_indent = min(leading_width, 3)
    return (
        rf'^(?P<listitem_1> {{0,{max_indent}}})'
        rf'(?P<listitem_2>{bullet})'
        r'(?P<listitem_3>[ \t]*|[ \t][^\n]+)$'
    )
def _compile_continue_width(text, leading_width):
    """Split off the item's first line of content and compute its indent width.

    :param text: the part of the marker line that follows the marker.
    :param leading_width: width of the item's indent plus its marker.
    :return: ``(content, continue_width)``. ``content`` is the text after the
        marker padding with a newline appended, or ``''`` when no
        non-whitespace character is found. ``continue_width`` is
        ``leading_width`` plus the padding width; the caller uses it as the
        number of spaces a line needs to continue the item.
    """
    # Tabs are expanded by the helpers from ``.util`` before spaces are counted.
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if not m2:
        # no content on the marker line: assume one space after the marker
        return '', leading_width + 1

    # indent code, startswith 5 spaces: one space separates the marker from
    # the content and the remaining spaces stay part of the content.
    # The literal is spelled ``' ' * 5`` so that the count is explicit; a
    # single-space prefix here would give every item a padding width of 1.
    if text.startswith(' ' * 5):
        space_width = 1
    else:
        space_width = len(m2.group(1))

    return text[space_width:] + '\n', leading_width + space_width
def _clean_list_item_text(src, continue_width):
    """Strip the item indentation from every line of ``src`` that carries it.

    Lines that start with ``continue_width`` spaces lose that prefix and are
    then passed through ``expand_tab``; the original comments cite CommonMark
    Examples 5 and 7 for how tabs are treated. All other lines are kept
    unchanged.

    :param src: raw text collected for the item, lines separated by newlines.
    :param continue_width: number of leading spaces to remove.
    :return: the cleaned lines joined with newlines.
    """
    prefix = ' ' * continue_width
    cut = len(prefix)
    return '\n'.join(
        expand_tab(line[cut:]) if line.startswith(prefix) else line
        for line in src.split('\n')
    )
def _is_loose_list(tokens):
    """Tell whether an item's child tokens make the enclosing list loose.

    :param tokens: child tokens of one list item, each with a ``type`` key.
    :return: ``True`` if any token is a ``blank_line`` or if more than one
        ``paragraph`` token is present, otherwise ``False``. The result is
        always a ``bool``; the not-loose case used to fall through and return
        ``None``.
    """
    paragraph_count = 0
    for tok in tokens:
        tok_type = tok['type']
        if tok_type == 'blank_line':
            return True
        if tok_type == 'paragraph':
            paragraph_count += 1
            if paragraph_count > 1:
                return True
    return False
|