import re

from .core import BlockState
from .util import (
    strip_end,
    expand_tab,
    expand_leading_tab,
)


LIST_PATTERN = (
    r'^(?P<list_1> {0,3})'
    r'(?P<list_2>[\*\+-]|\d{1,9}[.)])'
    r'(?P<list_3>[ \t]*|[ \t].+)$'
)
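# For a line like '  1. item' this captures list_1='  ' (up to three spaces of
# indentation), list_2='1.' (the bullet or ordered marker), and list_3=' item'
# (everything after the marker, possibly empty).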


_LINE_HAS_TEXT = re.compile(r'( *)\S')


def parse_list(block, m: re.Match, state: BlockState) -> int:
    """Parse tokens for ordered and unordered lists."""
    text = m.group('list_3')
    if not text.strip():
        # CommonMark: an empty list item cannot interrupt a paragraph
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

    marker = m.group('list_2')
    ordered = len(marker) > 1
    depth = state.depth()
    token = {
        'type': 'list',
        'children': [],
        'tight': True,
        'bullet': marker[-1],
        'attrs': {
            'depth': depth,
            'ordered': ordered,
        },
    }
    if ordered:
        start = int(marker[:-1])
        if start != 1:
            # CommonMark: only an ordered list starting with 1 can
            # interrupt a paragraph
            end_pos = state.append_paragraph()
            if end_pos:
                return end_pos
            token['attrs']['start'] = start

    state.cursor = m.end() + 1
    groups = (m.group('list_1'), marker, text)

    if depth >= block.max_nested_level - 1:
        rules = list(block.list_rules)
        rules.remove('list')
    else:
        rules = block.list_rules

    bullet = _get_list_bullet(marker[-1])
    while groups:
        groups = _parse_list_item(block, bullet, groups, token, state, rules)

    end_pos = token.pop('_end_pos', None)
    _transform_tight_list(token)
    if end_pos:
        index = token.pop('_tok_index')
        state.tokens.insert(index, token)
        return end_pos

    state.append_token(token)
    return state.cursor
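# For input such as '- foo\n- bar\n' the appended token looks roughly like:
# {'type': 'list', 'tight': True, 'bullet': '-',
#  'attrs': {'depth': 0, 'ordered': False},
#  'children': [{'type': 'list_item', 'children': [...]},
#               {'type': 'list_item', 'children': [...]}]}
# where each item's children are produced by block.parse() on the item's text.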


def _transform_tight_list(token):
    if token['tight']:
        # tight list items are rendered without paragraph wrapping,
        # so demote their paragraphs to plain block text
        for list_item in token['children']:
            for tok in list_item['children']:
                if tok['type'] == 'paragraph':
                    tok['type'] = 'block_text'
                elif tok['type'] == 'list':
                    _transform_tight_list(tok)
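# Loose lists are left untouched; nested list tokens are handled by the
# recursive call, each according to its own 'tight' flag.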


def _parse_list_item(block, bullet, groups, token, state, rules):
    spaces, marker, text = groups

    leading_width = len(spaces) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    item_pattern = _compile_list_item_pattern(bullet, leading_width)
    # patterns that can end this item: a sibling item, a new list,
    # or another block
    pairs = [
        ('thematic_break', block.specification['thematic_break']),
        ('fenced_code', block.specification['fenced_code']),
        ('axt_heading', block.specification['axt_heading']),
        ('block_quote', block.specification['block_quote']),
        ('block_html', block.specification['block_html']),
        ('list', block.specification['list']),
    ]
    if leading_width < 3:
        _repl_w = str(leading_width)
        pairs = [(n, p.replace('3', _repl_w, 1)) for n, p in pairs]

    pairs.insert(1, ('list_item', item_pattern))
    regex = '|'.join(r'(?P<%s>(?<=\n)%s)' % pair for pair in pairs)
    sc = re.compile(regex, re.M)

    src = ''
    next_group = None
    prev_blank_line = False
    pos = state.cursor

    continue_space = ' ' * continue_width
    while pos < state.cursor_max:
        pos = state.find_line_end()
        line = state.get_text(pos)
        if block.BLANK_LINE.match(line):
            src += '\n'
            prev_blank_line = True
            state.cursor = pos
            continue

        line = expand_leading_tab(line)
        if line.startswith(continue_space):
            if prev_blank_line and not text and not src.strip():
                # CommonMark: a list item can begin with at most one blank line
                break

            src += line
            prev_blank_line = False
            state.cursor = pos
            continue

        m = sc.match(state.src, state.cursor)
        if m:
            tok_type = m.lastgroup
            if tok_type == 'list_item':
                if prev_blank_line:
                    token['tight'] = False
                next_group = (
                    m.group('listitem_1'),
                    m.group('listitem_2'),
                    m.group('listitem_3')
                )
                state.cursor = m.end() + 1
                break

            if tok_type == 'list':
                break

            tok_index = len(state.tokens)
            end_pos = block.parse_method(m, state)
            if end_pos:
                token['_tok_index'] = tok_index
                token['_end_pos'] = end_pos
                break

        if prev_blank_line and not line.startswith(continue_space):
            # the previous line is blank and this one is not indented
            # enough to continue the item
            break

        src += line
        state.cursor = pos

    text += _clean_list_item_text(src, continue_width)
    child = state.child_state(strip_end(text))

    block.parse(child, rules)

    if token['tight'] and _is_loose_list(child.tokens):
        token['tight'] = False

    token['children'].append({
        'type': 'list_item',
        'children': child.tokens,
    })
    if next_group:
        return next_group
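# Returns the (spaces, marker, text) groups of the next sibling item when its
# marker was found, so parse_list keeps looping; returns None implicitly when
# the list ends, which stops the loop in parse_list.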


def _get_list_bullet(c):
    if c == '.':
        bullet = r'\d{0,9}\.'
    elif c == ')':
        bullet = r'\d{0,9}\)'
    elif c == '*':
        bullet = r'\*'
    elif c == '+':
        bullet = r'\+'
    else:
        bullet = '-'
    return bullet
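# e.g. _get_list_bullet('.') == r'\d{0,9}\.'; the returned pattern matches the
# next item's marker, so switching marker families ('.', ')', '-', '*', '+')
# starts a new list rather than a new item.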


def _compile_list_item_pattern(bullet, leading_width):
    if leading_width > 3:
        leading_width = 3
    return (
        r'^(?P<listitem_1> {0,' + str(leading_width) + '})'
        r'(?P<listitem_2>' + bullet + ')'
        r'(?P<listitem_3>[ \t]*|[ \t][^\n]+)$'
    )
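# e.g. _compile_list_item_pattern(r'\*', 2) returns
# r'^(?P<listitem_1> {0,2})(?P<listitem_2>\*)(?P<listitem_3>[ \t]*|[ \t][^\n]+)$'
# so a sibling marker may be indented by at most leading_width spaces (capped at 3).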


def _compile_continue_width(text, leading_width):
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if m2:
        # when the item text starts with an indented code block, the
        # content column is one space after the marker
        if text.startswith('     '):
            space_width = 1
        else:
            space_width = len(m2.group(1))

        text = text[space_width:] + '\n'
    else:
        space_width = 1
        text = ''

    continue_width = leading_width + space_width
    return text, continue_width
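# Worked examples with leading_width=2 (a one-character marker indented by one
# space):
#   _compile_continue_width(' item', 2)           -> ('item\n', 3)
#   _compile_continue_width('   item', 2)         -> ('item\n', 5)
#   _compile_continue_width(' ' * 6 + 'code', 2)  -> ('     code\n', 3)
# The last input starts with five or more spaces, so only one space is
# consumed and the remainder stays as an indented code block inside the item.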


def _clean_list_item_text(src, continue_width):
    # strip the continuation indent from each collected line
    rv = []
    trim_space = ' ' * continue_width
    lines = src.split('\n')
    for line in lines:
        if line.startswith(trim_space):
            line = line.replace(trim_space, '', 1)
            # expand any tabs that remain after the indent was removed
            line = expand_tab(line)
            rv.append(line)
        else:
            rv.append(line)

    return '\n'.join(rv)
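# e.g. _clean_list_item_text('  bar\n\n  baz\n', 2) == 'bar\n\nbaz\n':
# continuation lines lose the two-space indent and blank lines pass through.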


def _is_loose_list(tokens):
    paragraph_count = 0
    for tok in tokens:
        if tok['type'] == 'blank_line':
            return True
        if tok['type'] == 'paragraph':
            paragraph_count += 1
            if paragraph_count > 1:
                return True
    return False
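# e.g. an item whose source was 'foo\n\nbar' parses into two paragraph tokens,
# so the enclosing list becomes loose; a single-paragraph item with no
# blank_line token keeps the list tight.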