|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import createDebug from 'debug' |
|
import {markdownLineEnding} from 'micromark-util-character' |
|
import {push, splice} from 'micromark-util-chunked' |
|
import {resolveAll} from 'micromark-util-resolve-all' |
|
import {codes} from 'micromark-util-symbol/codes.js' |
|
import {values} from 'micromark-util-symbol/values.js' |
|
import {ok as assert} from 'uvu/assert' |
|
|
|
const debug = createDebug('micromark') |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Create a tokenizer: a state machine that turns a stream of chunks (strings
 * and character codes) into `events` (`enter`/`exit` pairs around tokens).
 *
 * @param parser Reference to the parser; exposed on the returned context and
 *   read for `parser.constructs.disable.null`.
 * @param initialize Construct whose `tokenize` creates the initial state; if
 *   it has `resolveAll`, that is registered as well.
 * @param from Optional starting point (`{line, column, offset}`); defaults to
 *   line 1, column 1, offset 0.
 * @returns The tokenizer context, whose `write` feeds chunks in.
 */
export function createTokenizer(parser, initialize, from) {
  // Current position: `line`/`column`/`offset` are the document position,
  // `_index` points into `chunks`, and `_bufferIndex` points into the current
  // string chunk (`-1` when not inside a string chunk).
  let point = Object.assign(
    from ? Object.assign({}, from) : {line: 1, column: 1, offset: 0},
    {_index: 0, _bufferIndex: -1}
  )

  // Per-line column to skip to (see `defineSkip` / `accountForPotentialSkip`).
  const columnStart = {}

  // Constructs with `resolveAll` handlers, run once when the stream ends.
  const resolveAllConstructs = []

  // All chunks received so far.
  let chunks = []

  // Stack of tokens that have been entered but not yet exited.
  let stack = []

  // Whether the last code handed to a state was consumed; used to catch
  // constructs that forget to call `effects.consume`.
  let consumed = true

  // The effect handlers handed to constructs.
  // `attempt` keeps a construct's result on success; `check` and `interrupt`
  // always restore afterwards (`interrupt` additionally runs the construct
  // with `interrupt: true` layered onto its context).
  const effects = {
    consume,
    enter,
    exit,
    attempt: constructFactory(onsuccessfulconstruct),
    check: constructFactory(onsuccessfulcheck),
    interrupt: constructFactory(onsuccessfulcheck, {interrupt: true})
  }

  // The tokenizer context: public interface plus shared state for constructs.
  const context = {
    previous: codes.eof,
    code: codes.eof,
    containerState: {},
    events: [],
    parser,
    sliceStream,
    sliceSerialize,
    now,
    defineSkip,
    write
  }

  // The current state function, seeded by the initial construct.
  let state = initialize.tokenize.call(context, effects)

  // The code most recently passed to `state`; `consume` asserts it receives
  // this same code back.
  let expectedCode

  if (initialize.resolveAll) {
    resolveAllConstructs.push(initialize)
  }

  return context

  /**
   * Feed a slice of chunks into the machine.
   *
   * Returns the resolved events when the slice ends with `codes.eof` (the
   * stream is complete); otherwise returns an empty array, because resolving
   * can only happen once all data is in.
   */
  function write(slice) {
    chunks = push(chunks, slice)

    main()

    // Not done yet: wait for more chunks.
    if (chunks[chunks.length - 1] !== codes.eof) {
      return []
    }

    addResult(initialize, 0)

    // Run all registered `resolveAll` handlers over the final events.
    context.events = resolveAll(resolveAllConstructs, context.events, context)

    return context.events
  }

  //
  // Tools.
  //

  /** Serialize the chunks spanned by `token` into a string. */
  function sliceSerialize(token, expandTabs) {
    return serializeChunks(sliceStream(token), expandTabs)
  }

  /** Get the raw chunks spanned by `token`. */
  function sliceStream(token) {
    return sliceChunks(chunks, token)
  }

  /** Get a snapshot of the current point (a copy, safe to keep). */
  function now() {
    const {line, column, offset, _index, _bufferIndex} = point
    return {line, column, offset, _index, _bufferIndex}
  }

  /**
   * Register that `value.line` starts at `value.column` instead of column 1
   * (e.g., indentation already accounted for elsewhere), and apply it to the
   * current point immediately.
   */
  function defineSkip(value) {
    columnStart[value.line] = value.column
    accountForPotentialSkip()
    debug('position: define skip: `%j`', point)
  }

  //
  // State management.
  //

  /**
   * Main loop: run the state machine over every code in `chunks`, starting at
   * the current point.
   *
   * String chunks are walked one character at a time; all other chunks are
   * passed through as codes directly.
   */
  function main() {
    /** Chunk index being walked, to notice when a state restores the point. */
    let chunkIndex

    while (point._index < chunks.length) {
      const chunk = chunks[point._index]

      if (typeof chunk === 'string') {
        chunkIndex = point._index

        if (point._bufferIndex < 0) {
          point._bufferIndex = 0
        }

        // Walk this string chunk; bail out if a state moved `point._index`
        // (e.g., via `restore`).
        while (
          point._index === chunkIndex &&
          point._bufferIndex < chunk.length
        ) {
          go(chunk.charCodeAt(point._bufferIndex))
        }
      } else {
        go(chunk)
      }
    }
  }

  /** Pass one code to the current state function and store the next state. */
  function go(code) {
    assert(consumed === true, 'expected character to be consumed')
    consumed = undefined
    debug('main: passing `%s` to %s', code, state && state.name)
    expectedCode = code
    assert(typeof state === 'function', 'expected state')
    state = state(code)
  }

  /**
   * Consume `code`: advance the point past it and remember it as `previous`.
   * Must be called exactly once per code a state receives.
   */
  function consume(code) {
    assert(code === expectedCode, 'expected given code to equal expected code')

    debug('consume: `%s`', code)

    assert(
      consumed === undefined,
      'expected code to not have been consumed: this might be because `return x(code)` instead of `return x` was used'
    )
    assert(
      code === null
        ? context.events.length === 0 ||
            context.events[context.events.length - 1][0] === 'exit'
        : context.events[context.events.length - 1][0] === 'enter',
      'expected last token to be open'
    )

    if (markdownLineEnding(code)) {
      point.line++
      point.column = 1
      // CR+LF is one code but two characters.
      point.offset += code === codes.carriageReturnLineFeed ? 2 : 1
      accountForPotentialSkip()
      debug('position: after eol: `%j`', point)
    } else if (code !== codes.virtualSpace) {
      // Virtual spaces do not advance the document position.
      point.column++
      point.offset++
    }

    // Advance the chunk pointer.
    if (point._bufferIndex < 0) {
      // Not in a string chunk: move to the next chunk.
      point._index++
    } else {
      point._bufferIndex++

      // At the end of this string chunk: move to the next chunk.
      if (point._bufferIndex === chunks[point._index].length) {
        point._bufferIndex = -1
        point._index++
      }
    }

    context.previous = code

    consumed = true
  }

  /** Open a token of `type` at the current point and push it on the stack. */
  function enter(type, fields) {
    const token = fields || {}
    token.type = type
    token.start = now()

    assert(typeof type === 'string', 'expected string type')
    assert(type.length > 0, 'expected non-empty string')
    debug('enter: `%s`', type)

    context.events.push(['enter', token, context])

    stack.push(token)

    return token
  }

  /** Close the innermost open token; `type` must match the token opened. */
  function exit(type) {
    assert(typeof type === 'string', 'expected string type')
    assert(type.length > 0, 'expected non-empty string')

    const token = stack.pop()
    assert(token, 'cannot close w/o open tokens')
    token.end = now()

    assert(type === token.type, 'expected exit token to match current token')

    assert(
      !(
        token.start._index === token.end._index &&
        token.start._bufferIndex === token.end._bufferIndex
      ),
      'expected non-empty token (`' + type + '`)'
    )

    debug('exit: `%s`', token.type)
    context.events.push(['exit', token, context])

    return token
  }

  /** When an `attempt` succeeds: keep and resolve the construct's result. */
  function onsuccessfulconstruct(construct, info) {
    addResult(construct, info.from)
  }

  /** When a `check` succeeds: discard whatever was tokenized. */
  function onsuccessfulcheck(_, info) {
    info.restore()
  }

  /**
   * Build an attempt/check/interrupt effect.
   *
   * @param onreturn Called when a candidate construct succeeds (before moving
   *   on to the success state).
   * @param fields Extra fields (e.g., `{interrupt: true}`) layered onto the
   *   context passed to each candidate's `tokenize`.
   */
  function constructFactory(onreturn, fields) {
    return hook

    /**
     * Handle a single construct, a list of constructs, or a map of constructs
     * keyed by code; go to `returnState` on success and to `bogusState` when
     * every candidate fails.
     */
    function hook(constructs, returnState, bogusState) {
      /** Candidates currently being tried. */
      let listOfConstructs
      /** Position in `listOfConstructs`. */
      let constructIndex
      /** Candidate currently running. */
      let currentConstruct
      /** Snapshot (`store()`) taken before running the current candidate. */
      let info

      return Array.isArray(constructs)
        ? handleListOfConstructs(constructs)
        : 'tokenize' in constructs
        ? // Single construct.
          handleListOfConstructs([constructs])
        : handleMapOfConstructs(constructs)

      /** Pick the candidates for the incoming code from a map of constructs. */
      function handleMapOfConstructs(map) {
        return start

        function start(code) {
          const def = code !== null && map[code]
          const all = code !== null && map.null
          const list = [
            // Constructs for this specific code first, then the catch-all
            // (`null`-keyed) constructs.
            ...(Array.isArray(def) ? def : def ? [def] : []),
            ...(Array.isArray(all) ? all : all ? [all] : [])
          ]

          return handleListOfConstructs(list)(code)
        }
      }

      /** Try a list of candidate constructs in order. */
      function handleListOfConstructs(list) {
        listOfConstructs = list
        constructIndex = 0

        if (list.length === 0) {
          assert(bogusState, 'expected `bogusState` to be given')
          return bogusState
        }

        return handleConstruct(list[constructIndex])
      }

      /** Run one candidate construct. */
      function handleConstruct(construct) {
        return start

        function start(code) {
          // Snapshot the tokenizer so a failing candidate can be rolled back.
          info = store()
          currentConstruct = construct

          if (!construct.partial) {
            context.currentConstruct = construct
          }

          assert(
            context.parser.constructs.disable.null,
            'expected `disable.null` to be populated'
          )

          // Skip constructs disabled by the parser configuration.
          if (
            construct.name &&
            context.parser.constructs.disable.null.includes(construct.name)
          ) {
            return nok(code)
          }

          return construct.tokenize.call(
            // Layer `fields` (e.g., `interrupt`) over the context if given.
            fields ? Object.assign(Object.create(context), fields) : context,
            effects,
            ok,
            nok
          )(code)
        }
      }

      /** The candidate succeeded: report it and continue at `returnState`. */
      function ok(code) {
        assert(code === expectedCode, 'expected code')
        consumed = true
        onreturn(currentConstruct, info)
        return returnState
      }

      /** The candidate failed: roll back; try the next candidate, or bail. */
      function nok(code) {
        assert(code === expectedCode, 'expected code')
        consumed = true
        info.restore()

        if (++constructIndex < listOfConstructs.length) {
          return handleConstruct(listOfConstructs[constructIndex])
        }

        return bogusState
      }
    }
  }

  /**
   * Apply a successful construct's `resolve`/`resolveTo` to the events from
   * `from` onwards, and register its `resolveAll` for the end of the stream.
   */
  function addResult(construct, from) {
    if (construct.resolveAll && !resolveAllConstructs.includes(construct)) {
      resolveAllConstructs.push(construct)
    }

    if (construct.resolve) {
      splice(
        context.events,
        from,
        context.events.length - from,
        construct.resolve(context.events.slice(from), context)
      )
    }

    if (construct.resolveTo) {
      context.events = construct.resolveTo(context.events, context)
    }

    assert(
      construct.partial ||
        context.events.length === 0 ||
        context.events[context.events.length - 1][0] === 'exit',
      'expected last token to end'
    )
  }

  /**
   * Snapshot the current tokenizer state; the returned `restore` rolls
   * everything (point, previous code, current construct, events, stack) back
   * to this moment.
   */
  function store() {
    const startPoint = now()
    const startPrevious = context.previous
    const startCurrentConstruct = context.currentConstruct
    const startEventsIndex = context.events.length
    const startStack = Array.from(stack)

    return {restore, from: startEventsIndex}

    /** Roll the tokenizer back to the stored state. */
    function restore() {
      point = startPoint
      context.previous = startPrevious
      context.currentConstruct = startCurrentConstruct
      context.events.length = startEventsIndex
      stack = startStack
      accountForPotentialSkip()
      debug('position: restore: `%j`', point)
    }
  }

  /**
   * If a skip is defined for the current line and we are at its start
   * (column < 2), jump the column and offset forward to the registered
   * column.
   */
  function accountForPotentialSkip() {
    if (point.line in columnStart && point.column < 2) {
      point.column = columnStart[point.line]
      point.offset += columnStart[point.line] - 1
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Get the chunks covered by `token` from `chunks`.
 *
 * Points carry `_index` (position in `chunks`) and `_bufferIndex` (position
 * inside a string chunk, or `-1` when the point is not inside one).
 *
 * @param chunks Stream of string chunks and character codes.
 * @param token Token with `start` and `end` points.
 * @returns Array of chunks, with boundary strings sliced to the span.
 */
function sliceChunks(chunks, token) {
  const startIndex = token.start._index
  const startBufferIndex = token.start._bufferIndex
  const endIndex = token.end._index
  const endBufferIndex = token.end._bufferIndex

  // Both points fall inside the same (string) chunk: slice within it.
  if (startIndex === endIndex) {
    assert(endBufferIndex > -1, 'expected non-negative end buffer index')
    assert(startBufferIndex > -1, 'expected non-negative start buffer index')
    return [chunks[startIndex].slice(startBufferIndex, endBufferIndex)]
  }

  const view = chunks.slice(startIndex, endIndex)

  // The start point is inside a string chunk: trim the head.
  if (startBufferIndex > -1) {
    const head = view[0]

    if (typeof head === 'string') {
      view[0] = head.slice(startBufferIndex)
    } else {
      assert(startBufferIndex === 0, 'expected `startBufferIndex` to be `0`')
      view.shift()
    }
  }

  // The end point is inside a string chunk: append the sliced tail.
  if (endBufferIndex > 0) {
    view.push(chunks[endIndex].slice(0, endBufferIndex))
  }

  return view
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Serialize chunks to a string.
 *
 * String chunks pass through as-is; code chunks are mapped to their character
 * values. With `expandTabs`, tabs and virtual spaces both become spaces;
 * without it, a virtual space immediately following a tab is dropped (the tab
 * character already covers it).
 *
 * @param chunks Stream of string chunks and character codes.
 * @param expandTabs Whether to render tabs (and virtual spaces) as spaces.
 * @returns Serialized string.
 */
function serializeChunks(chunks, expandTabs) {
  /** Serialized pieces, joined at the end. */
  const parts = []
  /** Whether the previously handled chunk was a horizontal tab. */
  let previousWasTab = false

  for (const chunk of chunks) {
    let value

    if (typeof chunk === 'string') {
      value = chunk
    } else if (chunk === codes.carriageReturn) {
      value = values.cr
    } else if (chunk === codes.lineFeed) {
      value = values.lf
    } else if (chunk === codes.carriageReturnLineFeed) {
      value = values.cr + values.lf
    } else if (chunk === codes.horizontalTab) {
      value = expandTabs ? values.space : values.ht
    } else if (chunk === codes.virtualSpace) {
      // Without tab expansion, virtual spaces right after a tab are already
      // represented by the tab itself: skip them, leaving `previousWasTab`
      // unchanged so a run of virtual spaces is skipped entirely.
      if (!expandTabs && previousWasTab) continue
      value = values.space
    } else {
      assert(typeof chunk === 'number', 'expected number')
      value = String.fromCharCode(chunk)
    }

    previousWasTab = chunk === codes.horizontalTab
    parts.push(value)
  }

  return parts.join('')
}
|
|