|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import {decodeNamedCharacterReference} from 'decode-named-character-reference' |
|
import { |
|
asciiAlphanumeric, |
|
asciiDigit, |
|
asciiHexDigit |
|
} from 'micromark-util-character' |
|
import {codes} from 'micromark-util-symbol/codes.js' |
|
import {constants} from 'micromark-util-symbol/constants.js' |
|
import {types} from 'micromark-util-symbol/types.js' |
|
import {ok as assert} from 'uvu/assert' |
|
|
|
|
|
/**
 * Construct descriptor for character references.
 *
 * Exposes the construct's name and the tokenizer function that recognizes it.
 */
export const characterReference = {
  name: 'characterReference',
  tokenize: tokenizeCharacterReference
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Tokenizer for character references: named (`&name;`), decimal (`&#123;`),
 * and hexadecimal (`&#x1F;` / `&#X1F;`).
 *
 * Written as a state machine: each state receives the current character code
 * and returns the next state. Must be invoked with a `this` that provides
 * `sliceSerialize(token)`, which is used to read back the name of a named
 * reference.
 *
 * @param effects
 *   Object providing `enter(type)`, `exit(type)`, and `consume(code)`.
 * @param ok
 *   State returned after the closing `;` has been consumed.
 * @param nok
 *   State called with the offending code when the input is not a valid
 *   character reference; its result is returned.
 * @returns
 *   The initial state, `start`.
 */
function tokenizeCharacterReference(effects, ok, nok) {
  const self = this
  // Counts value characters seen so far; also guards against an empty value.
  let size = 0
  // Upper bound on value characters, chosen once the kind of reference is known.
  let max
  // Predicate for codes allowed in the value, chosen together with `max`.
  let test

  return start

  /**
   * At the `&` that starts a character reference.
   *
   * Opens the reference token and emits the `&` as a marker.
   */
  function start(code) {
    assert(code === codes.ampersand, 'expected `&`')
    effects.enter(types.characterReference)
    effects.enter(types.characterReferenceMarker)
    effects.consume(code)
    effects.exit(types.characterReferenceMarker)
    return open
  }

  /**
   * After `&`: at `#` for a numeric reference, otherwise at the first
   * character of a named reference.
   */
  function open(code) {
    if (code === codes.numberSign) {
      effects.enter(types.characterReferenceMarkerNumeric)
      effects.consume(code)
      effects.exit(types.characterReferenceMarkerNumeric)
      return numeric
    }

    // Named reference: the current code is not consumed here but handed
    // straight to `value`, which validates it.
    effects.enter(types.characterReferenceValue)
    max = constants.characterReferenceNamedSizeMax
    test = asciiAlphanumeric
    return value(code)
  }

  /**
   * After `&#`: at `x`/`X` for a hexadecimal reference, otherwise at the first
   * character of a decimal reference.
   */
  function numeric(code) {
    if (code === codes.uppercaseX || code === codes.lowercaseX) {
      effects.enter(types.characterReferenceMarkerHexadecimal)
      effects.consume(code)
      effects.exit(types.characterReferenceMarkerHexadecimal)
      effects.enter(types.characterReferenceValue)
      max = constants.characterReferenceHexadecimalSizeMax
      test = asciiHexDigit
      return value
    }

    // Decimal reference: the current code is handed to `value` unconsumed.
    effects.enter(types.characterReferenceValue)
    max = constants.characterReferenceDecimalSizeMax
    test = asciiDigit
    return value(code)
  }

  /**
   * Inside the value (name or digits), or at the closing `;`.
   *
   * A `;` only closes the reference when at least one value character was
   * seen; otherwise it falls through to the generic checks below.
   */
  function value(code) {
    if (code === codes.semicolon && size) {
      const token = effects.exit(types.characterReferenceValue)

      // Named references are only accepted when
      // `decodeNamedCharacterReference` yields a truthy result for the name.
      if (
        test === asciiAlphanumeric &&
        !decodeNamedCharacterReference(self.sliceSerialize(token))
      ) {
        return nok(code)
      }

      // The closing `;` uses the same marker type as the opening `&`.
      effects.enter(types.characterReferenceMarker)
      effects.consume(code)
      effects.exit(types.characterReferenceMarker)
      effects.exit(types.characterReference)
      return ok
    }

    // `size` is only incremented when `test` accepts the code; at most `max`
    // value characters are consumed.
    if (test(code) && size++ < max) {
      effects.consume(code)
      return value
    }

    return nok(code)
  }
}
|
|