"use strict";
// CommonJS interop marker so ES-module consumers treat `exports.default` as the default export.
Object.defineProperty(exports, "__esModule", { value: true });
// Declared up front; the QuoteType enum further down fills it in.
exports.QuoteType = void 0;
// Supplies the entity decode tries, `determineBranch`, `BinTrieFlags` and `replaceCodePoint`.
var decode_js_1 = require("entities/lib/decode.js");
/**
 * Character codes the tokenizer compares input against.
 * Built like a TypeScript numeric enum: `CharCodes.Tab === 9` and
 * `CharCodes[9] === "Tab"`.
 */
var CharCodes;
(function (codes) {
    // [name, code] pairs, kept in their original declaration order.
    var table = [
        ["Tab", 9],
        ["NewLine", 10],
        ["FormFeed", 12],
        ["CarriageReturn", 13],
        ["Space", 32],
        ["ExclamationMark", 33],
        ["Number", 35],
        ["Amp", 38],
        ["SingleQuote", 39],
        ["DoubleQuote", 34],
        ["Dash", 45],
        ["Slash", 47],
        ["Zero", 48],
        ["Nine", 57],
        ["Semi", 59],
        ["Lt", 60],
        ["Eq", 61],
        ["Gt", 62],
        ["Questionmark", 63],
        ["UpperA", 65],
        ["LowerA", 97],
        ["UpperF", 70],
        ["LowerF", 102],
        ["UpperZ", 90],
        ["LowerZ", 122],
        ["LowerX", 120],
        ["OpeningSquareBracket", 91],
    ];
    for (var i = 0; i < table.length; i++) {
        var label = table[i][0];
        var code = table[i][1];
        codes[label] = code; // forward mapping: name -> code
        codes[code] = label; // reverse mapping: code -> name
    }
})(CharCodes || (CharCodes = {}));
/**
 * All states of the tokenizer's state machine.
 * Numeric-enum layout: names map to 1..29 in the order listed, and each
 * number maps back to its name.
 */
var State;
(function (states) {
    var orderedNames = [
        "Text",
        "BeforeTagName",
        "InTagName",
        "InSelfClosingTag",
        "BeforeClosingTagName",
        "InClosingTagName",
        "AfterClosingTagName",
        // Attributes
        "BeforeAttributeName",
        "InAttributeName",
        "AfterAttributeName",
        "BeforeAttributeValue",
        "InAttributeValueDq",
        "InAttributeValueSq",
        "InAttributeValueNq",
        // Declarations
        "BeforeDeclaration",
        "InDeclaration",
        // Processing instructions
        "InProcessingInstruction",
        // Comments & CDATA
        "BeforeComment",
        "CDATASequence",
        "InSpecialComment",
        "InCommentLike",
        // Special tags (script / style / title)
        "BeforeSpecialS",
        "SpecialStartSequence",
        "InSpecialTag",
        // Entities
        "BeforeEntity",
        "BeforeNumericEntity",
        "InNamedEntity",
        "InNumericEntity",
        "InHexEntity",
    ];
    orderedNames.forEach(function (label, position) {
        var value = position + 1; // numbering starts at 1
        states[label] = value;
        states[value] = label;
    });
})(State || (State = {}));
/**
 * Tells whether a character code is one of the five whitespace characters
 * the tokenizer recognises: space, line feed, tab, form feed, carriage return.
 *
 * @param {number} c Character code to classify.
 * @returns {boolean} `true` for whitespace, `false` otherwise.
 */
function isWhitespace(c) {
    switch (c) {
        case CharCodes.Space:
        case CharCodes.NewLine:
        case CharCodes.Tab:
        case CharCodes.FormFeed:
        case CharCodes.CarriageReturn:
            return true;
        default:
            return false;
    }
}
/**
 * Tells whether a character code ends a tag name or attribute name:
 * `/`, `>` or any whitespace character.
 *
 * @param {number} c Character code to classify.
 * @returns {boolean} `true` if the current tag section stops at `c`.
 */
function isEndOfTagSection(c) {
    if (c === CharCodes.Slash) {
        return true;
    }
    if (c === CharCodes.Gt) {
        return true;
    }
    return isWhitespace(c);
}
/**
 * Tells whether a character code is an ASCII decimal digit (`0`-`9`).
 *
 * @param {number} c Character code to classify.
 * @returns {boolean} `true` when `c` lies between `CharCodes.Zero` and `CharCodes.Nine`, inclusive.
 */
function isNumber(c) {
    var atLeastZero = c >= CharCodes.Zero;
    var atMostNine = c <= CharCodes.Nine;
    return atLeastZero && atMostNine;
}
/**
 * Tells whether a character code is an ASCII letter (`a`-`z` or `A`-`Z`).
 *
 * @param {number} c Character code to classify.
 * @returns {boolean} `true` for ASCII letters, `false` for everything else.
 */
function isASCIIAlpha(c) {
    if (c >= CharCodes.LowerA && c <= CharCodes.LowerZ) {
        return true;
    }
    return c >= CharCodes.UpperA && c <= CharCodes.UpperZ;
}
/**
 * Tells whether a character code is a hexadecimal *letter* (`A`-`F` or `a`-`f`).
 * Decimal digits are not matched here; the hex-entity state checks `isNumber`
 * before calling this.
 *
 * @param {number} c Character code to classify.
 * @returns {boolean} `true` for `A`-`F` / `a`-`f`.
 */
function isHexDigit(c) {
    if (c >= CharCodes.UpperA && c <= CharCodes.UpperF) {
        return true;
    }
    return c >= CharCodes.LowerA && c <= CharCodes.LowerF;
}
/**
 * How an attribute value was quoted; passed as the first argument to the
 * `onattribend` callback. Numeric-enum layout (name <-> number), also
 * published as `exports.QuoteType`.
 */
var QuoteType;
(function (quoteTypes) {
    ["NoValue", "Unquoted", "Single", "Double"].forEach(function (label, value) {
        quoteTypes[label] = value; // NoValue = 0, Unquoted = 1, Single = 2, Double = 3
        quoteTypes[value] = label;
    });
})(QuoteType = exports.QuoteType || (exports.QuoteType = {}));
/**
 * Byte sequences the tokenizer matches character by character.
 * The `*End` tag sequences are stored in lower case; the tokenizer compares
 * them against `c | 0x20`, which makes the match case-insensitive.
 */
var Sequences = (function () {
    // Converts an ASCII string into a Uint8Array of its character codes.
    function asciiBytes(text) {
        var out = new Uint8Array(text.length);
        for (var i = 0; i < text.length; i++) {
            out[i] = text.charCodeAt(i);
        }
        return out;
    }
    return {
        Cdata: asciiBytes("CDATA["),
        CdataEnd: asciiBytes("]]>"),
        CommentEnd: asciiBytes("-->"),
        ScriptEnd: asciiBytes("</script"),
        StyleEnd: asciiBytes("</style"),
        TitleEnd: asciiBytes("</title"),
    };
})();
var Tokenizer = /** @class */ (function () {
    /**
     * Streaming tokenizer for HTML/XML. Input arrives in chunks through
     * `write()`; the tokenizer never hands out strings, it reports absolute
     * `[start, end)` indices (counted across all chunks) to the callbacks.
     *
     * @param {{xmlMode?: boolean, decodeEntities?: boolean}} _a Options.
     *     `xmlMode` defaults to `false`, `decodeEntities` defaults to `true`.
     * @param {object} cbs Callback object. The tokenizer calls `ontext`,
     *     `ontextentity`, `onattribentity`, `onopentagname`, `onopentagend`,
     *     `onselfclosingtag`, `onclosetag`, `onattribname`, `onattribdata`,
     *     `onattribend`, `oncdata`, `oncomment`, `ondeclaration`,
     *     `onprocessinginstruction` and `onend` on it.
     */
    function Tokenizer(_a, cbs) {
        var _b = _a.xmlMode, xmlMode = _b === void 0 ? false : _b;
        var _c = _a.decodeEntities, decodeEntities = _c === void 0 ? true : _c;
        this.cbs = cbs;
        /** The current state the tokenizer is in. */
        this.state = State.Text;
        /** The chunk currently being read. */
        this.buffer = "";
        /** Absolute index where the section currently being read started. */
        this.sectionStart = 0;
        /** Absolute index of the character currently being examined. */
        this.index = 0;
        /** State to return to once an entity has been handled. */
        this.baseState = State.Text;
        /** True while inside an opening script/style/title tag. */
        this.isSpecial = false;
        /** False while paused. */
        this.running = true;
        /** Absolute index of the first character of `buffer`. */
        this.offset = 0;
        /** Sequence currently being matched (one of `Sequences`). */
        this.currentSequence = undefined;
        /** Number of characters of `currentSequence` matched so far. */
        this.sequenceIndex = 0;
        this.trieIndex = 0;
        this.trieCurrent = 0;
        /** Named entities: trie index of the last match. Numeric entities: the accumulated code point. */
        this.entityResult = 0;
        /** Characters consumed since the `&` that are not (yet) part of an emitted entity. */
        this.entityExcess = 0;
        this.xmlMode = xmlMode;
        this.decodeEntities = decodeEntities;
        this.entityTrie = xmlMode ? decode_js_1.xmlDecodeTree : decode_js_1.htmlDecodeTree;
    }
    /**
     * Returns the tokenizer to its initial state so it can be reused for a
     * new document.
     *
     * Besides the positional state this also clears `isSpecial`,
     * `sequenceIndex` and the entity bookkeeping. Leaving `isSpecial` set
     * after a reset in the middle of `<script` / `<style` / `<title` would
     * send the next `>` into `InSpecialTag` with `currentSequence` undefined
     * and throw.
     */
    Tokenizer.prototype.reset = function () {
        this.state = State.Text;
        this.buffer = "";
        this.sectionStart = 0;
        this.index = 0;
        this.baseState = State.Text;
        this.isSpecial = false;
        this.currentSequence = undefined;
        this.sequenceIndex = 0;
        this.running = true;
        this.offset = 0;
        this.trieIndex = 0;
        this.trieCurrent = 0;
        this.entityResult = 0;
        this.entityExcess = 0;
    };
    /**
     * Feeds the next chunk of input and tokenizes it.
     *
     * @param {string} chunk Next piece of the document.
     */
    Tokenizer.prototype.write = function (chunk) {
        // The previous chunk is discarded; remember how many characters precede the new one.
        this.offset += this.buffer.length;
        this.buffer = chunk;
        this.parse();
    };
    /** Signals the end of input. Does nothing while the tokenizer is paused. */
    Tokenizer.prototype.end = function () {
        if (this.running)
            this.finish();
    };
    /** Stops the parse loop after the character currently being processed. */
    Tokenizer.prototype.pause = function () {
        this.running = false;
    };
    /** Resumes after `pause()` and continues with whatever is left of the current chunk. */
    Tokenizer.prototype.resume = function () {
        this.running = true;
        if (this.index < this.buffer.length + this.offset) {
            this.parse();
        }
    };
    /**
     * @returns {number} The absolute index of the character currently being examined.
     */
    Tokenizer.prototype.getIndex = function () {
        return this.index;
    };
    /**
     * @returns {number} The absolute index where the current section started.
     */
    Tokenizer.prototype.getSectionStart = function () {
        return this.sectionStart;
    };
    /** Text state: look for `<` (tag start) or, when decoding entities, `&`. */
    Tokenizer.prototype.stateText = function (c) {
        if (c === CharCodes.Lt ||
            // Without entity decoding only `<` matters, so skip ahead to it.
            (!this.decodeEntities && this.fastForwardTo(CharCodes.Lt))) {
            if (this.index > this.sectionStart) {
                this.cbs.ontext(this.sectionStart, this.index);
            }
            this.state = State.BeforeTagName;
            this.sectionStart = this.index;
        }
        else if (this.decodeEntities && c === CharCodes.Amp) {
            this.state = State.BeforeEntity;
        }
    };
    /**
     * Matches the rest of a special tag name (script/style/title) in an
     * opening tag. On mismatch the tag is treated as an ordinary one.
     */
    Tokenizer.prototype.stateSpecialStartSequence = function (c) {
        var isEnd = this.sequenceIndex === this.currentSequence.length;
        var isMatch = isEnd
            ? // The whole name matched; it only counts if the tag name ends here.
                isEndOfTagSection(c)
            : // Otherwise compare case-insensitively (sequences are lower case).
                (c | 0x20) === this.currentSequence[this.sequenceIndex];
        if (!isMatch) {
            this.isSpecial = false;
        }
        else if (!isEnd) {
            this.sequenceIndex++;
            return;
        }
        this.sequenceIndex = 0;
        this.state = State.InTagName;
        this.stateInTagName(c);
    };
    /** Inside a special tag's content: search for the matching end tag (`currentSequence`). */
    Tokenizer.prototype.stateInSpecialTag = function (c) {
        if (this.sequenceIndex === this.currentSequence.length) {
            if (c === CharCodes.Gt || isWhitespace(c)) {
                // `index` is just past the end-tag name; step back to the `<`.
                var endOfText = this.index - this.currentSequence.length;
                if (this.sectionStart < endOfText) {
                    // Temporarily move `index` so callbacks that read it see the end of the text.
                    var actualIndex = this.index;
                    this.index = endOfText;
                    this.cbs.ontext(this.sectionStart, endOfText);
                    this.index = actualIndex;
                }
                this.isSpecial = false;
                this.sectionStart = endOfText + 2; // Skip over the `</`
                this.stateInClosingTagName(c);
                return; // The end tag was found.
            }
            this.sequenceIndex = 0;
        }
        if ((c | 0x20) === this.currentSequence[this.sequenceIndex]) {
            this.sequenceIndex += 1;
        }
        else if (this.sequenceIndex === 0) {
            if (this.currentSequence === Sequences.TitleEnd) {
                // Title content may contain entities, so it cannot be skipped over.
                if (this.decodeEntities && c === CharCodes.Amp) {
                    this.state = State.BeforeEntity;
                }
            }
            else if (this.fastForwardTo(CharCodes.Lt)) {
                // Landed on a `<`, which is the first character of every end sequence.
                this.sequenceIndex = 1;
            }
        }
        else {
            // Partial match failed; a `<` here may start a new match.
            this.sequenceIndex = Number(c === CharCodes.Lt);
        }
    };
    /** After `<![`: match `CDATA[`; anything else is handled as a declaration. */
    Tokenizer.prototype.stateCDATASequence = function (c) {
        if (c === Sequences.Cdata[this.sequenceIndex]) {
            if (++this.sequenceIndex === Sequences.Cdata.length) {
                this.state = State.InCommentLike;
                this.currentSequence = Sequences.CdataEnd;
                this.sequenceIndex = 0;
                this.sectionStart = this.index + 1;
            }
        }
        else {
            this.sequenceIndex = 0;
            this.state = State.InDeclaration;
            this.stateInDeclaration(c); // Reconsume the character
        }
    };
    /**
     * Advances `index` until the character at `index` equals `c`. The search
     * starts at the character after the current one.
     *
     * @param {number} c Character code to look for.
     * @returns {boolean} `true` if found (`index` then points at it). On
     *     `false`, `index` is left on the last character of the buffer, so
     *     the `index++` in `parse()` moves it to the end of the chunk.
     */
    Tokenizer.prototype.fastForwardTo = function (c) {
        while (++this.index < this.buffer.length + this.offset) {
            if (this.buffer.charCodeAt(this.index - this.offset) === c) {
                return true;
            }
        }
        this.index = this.buffer.length + this.offset - 1;
        return false;
    };
    /**
     * Inside a comment or CDATA section: search for `currentSequence`
     * (`-->` or `]]>`) and report the section once it is complete.
     */
    Tokenizer.prototype.stateInCommentLike = function (c) {
        if (c === this.currentSequence[this.sequenceIndex]) {
            if (++this.sequenceIndex === this.currentSequence.length) {
                // Third argument is 2 here and 0 for unterminated sections in
                // handleTrailingData. Presumably the number of end-sequence
                // characters before `index` the callback must trim - confirm
                // against the callback implementation.
                if (this.currentSequence === Sequences.CdataEnd) {
                    this.cbs.oncdata(this.sectionStart, this.index, 2);
                }
                else {
                    this.cbs.oncomment(this.sectionStart, this.index, 2);
                }
                this.sequenceIndex = 0;
                this.sectionStart = this.index + 1;
                this.state = State.Text;
            }
        }
        else if (this.sequenceIndex === 0) {
            // Nothing matched yet: skip straight to the sequence's first character.
            if (this.fastForwardTo(this.currentSequence[0])) {
                this.sequenceIndex = 1;
            }
        }
        else if (c !== this.currentSequence[this.sequenceIndex - 1]) {
            // A repeat of the previous character keeps the partial match alive
            // (e.g. `--->`, `]]]>`); anything else restarts the search.
            this.sequenceIndex = 0;
        }
    };
    /**
     * @param {number} c Character code following `<` or `</`.
     * @returns {boolean} Whether `c` may start a tag name: an ASCII letter in
     *     HTML mode, anything that is not `/`, `>` or whitespace in XML mode.
     */
    Tokenizer.prototype.isTagStartChar = function (c) {
        return this.xmlMode ? !isEndOfTagSection(c) : isASCIIAlpha(c);
    };
    /**
     * Starts matching a special tag name.
     *
     * @param {Uint8Array} sequence End sequence of the tag (`Sequences.*End`).
     * @param {number} offset Index into `sequence` of the next character to match.
     */
    Tokenizer.prototype.startSpecial = function (sequence, offset) {
        this.isSpecial = true;
        this.currentSequence = sequence;
        this.sequenceIndex = offset;
        this.state = State.SpecialStartSequence;
    };
    /** Directly after `<`: decide between declaration, processing instruction, tag, closing tag or text. */
    Tokenizer.prototype.stateBeforeTagName = function (c) {
        if (c === CharCodes.ExclamationMark) {
            this.state = State.BeforeDeclaration;
            this.sectionStart = this.index + 1;
        }
        else if (c === CharCodes.Questionmark) {
            this.state = State.InProcessingInstruction;
            this.sectionStart = this.index + 1;
        }
        else if (this.isTagStartChar(c)) {
            var lower = c | 0x20;
            this.sectionStart = this.index;
            if (!this.xmlMode && lower === Sequences.TitleEnd[2]) {
                // `t`: could be <title>; index 3 is the next character to match.
                this.startSpecial(Sequences.TitleEnd, 3);
            }
            else {
                this.state =
                    !this.xmlMode && lower === Sequences.ScriptEnd[2]
                        ? State.BeforeSpecialS // `s`: could be <script> or <style>
                        : State.InTagName;
            }
        }
        else if (c === CharCodes.Slash) {
            this.state = State.BeforeClosingTagName;
        }
        else {
            // Not a tag after all; the `<` is part of the text.
            this.state = State.Text;
            this.stateText(c);
        }
    };
    /** Inside an opening tag's name: emit it once the name ends. */
    Tokenizer.prototype.stateInTagName = function (c) {
        if (isEndOfTagSection(c)) {
            this.cbs.onopentagname(this.sectionStart, this.index);
            this.sectionStart = -1;
            this.state = State.BeforeAttributeName;
            this.stateBeforeAttributeName(c);
        }
    };
    /** Directly after `</`. */
    Tokenizer.prototype.stateBeforeClosingTagName = function (c) {
        if (isWhitespace(c)) {
            // Ignore whitespace between `</` and the name.
        }
        else if (c === CharCodes.Gt) {
            // `</>` is dropped silently.
            this.state = State.Text;
        }
        else {
            this.state = this.isTagStartChar(c)
                ? State.InClosingTagName
                : State.InSpecialComment;
            this.sectionStart = this.index;
        }
    };
    /** Inside a closing tag's name: emit it on `>` or whitespace. */
    Tokenizer.prototype.stateInClosingTagName = function (c) {
        if (c === CharCodes.Gt || isWhitespace(c)) {
            this.cbs.onclosetag(this.sectionStart, this.index);
            this.sectionStart = -1;
            this.state = State.AfterClosingTagName;
            this.stateAfterClosingTagName(c);
        }
    };
    /** After a closing tag's name: everything up to `>` is skipped. */
    Tokenizer.prototype.stateAfterClosingTagName = function (c) {
        if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
            this.state = State.Text;
            this.baseState = State.Text;
            this.sectionStart = this.index + 1;
        }
    };
    /** Inside an opening tag, between attributes. */
    Tokenizer.prototype.stateBeforeAttributeName = function (c) {
        if (c === CharCodes.Gt) {
            this.cbs.onopentagend(this.index);
            if (this.isSpecial) {
                this.state = State.InSpecialTag;
                this.sequenceIndex = 0;
            }
            else {
                this.state = State.Text;
            }
            // Entities found in the tag's content return to this state.
            this.baseState = this.state;
            this.sectionStart = this.index + 1;
        }
        else if (c === CharCodes.Slash) {
            this.state = State.InSelfClosingTag;
        }
        else if (!isWhitespace(c)) {
            this.state = State.InAttributeName;
            this.sectionStart = this.index;
        }
    };
    /** After `/` inside an opening tag. */
    Tokenizer.prototype.stateInSelfClosingTag = function (c) {
        if (c === CharCodes.Gt) {
            this.cbs.onselfclosingtag(this.index);
            this.state = State.Text;
            this.baseState = State.Text;
            this.sectionStart = this.index + 1;
            this.isSpecial = false; // Reset special state, in case of self-closing special tags
        }
        else if (!isWhitespace(c)) {
            this.state = State.BeforeAttributeName;
            this.stateBeforeAttributeName(c);
        }
    };
    /** Inside an attribute name: emit it on `=`, `/`, `>` or whitespace. */
    Tokenizer.prototype.stateInAttributeName = function (c) {
        if (c === CharCodes.Eq || isEndOfTagSection(c)) {
            this.cbs.onattribname(this.sectionStart, this.index);
            this.sectionStart = -1;
            this.state = State.AfterAttributeName;
            this.stateAfterAttributeName(c);
        }
    };
    /** After an attribute name: either a value follows (`=`) or the attribute has none. */
    Tokenizer.prototype.stateAfterAttributeName = function (c) {
        if (c === CharCodes.Eq) {
            this.state = State.BeforeAttributeValue;
        }
        else if (c === CharCodes.Slash || c === CharCodes.Gt) {
            this.cbs.onattribend(QuoteType.NoValue, this.index);
            this.state = State.BeforeAttributeName;
            this.stateBeforeAttributeName(c);
        }
        else if (!isWhitespace(c)) {
            // A new attribute name starts right here.
            this.cbs.onattribend(QuoteType.NoValue, this.index);
            this.state = State.InAttributeName;
            this.sectionStart = this.index;
        }
    };
    /** After `=`: determine how the value is quoted. */
    Tokenizer.prototype.stateBeforeAttributeValue = function (c) {
        if (c === CharCodes.DoubleQuote) {
            this.state = State.InAttributeValueDq;
            this.sectionStart = this.index + 1;
        }
        else if (c === CharCodes.SingleQuote) {
            this.state = State.InAttributeValueSq;
            this.sectionStart = this.index + 1;
        }
        else if (!isWhitespace(c)) {
            this.sectionStart = this.index;
            this.state = State.InAttributeValueNq;
            this.stateInAttributeValueNoQuotes(c); // Reconsume token
        }
    };
    /**
     * Shared handler for quoted attribute values.
     *
     * @param {number} c Current character code.
     * @param {number} quote Character code of the quote that closes the value.
     */
    Tokenizer.prototype.handleInAttributeValue = function (c, quote) {
        if (c === quote ||
            // Without entity decoding only the closing quote matters.
            (!this.decodeEntities && this.fastForwardTo(quote))) {
            this.cbs.onattribdata(this.sectionStart, this.index);
            this.sectionStart = -1;
            this.cbs.onattribend(quote === CharCodes.DoubleQuote
                ? QuoteType.Double
                : QuoteType.Single, this.index);
            this.state = State.BeforeAttributeName;
        }
        else if (this.decodeEntities && c === CharCodes.Amp) {
            this.baseState = this.state;
            this.state = State.BeforeEntity;
        }
    };
    /** Inside a double-quoted attribute value. */
    Tokenizer.prototype.stateInAttributeValueDoubleQuotes = function (c) {
        this.handleInAttributeValue(c, CharCodes.DoubleQuote);
    };
    /** Inside a single-quoted attribute value. */
    Tokenizer.prototype.stateInAttributeValueSingleQuotes = function (c) {
        this.handleInAttributeValue(c, CharCodes.SingleQuote);
    };
    /** Inside an unquoted attribute value: ends at whitespace or `>`. */
    Tokenizer.prototype.stateInAttributeValueNoQuotes = function (c) {
        if (isWhitespace(c) || c === CharCodes.Gt) {
            this.cbs.onattribdata(this.sectionStart, this.index);
            this.sectionStart = -1;
            this.cbs.onattribend(QuoteType.Unquoted, this.index);
            this.state = State.BeforeAttributeName;
            this.stateBeforeAttributeName(c);
        }
        else if (this.decodeEntities && c === CharCodes.Amp) {
            this.baseState = this.state;
            this.state = State.BeforeEntity;
        }
    };
    /** Directly after `<!`: `[` may start CDATA, `-` may start a comment, the rest is a declaration. */
    Tokenizer.prototype.stateBeforeDeclaration = function (c) {
        if (c === CharCodes.OpeningSquareBracket) {
            this.state = State.CDATASequence;
            this.sequenceIndex = 0;
        }
        else {
            this.state =
                c === CharCodes.Dash
                    ? State.BeforeComment
                    : State.InDeclaration;
        }
    };
    /** Inside `<!...`: emit the declaration at the next `>`. */
    Tokenizer.prototype.stateInDeclaration = function (c) {
        if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
            this.cbs.ondeclaration(this.sectionStart, this.index);
            this.state = State.Text;
            this.sectionStart = this.index + 1;
        }
    };
    /** Inside `<?...`: emit the processing instruction at the next `>`. */
    Tokenizer.prototype.stateInProcessingInstruction = function (c) {
        if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
            this.cbs.onprocessinginstruction(this.sectionStart, this.index);
            this.state = State.Text;
            this.sectionStart = this.index + 1;
        }
    };
    /** After `<!-`: a second `-` opens a comment, anything else is a declaration. */
    Tokenizer.prototype.stateBeforeComment = function (c) {
        if (c === CharCodes.Dash) {
            this.state = State.InCommentLike;
            this.currentSequence = Sequences.CommentEnd;
            // Pretend `--` was already matched so that `<!-->` ends the comment immediately.
            this.sequenceIndex = 2;
            this.sectionStart = this.index + 1;
        }
        else {
            this.state = State.InDeclaration;
        }
    };
    /** Inside `</` followed by a non-name character: reported as a comment up to the next `>`. */
    Tokenizer.prototype.stateInSpecialComment = function (c) {
        if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
            this.cbs.oncomment(this.sectionStart, this.index, 0);
            this.state = State.Text;
            this.sectionStart = this.index + 1;
        }
    };
    /** After `<s`: the next character distinguishes `<script` from `<style` from ordinary tags. */
    Tokenizer.prototype.stateBeforeSpecialS = function (c) {
        var lower = c | 0x20;
        if (lower === Sequences.ScriptEnd[3]) {
            this.startSpecial(Sequences.ScriptEnd, 4);
        }
        else if (lower === Sequences.StyleEnd[3]) {
            this.startSpecial(Sequences.StyleEnd, 4);
        }
        else {
            this.state = State.InTagName;
            this.stateInTagName(c); // Consume the token again
        }
    };
    /** Directly after `&`. */
    Tokenizer.prototype.stateBeforeEntity = function (c) {
        // Start excess with 1 to include the '&'
        this.entityExcess = 1;
        this.entityResult = 0;
        if (c === CharCodes.Number) {
            this.state = State.BeforeNumericEntity;
        }
        else if (c === CharCodes.Amp) {
            // `&&`: stay in this state; the second `&` may start an entity.
        }
        else {
            this.trieIndex = 0;
            this.trieCurrent = this.entityTrie[0];
            this.state = State.InNamedEntity;
            this.stateInNamedEntity(c);
        }
    };
    /**
     * Walks the entity trie one character at a time. The trie layout is
     * defined by the `entities` package; this method only relies on
     * `determineBranch` and the `VALUE_LENGTH` flag bits.
     */
    Tokenizer.prototype.stateInNamedEntity = function (c) {
        this.entityExcess += 1;
        this.trieIndex = (0, decode_js_1.determineBranch)(this.entityTrie, this.trieCurrent, this.trieIndex + 1, c);
        if (this.trieIndex < 0) {
            // No branch for this character: emit what matched so far and reconsume `c`.
            this.emitNamedEntity();
            this.index--;
            return;
        }
        this.trieCurrent = this.entityTrie[this.trieIndex];
        var masked = this.trieCurrent & decode_js_1.BinTrieFlags.VALUE_LENGTH;
        // A non-zero value length means this node carries a value.
        if (masked) {
            // Number of trie slots to skip to get past the value.
            var valueLength = (masked >> 14) - 1;
            // Where legacy entities are not allowed, only a `;`-terminated match counts.
            if (!this.allowLegacyEntity() && c !== CharCodes.Semi) {
                this.trieIndex += valueLength;
            }
            else {
                // Flush the data that precedes the entity.
                var entityStart = this.index - this.entityExcess + 1;
                if (entityStart > this.sectionStart) {
                    this.emitPartial(this.sectionStart, entityStart);
                }
                // Remember the match; a longer entity may still follow.
                this.entityResult = this.trieIndex;
                this.trieIndex += valueLength;
                this.entityExcess = 0;
                this.sectionStart = this.index + 1;
                if (valueLength === 0) {
                    this.emitNamedEntity();
                }
            }
        }
    };
    /** Leaves the entity states and emits the code point(s) of the last named-entity match, if any. */
    Tokenizer.prototype.emitNamedEntity = function () {
        this.state = this.baseState;
        if (this.entityResult === 0) {
            return;
        }
        var valueLength = (this.entityTrie[this.entityResult] & decode_js_1.BinTrieFlags.VALUE_LENGTH) >>
            14;
        switch (valueLength) {
            case 1: {
                // Value is stored in the node itself, next to the flag bits.
                this.emitCodePoint(this.entityTrie[this.entityResult] &
                    ~decode_js_1.BinTrieFlags.VALUE_LENGTH);
                break;
            }
            case 2: {
                this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
                break;
            }
            case 3: {
                this.emitCodePoint(this.entityTrie[this.entityResult + 1]);
                this.emitCodePoint(this.entityTrie[this.entityResult + 2]);
            }
        }
    };
    /** Directly after `&#`: `x`/`X` selects hexadecimal, anything else is tried as decimal. */
    Tokenizer.prototype.stateBeforeNumericEntity = function (c) {
        if ((c | 0x20) === CharCodes.LowerX) {
            this.entityExcess++;
            this.state = State.InHexEntity;
        }
        else {
            this.state = State.InNumericEntity;
            this.stateInNumericEntity(c);
        }
    };
    /**
     * Emits the numeric entity collected in `entityResult`, provided at least
     * one digit was read, and returns to `baseState`.
     *
     * @param {boolean} strict `true` when the entity ended with `;` (the `;`
     *     at `index` is then excluded from the next section).
     */
    Tokenizer.prototype.emitNumericEntity = function (strict) {
        var entityStart = this.index - this.entityExcess - 1;
        // Skip `&#`, plus the `x` for hexadecimal entities.
        var numberStart = entityStart + 2 + Number(this.state === State.InHexEntity);
        if (numberStart !== this.index) {
            // Flush the data that precedes the entity.
            if (entityStart > this.sectionStart) {
                this.emitPartial(this.sectionStart, entityStart);
            }
            this.sectionStart = this.index + Number(strict);
            this.emitCodePoint((0, decode_js_1.replaceCodePoint)(this.entityResult));
        }
        this.state = this.baseState;
    };
    /** Inside a decimal entity (`&#123`). */
    Tokenizer.prototype.stateInNumericEntity = function (c) {
        if (c === CharCodes.Semi) {
            this.emitNumericEntity(true);
        }
        else if (isNumber(c)) {
            this.entityResult = this.entityResult * 10 + (c - CharCodes.Zero);
            this.entityExcess++;
        }
        else {
            if (this.allowLegacyEntity()) {
                this.emitNumericEntity(false);
            }
            else {
                this.state = this.baseState;
            }
            this.index--; // Reconsume the character
        }
    };
    /** Inside a hexadecimal entity (`&#x1f`). */
    Tokenizer.prototype.stateInHexEntity = function (c) {
        if (c === CharCodes.Semi) {
            this.emitNumericEntity(true);
        }
        else if (isNumber(c)) {
            this.entityResult = this.entityResult * 16 + (c - CharCodes.Zero);
            this.entityExcess++;
        }
        else if (isHexDigit(c)) {
            this.entityResult =
                this.entityResult * 16 + ((c | 0x20) - CharCodes.LowerA + 10);
            this.entityExcess++;
        }
        else {
            if (this.allowLegacyEntity()) {
                this.emitNumericEntity(false);
            }
            else {
                this.state = this.baseState;
            }
            this.index--; // Reconsume the character
        }
    };
    /**
     * @returns {boolean} Whether entities without a trailing `;` are accepted:
     *     only outside XML mode, and only in text or special-tag content.
     */
    Tokenizer.prototype.allowLegacyEntity = function () {
        return (!this.xmlMode &&
            (this.baseState === State.Text ||
                this.baseState === State.InSpecialTag));
    };
    /**
     * Called at the end of every chunk: emits the text or attribute data read
     * so far, so that the section does not span the chunk boundary.
     */
    Tokenizer.prototype.cleanup = function () {
        // If we are paused or nothing is pending, there is nothing to flush.
        if (this.running && this.sectionStart !== this.index) {
            if (this.state === State.Text ||
                (this.state === State.InSpecialTag && this.sequenceIndex === 0)) {
                this.cbs.ontext(this.sectionStart, this.index);
                this.sectionStart = this.index;
            }
            else if (this.state === State.InAttributeValueDq ||
                this.state === State.InAttributeValueSq ||
                this.state === State.InAttributeValueNq) {
                this.cbs.onattribdata(this.sectionStart, this.index);
                this.sectionStart = this.index;
            }
        }
    };
    /**
     * @returns {boolean} Whether there is unread input in the current chunk and the tokenizer is not paused.
     */
    Tokenizer.prototype.shouldContinue = function () {
        return this.index < this.buffer.length + this.offset && this.running;
    };
    /**
     * Main loop: feeds one character at a time to the handler of the current
     * state, then flushes pending data via `cleanup()`.
     */
    Tokenizer.prototype.parse = function () {
        while (this.shouldContinue()) {
            var c = this.buffer.charCodeAt(this.index - this.offset);
            switch (this.state) {
                case State.Text: {
                    this.stateText(c);
                    break;
                }
                case State.SpecialStartSequence: {
                    this.stateSpecialStartSequence(c);
                    break;
                }
                case State.InSpecialTag: {
                    this.stateInSpecialTag(c);
                    break;
                }
                case State.CDATASequence: {
                    this.stateCDATASequence(c);
                    break;
                }
                case State.InAttributeValueDq: {
                    this.stateInAttributeValueDoubleQuotes(c);
                    break;
                }
                case State.InAttributeName: {
                    this.stateInAttributeName(c);
                    break;
                }
                case State.InCommentLike: {
                    this.stateInCommentLike(c);
                    break;
                }
                case State.InSpecialComment: {
                    this.stateInSpecialComment(c);
                    break;
                }
                case State.BeforeAttributeName: {
                    this.stateBeforeAttributeName(c);
                    break;
                }
                case State.InTagName: {
                    this.stateInTagName(c);
                    break;
                }
                case State.InClosingTagName: {
                    this.stateInClosingTagName(c);
                    break;
                }
                case State.BeforeTagName: {
                    this.stateBeforeTagName(c);
                    break;
                }
                case State.AfterAttributeName: {
                    this.stateAfterAttributeName(c);
                    break;
                }
                case State.InAttributeValueSq: {
                    this.stateInAttributeValueSingleQuotes(c);
                    break;
                }
                case State.BeforeAttributeValue: {
                    this.stateBeforeAttributeValue(c);
                    break;
                }
                case State.BeforeClosingTagName: {
                    this.stateBeforeClosingTagName(c);
                    break;
                }
                case State.AfterClosingTagName: {
                    this.stateAfterClosingTagName(c);
                    break;
                }
                case State.BeforeSpecialS: {
                    this.stateBeforeSpecialS(c);
                    break;
                }
                case State.InAttributeValueNq: {
                    this.stateInAttributeValueNoQuotes(c);
                    break;
                }
                case State.InSelfClosingTag: {
                    this.stateInSelfClosingTag(c);
                    break;
                }
                case State.InDeclaration: {
                    this.stateInDeclaration(c);
                    break;
                }
                case State.BeforeDeclaration: {
                    this.stateBeforeDeclaration(c);
                    break;
                }
                case State.BeforeComment: {
                    this.stateBeforeComment(c);
                    break;
                }
                case State.InProcessingInstruction: {
                    this.stateInProcessingInstruction(c);
                    break;
                }
                case State.InNamedEntity: {
                    this.stateInNamedEntity(c);
                    break;
                }
                case State.BeforeEntity: {
                    this.stateBeforeEntity(c);
                    break;
                }
                case State.InHexEntity: {
                    this.stateInHexEntity(c);
                    break;
                }
                case State.InNumericEntity: {
                    this.stateInNumericEntity(c);
                    break;
                }
                default: {
                    // The only state without its own case is State.BeforeNumericEntity.
                    this.stateBeforeNumericEntity(c);
                }
            }
            this.index++;
        }
        this.cleanup();
    };
    /** Emits whatever is still pending at the end of input, then calls `onend`. */
    Tokenizer.prototype.finish = function () {
        if (this.state === State.InNamedEntity) {
            this.emitNamedEntity();
        }
        // If there is remaining data, emit it in a reasonable way
        if (this.sectionStart < this.index) {
            this.handleTrailingData();
        }
        this.cbs.onend();
    };
    /** Handles the unfinished section left over when the input ends. */
    Tokenizer.prototype.handleTrailingData = function () {
        var endIndex = this.buffer.length + this.offset;
        if (this.state === State.InCommentLike) {
            // Unterminated comment / CDATA: report everything up to the end.
            if (this.currentSequence === Sequences.CdataEnd) {
                this.cbs.oncdata(this.sectionStart, endIndex, 0);
            }
            else {
                this.cbs.oncomment(this.sectionStart, endIndex, 0);
            }
        }
        else if ((this.state === State.InNumericEntity ||
            this.state === State.InHexEntity) &&
            this.allowLegacyEntity()) {
            this.emitNumericEntity(false);
            // With no digits (input ending in `&#x`) nothing was emitted and
            // `sectionStart` was left untouched; emit the leftover characters
            // as text instead of dropping them. After a real entity,
            // `sectionStart` equals the end index and this is skipped.
            if (this.sectionStart < endIndex) {
                this.cbs.ontext(this.sectionStart, endIndex);
            }
        }
        else if (this.state === State.InTagName ||
            this.state === State.BeforeAttributeName ||
            this.state === State.BeforeAttributeValue ||
            this.state === State.AfterAttributeName ||
            this.state === State.InAttributeName ||
            this.state === State.InAttributeValueSq ||
            this.state === State.InAttributeValueDq ||
            this.state === State.InAttributeValueNq ||
            this.state === State.InClosingTagName) {
            // Input ended inside a tag or attribute: the unfinished tag is dropped.
        }
        else {
            this.cbs.ontext(this.sectionStart, endIndex);
        }
    };
    /**
     * Emits the data preceding an entity, as text or as attribute data
     * depending on where the entity occurred (`baseState`).
     */
    Tokenizer.prototype.emitPartial = function (start, endIndex) {
        if (this.baseState !== State.Text &&
            this.baseState !== State.InSpecialTag) {
            this.cbs.onattribdata(start, endIndex);
        }
        else {
            this.cbs.ontext(start, endIndex);
        }
    };
    /**
     * Emits a decoded code point, as a text entity or as an attribute entity
     * depending on `baseState`.
     *
     * @param {number} cp Decoded code point.
     */
    Tokenizer.prototype.emitCodePoint = function (cp) {
        if (this.baseState !== State.Text &&
            this.baseState !== State.InSpecialTag) {
            this.cbs.onattribentity(cp);
        }
        else {
            this.cbs.ontextentity(cp);
        }
    };
    return Tokenizer;
}());
// Published as the module's default export.
exports.default = Tokenizer;
|