Spaces:
Build error
Build error
import Tokenizer, { QuoteType } from "./Tokenizer.js"; | |
import { fromCodePoint } from "entities/lib/decode.js"; | |
/** Form-related elements that are implicitly closed when another form control opens. */
const formTags = new Set([
    "input",
    "option",
    "optgroup",
    "select",
    "button",
    "datalist",
    "textarea",
]);
/** Shared set for every tag whose opening implicitly closes an open `<p>`. */
const pTag = new Set(["p"]);
/** Table sections closed by a following `<tbody>` / `<tfoot>`. */
const tableSectionTags = new Set(["thead", "tbody"]);
/** Description-list items; opening either one closes the other. */
const ddtTags = new Set(["dd", "dt"]);
/** Ruby annotation elements; opening either one closes the other. */
const rtpTags = new Set(["rt", "rp"]);
/**
 * Maps a tag name to the set of tag names that are implicitly closed
 * (popped from the top of the open-element stack) when that tag is opened.
 * Only consulted by the parser outside of XML mode.
 */
const openImpliesClose = new Map([
    ["tr", new Set(["tr", "th", "td"])],
    ["th", new Set(["th"])],
    ["td", new Set(["thead", "th", "td"])],
    ["body", new Set(["head", "link", "script"])],
    ["li", new Set(["li"])],
    ["p", pTag],
    ["h1", pTag],
    ["h2", pTag],
    ["h3", pTag],
    ["h4", pTag],
    ["h5", pTag],
    ["h6", pTag],
    ["select", formTags],
    ["input", formTags],
    ["output", formTags],
    ["button", formTags],
    ["datalist", formTags],
    ["textarea", formTags],
    ["option", new Set(["option"])],
    ["optgroup", new Set(["optgroup", "option"])],
    ["dd", ddtTags],
    ["dt", ddtTags],
    ["address", pTag],
    ["article", pTag],
    ["aside", pTag],
    ["blockquote", pTag],
    ["details", pTag],
    ["div", pTag],
    ["dl", pTag],
    ["fieldset", pTag],
    ["figcaption", pTag],
    ["figure", pTag],
    ["footer", pTag],
    ["form", pTag],
    ["header", pTag],
    ["hr", pTag],
    ["main", pTag],
    ["nav", pTag],
    ["ol", pTag],
    ["pre", pTag],
    ["section", pTag],
    ["table", pTag],
    ["ul", pTag],
    ["rt", rtpTags],
    ["rp", rtpTags],
    ["tbody", tableSectionTags],
    ["tfoot", tableSectionTags],
]);
/**
 * Elements that never have content or a closing tag. Outside of XML mode the
 * parser does not push these onto the open-element stack and reports their
 * close immediately after the open tag.
 */
const voidElements = new Set([
    "area",
    "base",
    "basefont",
    "br",
    "col",
    "command",
    "embed",
    "frame",
    "hr",
    "img",
    "input",
    "isindex",
    "keygen",
    "link",
    "meta",
    "param",
    "source",
    "track",
    "wbr",
]);
/** Elements whose opening makes the parser push `true` onto its foreign-context stack. */
const foreignContextElements = new Set(["math", "svg"]);
/**
 * Elements whose opening makes the parser push `false` onto its foreign-context
 * stack. Names are lower-case, e.g. `foreignobject`.
 */
const htmlIntegrationElements = new Set([
    "mi",
    "mo",
    "mn",
    "ms",
    "mtext",
    "annotation-xml",
    "foreignobject",
    "desc",
    "title",
]);
/** Matches the first character after an instruction/declaration name: whitespace or `/`. */
const reNameEnd = /[\s/]/;
/**
 * Event-driven parser that receives low-level tokenizer events and turns them
 * into handler callbacks (`onopentag`, `ontext`, `onclosetag`, ...), while
 * tracking the stack of open elements and the source indices of each event.
 */
export class Parser {
    /**
     * @param cbs Handler object whose optional `on*` methods receive the parse events.
     *            A nullish value is replaced with an empty object.
     * @param options Parser options. Read here: `xmlMode`, `lowerCaseTags`,
     *                `lowerCaseAttributeNames`, `Tokenizer`, `recognizeSelfClosing`,
     *                `recognizeCDATA`. The object is also forwarded to the tokenizer.
     */
    constructor(cbs, options = {}) {
        this.options = options;
        /** The start index of the last event. */
        this.startIndex = 0;
        /** The end index of the last event. */
        this.endIndex = 0;
        /**
         * Store the start index of the current open tag,
         * so we can update the start index for attributes.
         */
        this.openTagStart = 0;
        this.tagname = "";
        this.attribname = "";
        this.attribvalue = "";
        this.attribs = null;
        /** Names of the currently open elements, innermost last. */
        this.stack = [];
        /** One boolean per open foreign/integration element; the last entry is the current context. */
        this.foreignContext = [];
        /** Chunks passed to `write` that have not been discarded yet. */
        this.buffers = [];
        /** Absolute index of the first character of `buffers[0]`. */
        this.bufferOffset = 0;
        /** The index of the last written buffer. Used when resuming after a `pause()`. */
        this.writeIndex = 0;
        /** Indicates whether the parser has finished running / `.end` has been called. */
        this.ended = false;
        this.cbs = cbs != null ? cbs : {};
        // Names are lower-cased by default unless parsing XML.
        this.lowerCaseTagNames =
            options.lowerCaseTags != null ? options.lowerCaseTags : !options.xmlMode;
        this.lowerCaseAttributeNames =
            options.lowerCaseAttributeNames != null
                ? options.lowerCaseAttributeNames
                : !options.xmlMode;
        const TokenizerImpl = options.Tokenizer != null ? options.Tokenizer : Tokenizer;
        this.tokenizer = new TokenizerImpl(this.options, this);
        if (this.cbs.onparserinit != null) {
            this.cbs.onparserinit(this);
        }
    }
    // Tokenizer event handlers
    /** @internal */
    ontext(start, endIndex) {
        const data = this.getSlice(start, endIndex);
        this.endIndex = endIndex - 1;
        const { cbs } = this;
        if (cbs.ontext != null) {
            cbs.ontext(data);
        }
        this.startIndex = endIndex;
    }
    /** @internal */
    ontextentity(cp) {
        /*
         * Entities can be emitted on the character, or directly after.
         * We use the section start here to get accurate indices.
         */
        const index = this.tokenizer.getSectionStart();
        this.endIndex = index - 1;
        const { cbs } = this;
        if (cbs.ontext != null) {
            cbs.ontext(fromCodePoint(cp));
        }
        this.startIndex = index;
    }
    /**
     * @param name Tag name to check.
     * @returns `true` when not in XML mode and `name` is a void element.
     */
    isVoidElement(name) {
        return !this.options.xmlMode && voidElements.has(name);
    }
    /** @internal */
    onopentagname(start, endIndex) {
        this.endIndex = endIndex;
        let name = this.getSlice(start, endIndex);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        this.emitOpenTag(name);
    }
    /**
     * Registers a new open tag: implicitly closes elements on top of the stack
     * that `name` cannot follow, pushes `name` (unless void), updates the
     * foreign-context stack and fires `onopentagname`.
     *
     * @param name Tag name, already case-normalized by the caller.
     */
    emitOpenTag(name) {
        this.openTagStart = this.startIndex;
        this.tagname = name;
        const { cbs, stack } = this;
        const impliesClose = !this.options.xmlMode && openImpliesClose.get(name);
        if (impliesClose) {
            while (stack.length > 0 && impliesClose.has(stack[stack.length - 1])) {
                const element = stack.pop();
                if (cbs.onclosetag != null) {
                    cbs.onclosetag(element, true);
                }
            }
        }
        if (!this.isVoidElement(name)) {
            stack.push(name);
            if (foreignContextElements.has(name)) {
                this.foreignContext.push(true);
            }
            else if (htmlIntegrationElements.has(name)) {
                this.foreignContext.push(false);
            }
        }
        if (cbs.onopentagname != null) {
            cbs.onopentagname(name);
        }
        // Attributes are only collected when somebody listens for `onopentag`.
        if (cbs.onopentag) {
            this.attribs = {};
        }
    }
    /**
     * Finishes the current open tag: fires `onopentag` with the collected
     * attributes and, for void elements, an implied `onclosetag`.
     *
     * @param isImplied Forwarded to `onopentag` as its third argument.
     */
    endOpenTag(isImplied) {
        this.startIndex = this.openTagStart;
        const { cbs } = this;
        if (this.attribs) {
            if (cbs.onopentag != null) {
                cbs.onopentag(this.tagname, this.attribs, isImplied);
            }
            this.attribs = null;
        }
        if (cbs.onclosetag && this.isVoidElement(this.tagname)) {
            cbs.onclosetag(this.tagname, true);
        }
        this.tagname = "";
    }
    /** @internal */
    onopentagend(endIndex) {
        this.endIndex = endIndex;
        this.endOpenTag(false);
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    }
    /** @internal */
    onclosetag(start, endIndex) {
        this.endIndex = endIndex;
        let name = this.getSlice(start, endIndex);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        // NOTE(review): this pops even when no matching open tag is on the stack.
        if (foreignContextElements.has(name) ||
            htmlIntegrationElements.has(name)) {
            this.foreignContext.pop();
        }
        const { cbs, stack } = this;
        if (!this.isVoidElement(name)) {
            const pos = stack.lastIndexOf(name);
            if (pos !== -1) {
                if (cbs.onclosetag) {
                    // Close everything above the match as implied, the match itself as explicit.
                    let count = stack.length - pos;
                    while (count--) {
                        // We know the stack has sufficient elements.
                        cbs.onclosetag(stack.pop(), count !== 0);
                    }
                }
                else {
                    stack.length = pos;
                }
            }
            else if (!this.options.xmlMode && name === "p") {
                // Implicit open before close
                this.emitOpenTag("p");
                this.closeCurrentTag(true);
            }
        }
        else if (!this.options.xmlMode && name === "br") {
            // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
            if (cbs.onopentagname != null) {
                cbs.onopentagname("br");
            }
            if (cbs.onopentag != null) {
                cbs.onopentag("br", {}, true);
            }
            if (cbs.onclosetag != null) {
                cbs.onclosetag("br", false);
            }
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    }
    /** @internal */
    onselfclosingtag(endIndex) {
        this.endIndex = endIndex;
        const honourSelfClosing = this.options.xmlMode ||
            this.options.recognizeSelfClosing ||
            this.foreignContext[this.foreignContext.length - 1];
        if (honourSelfClosing) {
            this.closeCurrentTag(false);
            // Set `startIndex` for next node
            this.startIndex = endIndex + 1;
        }
        else {
            // Ignore the fact that the tag is self-closing.
            this.onopentagend(endIndex);
        }
    }
    /**
     * Ends the current open tag and immediately closes it again if it is on
     * top of the stack.
     *
     * @param isOpenImplied Whether the open tag was implied; the close tag is
     *                      reported as implied exactly when the open tag was not.
     */
    closeCurrentTag(isOpenImplied) {
        const name = this.tagname;
        this.endOpenTag(isOpenImplied);
        // Self-closing tags will be on the top of the stack
        if (this.stack[this.stack.length - 1] === name) {
            // If the opening tag isn't implied, the closing tag has to be implied.
            if (this.cbs.onclosetag != null) {
                this.cbs.onclosetag(name, !isOpenImplied);
            }
            this.stack.pop();
        }
    }
    /** @internal */
    onattribname(start, endIndex) {
        this.startIndex = start;
        const name = this.getSlice(start, endIndex);
        this.attribname = this.lowerCaseAttributeNames
            ? name.toLowerCase()
            : name;
    }
    /** @internal */
    onattribdata(start, endIndex) {
        this.attribvalue += this.getSlice(start, endIndex);
    }
    /** @internal */
    onattribentity(cp) {
        this.attribvalue += fromCodePoint(cp);
    }
    /** @internal */
    onattribend(quote, endIndex) {
        this.endIndex = endIndex;
        const { cbs } = this;
        if (cbs.onattribute != null) {
            // Translate the tokenizer's quote type into the value handed to the handler.
            let quoteChar;
            if (quote === QuoteType.Double) {
                quoteChar = '"';
            }
            else if (quote === QuoteType.Single) {
                quoteChar = "'";
            }
            else if (quote === QuoteType.NoValue) {
                quoteChar = undefined;
            }
            else {
                quoteChar = null;
            }
            cbs.onattribute(this.attribname, this.attribvalue, quoteChar);
        }
        // The first occurrence of a duplicated attribute wins.
        if (this.attribs &&
            !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) {
            this.attribs[this.attribname] = this.attribvalue;
        }
        this.attribvalue = "";
    }
    /**
     * @param value Raw content of a declaration or processing instruction.
     * @returns The part of `value` before the first whitespace or `/`,
     *          lower-cased when tag names are lower-cased.
     */
    getInstructionName(value) {
        const index = value.search(reNameEnd);
        let name = index < 0 ? value : value.slice(0, index);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        return name;
    }
    /** @internal */
    ondeclaration(start, endIndex) {
        this.endIndex = endIndex;
        const value = this.getSlice(start, endIndex);
        if (this.cbs.onprocessinginstruction) {
            const name = this.getInstructionName(value);
            this.cbs.onprocessinginstruction(`!${name}`, `!${value}`);
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    }
    /** @internal */
    onprocessinginstruction(start, endIndex) {
        this.endIndex = endIndex;
        const value = this.getSlice(start, endIndex);
        if (this.cbs.onprocessinginstruction) {
            const name = this.getInstructionName(value);
            this.cbs.onprocessinginstruction(`?${name}`, `?${value}`);
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    }
    /** @internal */
    oncomment(start, endIndex, offset) {
        this.endIndex = endIndex;
        const { cbs } = this;
        if (cbs.oncomment != null) {
            // The slice is only taken when a handler exists, as `getSlice` discards buffers.
            cbs.oncomment(this.getSlice(start, endIndex - offset));
        }
        if (cbs.oncommentend != null) {
            cbs.oncommentend();
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    }
    /** @internal */
    oncdata(start, endIndex, offset) {
        this.endIndex = endIndex;
        const value = this.getSlice(start, endIndex - offset);
        const { cbs } = this;
        if (this.options.xmlMode || this.options.recognizeCDATA) {
            if (cbs.oncdatastart != null) {
                cbs.oncdatastart();
            }
            if (cbs.ontext != null) {
                cbs.ontext(value);
            }
            if (cbs.oncdataend != null) {
                cbs.oncdataend();
            }
        }
        else {
            // Without CDATA recognition the section is reported as a comment.
            if (cbs.oncomment != null) {
                cbs.oncomment(`[CDATA[${value}]]`);
            }
            if (cbs.oncommentend != null) {
                cbs.oncommentend();
            }
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    }
    /** @internal */
    onend() {
        const { cbs, stack } = this;
        if (cbs.onclosetag) {
            // Set the end index for all remaining tags
            this.endIndex = this.startIndex;
            // Report every still-open element as implicitly closed, innermost first.
            for (let index = stack.length - 1; index >= 0; index--) {
                cbs.onclosetag(stack[index], true);
            }
        }
        if (cbs.onend != null) {
            cbs.onend();
        }
    }
    /**
     * Resets the parser to a blank state, ready to parse a new HTML document
     */
    reset() {
        const { cbs } = this;
        if (cbs.onreset != null) {
            cbs.onreset();
        }
        this.tokenizer.reset();
        this.tagname = "";
        this.attribname = "";
        // Drop any partially accumulated attribute value from a truncated document.
        this.attribvalue = "";
        this.attribs = null;
        this.stack.length = 0;
        // Unclosed foreign elements must not leak their context into the next document.
        this.foreignContext.length = 0;
        this.openTagStart = 0;
        this.startIndex = 0;
        this.endIndex = 0;
        if (cbs.onparserinit != null) {
            cbs.onparserinit(this);
        }
        this.buffers.length = 0;
        this.bufferOffset = 0;
        this.writeIndex = 0;
        this.ended = false;
    }
    /**
     * Resets the parser, then parses a complete document and
     * pushes it to the handler.
     *
     * @param data Document to parse.
     */
    parseComplete(data) {
        this.reset();
        this.end(data);
    }
    /**
     * Returns the text between two absolute indices, discarding buffered
     * chunks that lie entirely before `start` along the way.
     *
     * @param start Absolute start index (inclusive).
     * @param end Absolute end index (exclusive).
     */
    getSlice(start, end) {
        while (start - this.bufferOffset >= this.buffers[0].length) {
            this.shiftBuffer();
        }
        let slice = this.buffers[0].slice(start - this.bufferOffset, end - this.bufferOffset);
        // The requested range may continue into following chunks.
        while (end - this.bufferOffset > this.buffers[0].length) {
            this.shiftBuffer();
            slice += this.buffers[0].slice(0, end - this.bufferOffset);
        }
        return slice;
    }
    /** Discards the oldest buffered chunk and adjusts the offsets that refer to it. */
    shiftBuffer() {
        this.bufferOffset += this.buffers[0].length;
        this.writeIndex--;
        this.buffers.shift();
    }
    /**
     * Parses a chunk of data and calls the corresponding callbacks.
     *
     * @param chunk Chunk to parse.
     */
    write(chunk) {
        if (this.ended) {
            if (this.cbs.onerror != null) {
                this.cbs.onerror(new Error(".write() after done!"));
            }
            return;
        }
        this.buffers.push(chunk);
        // While paused the chunk stays queued; `resume` feeds it to the tokenizer.
        if (this.tokenizer.running) {
            this.tokenizer.write(chunk);
            this.writeIndex++;
        }
    }
    /**
     * Parses the end of the buffer and clears the stack, calls onend.
     *
     * @param chunk Optional final chunk to parse.
     */
    end(chunk) {
        if (this.ended) {
            if (this.cbs.onerror != null) {
                this.cbs.onerror(new Error(".end() after done!"));
            }
            return;
        }
        if (chunk) {
            this.write(chunk);
        }
        this.ended = true;
        this.tokenizer.end();
    }
    /**
     * Pauses parsing. The parser won't emit events until `resume` is called.
     */
    pause() {
        this.tokenizer.pause();
    }
    /**
     * Resumes parsing after `pause` was called.
     */
    resume() {
        this.tokenizer.resume();
        // Feed the chunks that were queued while paused.
        while (this.tokenizer.running &&
            this.writeIndex < this.buffers.length) {
            this.tokenizer.write(this.buffers[this.writeIndex++]);
        }
        if (this.ended) {
            this.tokenizer.end();
        }
    }
    /**
     * Alias of `write`, for backwards compatibility.
     *
     * @param chunk Chunk to parse.
     * @deprecated
     */
    parseChunk(chunk) {
        this.write(chunk);
    }
    /**
     * Alias of `end`, for backwards compatibility.
     *
     * @param chunk Optional final chunk to parse.
     * @deprecated
     */
    done(chunk) {
        this.end(chunk);
    }
}
//# sourceMappingURL=Parser.js.map |