"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Preprocessor = void 0;
const unicode_js_1 = require("../common/unicode.js");
const error_codes_js_1 = require("../common/error-codes.js");

// Default value of `Preprocessor#bufferWaterline` (1 << 16 === 65536): once the
// read position exceeds it, the already-consumed prefix of the buffered input
// becomes eligible to be dropped.
const DEFAULT_BUFFER_WATERLINE = 1 << 16;

class Preprocessor { |
|
constructor(handler) { |
|
this.handler = handler; |
|
this.html = ''; |
|
this.pos = -1; |
|
|
|
this.lastGapPos = -2; |
|
this.gapStack = []; |
|
this.skipNextNewLine = false; |
|
this.lastChunkWritten = false; |
|
this.endOfChunkHit = false; |
|
this.bufferWaterline = DEFAULT_BUFFER_WATERLINE; |
|
this.isEol = false; |
|
this.lineStartPos = 0; |
|
this.droppedBufferSize = 0; |
|
this.line = 1; |
|
|
|
this.lastErrOffset = -1; |
|
} |
|
|
|
get col() { |
|
return this.pos - this.lineStartPos + Number(this.lastGapPos !== this.pos); |
|
} |
|
get offset() { |
|
return this.droppedBufferSize + this.pos; |
|
} |
|
getError(code) { |
|
const { line, col, offset } = this; |
|
return { |
|
code, |
|
startLine: line, |
|
endLine: line, |
|
startCol: col, |
|
endCol: col, |
|
startOffset: offset, |
|
endOffset: offset, |
|
}; |
|
} |
|
_err(code) { |
|
if (this.handler.onParseError && this.lastErrOffset !== this.offset) { |
|
this.lastErrOffset = this.offset; |
|
this.handler.onParseError(this.getError(code)); |
|
} |
|
} |
|
_addGap() { |
|
this.gapStack.push(this.lastGapPos); |
|
this.lastGapPos = this.pos; |
|
} |
|
_processSurrogate(cp) { |
|
|
|
if (this.pos !== this.html.length - 1) { |
|
const nextCp = this.html.charCodeAt(this.pos + 1); |
|
if ((0, unicode_js_1.isSurrogatePair)(nextCp)) { |
|
|
|
this.pos++; |
|
|
|
this._addGap(); |
|
return (0, unicode_js_1.getSurrogatePairCodePoint)(cp, nextCp); |
|
} |
|
} |
|
|
|
else if (!this.lastChunkWritten) { |
|
this.endOfChunkHit = true; |
|
return unicode_js_1.CODE_POINTS.EOF; |
|
} |
|
|
|
this._err(error_codes_js_1.ERR.surrogateInInputStream); |
|
return cp; |
|
} |
|
willDropParsedChunk() { |
|
return this.pos > this.bufferWaterline; |
|
} |
|
dropParsedChunk() { |
|
if (this.willDropParsedChunk()) { |
|
this.html = this.html.substring(this.pos); |
|
this.lineStartPos -= this.pos; |
|
this.droppedBufferSize += this.pos; |
|
this.pos = 0; |
|
this.lastGapPos = -2; |
|
this.gapStack.length = 0; |
|
} |
|
} |
|
write(chunk, isLastChunk) { |
|
if (this.html.length > 0) { |
|
this.html += chunk; |
|
} |
|
else { |
|
this.html = chunk; |
|
} |
|
this.endOfChunkHit = false; |
|
this.lastChunkWritten = isLastChunk; |
|
} |
|
insertHtmlAtCurrentPos(chunk) { |
|
this.html = this.html.substring(0, this.pos + 1) + chunk + this.html.substring(this.pos + 1); |
|
this.endOfChunkHit = false; |
|
} |
|
startsWith(pattern, caseSensitive) { |
|
|
|
if (this.pos + pattern.length > this.html.length) { |
|
this.endOfChunkHit = !this.lastChunkWritten; |
|
return false; |
|
} |
|
if (caseSensitive) { |
|
return this.html.startsWith(pattern, this.pos); |
|
} |
|
for (let i = 0; i < pattern.length; i++) { |
|
const cp = this.html.charCodeAt(this.pos + i) | 0x20; |
|
if (cp !== pattern.charCodeAt(i)) { |
|
return false; |
|
} |
|
} |
|
return true; |
|
} |
|
peek(offset) { |
|
const pos = this.pos + offset; |
|
if (pos >= this.html.length) { |
|
this.endOfChunkHit = !this.lastChunkWritten; |
|
return unicode_js_1.CODE_POINTS.EOF; |
|
} |
|
const code = this.html.charCodeAt(pos); |
|
return code === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN ? unicode_js_1.CODE_POINTS.LINE_FEED : code; |
|
} |
|
advance() { |
|
this.pos++; |
|
|
|
if (this.isEol) { |
|
this.isEol = false; |
|
this.line++; |
|
this.lineStartPos = this.pos; |
|
} |
|
if (this.pos >= this.html.length) { |
|
this.endOfChunkHit = !this.lastChunkWritten; |
|
return unicode_js_1.CODE_POINTS.EOF; |
|
} |
|
let cp = this.html.charCodeAt(this.pos); |
|
|
|
if (cp === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN) { |
|
this.isEol = true; |
|
this.skipNextNewLine = true; |
|
return unicode_js_1.CODE_POINTS.LINE_FEED; |
|
} |
|
|
|
|
|
if (cp === unicode_js_1.CODE_POINTS.LINE_FEED) { |
|
this.isEol = true; |
|
if (this.skipNextNewLine) { |
|
|
|
this.line--; |
|
this.skipNextNewLine = false; |
|
this._addGap(); |
|
return this.advance(); |
|
} |
|
} |
|
this.skipNextNewLine = false; |
|
if ((0, unicode_js_1.isSurrogate)(cp)) { |
|
cp = this._processSurrogate(cp); |
|
} |
|
|
|
|
|
|
|
const isCommonValidRange = this.handler.onParseError === null || |
|
(cp > 0x1f && cp < 0x7f) || |
|
cp === unicode_js_1.CODE_POINTS.LINE_FEED || |
|
cp === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN || |
|
(cp > 0x9f && cp < 64976); |
|
if (!isCommonValidRange) { |
|
this._checkForProblematicCharacters(cp); |
|
} |
|
return cp; |
|
} |
|
_checkForProblematicCharacters(cp) { |
|
if ((0, unicode_js_1.isControlCodePoint)(cp)) { |
|
this._err(error_codes_js_1.ERR.controlCharacterInInputStream); |
|
} |
|
else if ((0, unicode_js_1.isUndefinedCodePoint)(cp)) { |
|
this._err(error_codes_js_1.ERR.noncharacterInInputStream); |
|
} |
|
} |
|
retreat(count) { |
|
this.pos -= count; |
|
while (this.pos < this.lastGapPos) { |
|
this.lastGapPos = this.gapStack.pop(); |
|
this.pos--; |
|
} |
|
this.isEol = false; |
|
} |
|
} |
// Public CommonJS export of the class.
exports.Preprocessor = Preprocessor;