Spaces:
Runtime error
Runtime error
/** | |
* Copyright Brian White. All rights reserved. | |
* | |
* @see https://github.com/mscdex/streamsearch | |
* | |
* Permission is hereby granted, free of charge, to any person obtaining a copy | |
* of this software and associated documentation files (the "Software"), to | |
* deal in the Software without restriction, including without limitation the | |
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | |
* sell copies of the Software, and to permit persons to whom the Software is | |
* furnished to do so, subject to the following conditions: | |
* | |
* The above copyright notice and this permission notice shall be included in | |
* all copies or substantial portions of the Software. | |
* | |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
* IN THE SOFTWARE. | |
* | |
* Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation | |
* by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool | |
*/ | |
const EventEmitter = require('node:events').EventEmitter | |
const inherits = require('node:util').inherits | |
function SBMH (needle) { | |
if (typeof needle === 'string') { | |
needle = Buffer.from(needle) | |
} | |
if (!Buffer.isBuffer(needle)) { | |
throw new TypeError('The needle has to be a String or a Buffer.') | |
} | |
const needleLength = needle.length | |
if (needleLength === 0) { | |
throw new Error('The needle cannot be an empty String/Buffer.') | |
} | |
if (needleLength > 256) { | |
throw new Error('The needle cannot have a length bigger than 256.') | |
} | |
this.maxMatches = Infinity | |
this.matches = 0 | |
this._occ = new Array(256) | |
.fill(needleLength) // Initialize occurrence table. | |
this._lookbehind_size = 0 | |
this._needle = needle | |
this._bufpos = 0 | |
this._lookbehind = Buffer.alloc(needleLength) | |
// Populate occurrence table with analysis of the needle, | |
// ignoring last letter. | |
for (var i = 0; i < needleLength - 1; ++i) { // eslint-disable-line no-var | |
this._occ[needle[i]] = needleLength - 1 - i | |
} | |
} | |
inherits(SBMH, EventEmitter) | |
SBMH.prototype.reset = function () { | |
this._lookbehind_size = 0 | |
this.matches = 0 | |
this._bufpos = 0 | |
} | |
SBMH.prototype.push = function (chunk, pos) { | |
if (!Buffer.isBuffer(chunk)) { | |
chunk = Buffer.from(chunk, 'binary') | |
} | |
const chlen = chunk.length | |
this._bufpos = pos || 0 | |
let r | |
while (r !== chlen && this.matches < this.maxMatches) { r = this._sbmh_feed(chunk) } | |
return r | |
} | |
SBMH.prototype._sbmh_feed = function (data) { | |
const len = data.length | |
const needle = this._needle | |
const needleLength = needle.length | |
const lastNeedleChar = needle[needleLength - 1] | |
// Positive: points to a position in `data` | |
// pos == 3 points to data[3] | |
// Negative: points to a position in the lookbehind buffer | |
// pos == -2 points to lookbehind[lookbehind_size - 2] | |
let pos = -this._lookbehind_size | |
let ch | |
if (pos < 0) { | |
// Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool | |
// search with character lookup code that considers both the | |
// lookbehind buffer and the current round's haystack data. | |
// | |
// Loop until | |
// there is a match. | |
// or until | |
// we've moved past the position that requires the | |
// lookbehind buffer. In this case we switch to the | |
// optimized loop. | |
// or until | |
// the character to look at lies outside the haystack. | |
while (pos < 0 && pos <= len - needleLength) { | |
ch = this._sbmh_lookup_char(data, pos + needleLength - 1) | |
if ( | |
ch === lastNeedleChar && | |
this._sbmh_memcmp(data, pos, needleLength - 1) | |
) { | |
this._lookbehind_size = 0 | |
++this.matches | |
this.emit('info', true) | |
return (this._bufpos = pos + needleLength) | |
} | |
pos += this._occ[ch] | |
} | |
// No match. | |
if (pos < 0) { | |
// There's too few data for Boyer-Moore-Horspool to run, | |
// so let's use a different algorithm to skip as much as | |
// we can. | |
// Forward pos until | |
// the trailing part of lookbehind + data | |
// looks like the beginning of the needle | |
// or until | |
// pos == 0 | |
while (pos < 0 && !this._sbmh_memcmp(data, pos, len - pos)) { ++pos } | |
} | |
if (pos >= 0) { | |
// Discard lookbehind buffer. | |
this.emit('info', false, this._lookbehind, 0, this._lookbehind_size) | |
this._lookbehind_size = 0 | |
} else { | |
// Cut off part of the lookbehind buffer that has | |
// been processed and append the entire haystack | |
// into it. | |
const bytesToCutOff = this._lookbehind_size + pos | |
if (bytesToCutOff > 0) { | |
// The cut off data is guaranteed not to contain the needle. | |
this.emit('info', false, this._lookbehind, 0, bytesToCutOff) | |
} | |
this._lookbehind.copy(this._lookbehind, 0, bytesToCutOff, | |
this._lookbehind_size - bytesToCutOff) | |
this._lookbehind_size -= bytesToCutOff | |
data.copy(this._lookbehind, this._lookbehind_size) | |
this._lookbehind_size += len | |
this._bufpos = len | |
return len | |
} | |
} | |
pos += (pos >= 0) * this._bufpos | |
// Lookbehind buffer is now empty. We only need to check if the | |
// needle is in the haystack. | |
if (data.indexOf(needle, pos) !== -1) { | |
pos = data.indexOf(needle, pos) | |
++this.matches | |
if (pos > 0) { this.emit('info', true, data, this._bufpos, pos) } else { this.emit('info', true) } | |
return (this._bufpos = pos + needleLength) | |
} else { | |
pos = len - needleLength | |
} | |
// There was no match. If there's trailing haystack data that we cannot | |
// match yet using the Boyer-Moore-Horspool algorithm (because the trailing | |
// data is less than the needle size) then match using a modified | |
// algorithm that starts matching from the beginning instead of the end. | |
// Whatever trailing data is left after running this algorithm is added to | |
// the lookbehind buffer. | |
while ( | |
pos < len && | |
( | |
data[pos] !== needle[0] || | |
( | |
(Buffer.compare( | |
data.subarray(pos, pos + len - pos), | |
needle.subarray(0, len - pos) | |
) !== 0) | |
) | |
) | |
) { | |
++pos | |
} | |
if (pos < len) { | |
data.copy(this._lookbehind, 0, pos, pos + (len - pos)) | |
this._lookbehind_size = len - pos | |
} | |
// Everything until pos is guaranteed not to contain needle data. | |
if (pos > 0) { this.emit('info', false, data, this._bufpos, pos < len ? pos : len) } | |
this._bufpos = len | |
return len | |
} | |
SBMH.prototype._sbmh_lookup_char = function (data, pos) { | |
return (pos < 0) | |
? this._lookbehind[this._lookbehind_size + pos] | |
: data[pos] | |
} | |
SBMH.prototype._sbmh_memcmp = function (data, pos, len) { | |
for (var i = 0; i < len; ++i) { // eslint-disable-line no-var | |
if (this._sbmh_lookup_char(data, pos + i) !== this._needle[i]) { return false } | |
} | |
return true | |
} | |
module.exports = SBMH | |