/**
 * Builds a regular expression that matches `value` literally.
 *
 * Every character that has a special meaning in a pattern is prefixed with a
 * backslash before compilation; the result carries the multiline (`m`) flag.
 * @param {string} value - text to be matched verbatim
 * @returns {RegExp}
 */
function escape(value) {
  const SPECIAL_CHARS = /[-/\\^$*+?.()|[\]{}]/g;
  // `$&` re-inserts the matched character after the added backslash.
  const literalPattern = value.replace(SPECIAL_CHARS, '\\$&');
  return new RegExp(literalPattern, 'm');
}
/**
 * Returns the pattern text of a regular expression.
 *
 * Strings are passed through unchanged; for anything else the `source`
 * property is read. Any falsy input (including the empty string) yields `null`.
 * @param {RegExp | string } re
 * @returns {string} pattern text, or `null` when `re` is falsy
 */
function source(re) {
  if (!re) {
    return null;
  }
  return typeof re === "string" ? re : re.source;
}
/**
 * Wraps an expression in a positive lookahead: `(?=…)`.
 * @param {RegExp | string } re
 * @returns {string}
 */
function lookahead(re) {
  const pieces = ['(?=', re, ')'];
  return concat(...pieces);
}
/**
 * Wraps an expression in a non-capturing group repeated zero or more
 * times: `(?:…)*`.
 * @param {RegExp | string } re
 * @returns {string}
 */
function anyNumberOfTimes(re) {
  const pieces = ['(?:', re, ')*'];
  return concat(...pieces);
}
/**
 * Wraps an expression in an optional non-capturing group: `(?:…)?`.
 * @param {RegExp | string } re
 * @returns {string}
 */
function optional(re) {
  const pieces = ['(?:', re, ')?'];
  return concat(...pieces);
}
/**
 * Concatenates the pattern text of every argument, in order, into one string.
 * @param {...(RegExp | string) } args
 * @returns {string}
 */
function concat(...args) {
  let joined = "";
  for (const part of args) {
    const text = source(part);
    // `source` yields null for falsy input; such parts contribute nothing.
    if (text != null) {
      joined += text;
    }
  }
  return joined;
}
/**
 * Removes a trailing options object from an argument list and returns it.
 *
 * Only a plain object (one whose `constructor` is `Object`) in the last
 * position counts as options, so a trailing RegExp is left in place.
 * When options are found, `args` is mutated: its last element is removed.
 * @param { Array<string | RegExp | Object> } args
 * @returns {object} the removed options object, or a new empty object
 */
function stripOptionsFromArgs(args) {
  const opts = args[args.length - 1];
  // `typeof null` is 'object', so null must be excluded before reading
  // `.constructor`, which would otherwise throw a TypeError.
  if (opts !== null && typeof opts === 'object' && opts.constructor === Object) {
    args.pop();
    return opts;
  }
  return {};
}
/** @typedef { {capture?: boolean} } RegexEitherOptions */
/**
 * Builds an alternation in which any one of the passed expressions may match.
 *
 * The result has the form `(?:a|b|c)`. If the last argument is a plain
 * options object with `capture: true`, the group is capturing: `(a|b|c)`.
 * @param {(RegExp | string)[] | [...(RegExp | string)[], RegexEitherOptions]} args
 * @returns {string}
 */
function either(...args) {
  /** @type { object & {capture?: boolean} } */
  const opts = stripOptionsFromArgs(args);
  const groupStart = opts.capture ? "(" : "(?:";
  const alternatives = args.map((x) => source(x)).join("|");
  return `${groupStart}${alternatives})`;
}
/**
 * Counts the capture groups in a regular expression.
 *
 * An empty alternative (`|`) is appended so the pattern always matches the
 * empty string. The match array then holds the full match plus one slot
 * per capture group.
 * @param {RegExp | string} re
 * @returns {number}
 */
function countMatchGroups(re) {
  const alwaysMatching = new RegExp(`${re.toString()}|`);
  const result = alwaysMatching.exec('');
  return result.length - 1;
}
/**
 * Tests whether the first match of `re` in `lexeme` begins at index 0.
 *
 * Returns `true`/`false` when a match exists, `null` when there is no match,
 * and the falsy `re` itself when no expression was supplied.
 * @param {RegExp} re
 * @param {string} lexeme
 */
function startsWith(re, lexeme) {
  if (!re) {
    return re;
  }
  const found = re.exec(lexeme);
  if (!found) {
    return found;
  }
  return found.index === 0;
}
// BACKREF_RE finds the next opening parenthesis or numeric backreference
// in a pattern. So those are not misidentified, it also consumes:
//   - [...] character classes, inside which parentheses and escapes have
//     a different meaning
//   - any other escape sequence, so an escaped character is never read as
//     an interesting element
//   - the '?' directly after a '(', which marks non-capturing and
//     lookahead groups (the match is then '(?' rather than '(')
// Capture group 1 holds the digits of a numeric backreference.
const BACKREF_RE = /\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./;
/**
 * **INTERNAL** Not intended for outside usage.
 *
 * Joins several expressions with `joinWith`, wrapping each one in its own
 * capture group and renumbering the numeric backreferences inside it so
 * they still point at the same group after joining. Keeping track of which
 * resulting group belongs to which expression is left to the caller.
 * @param {(string | RegExp)[]} regexps
 * @param {{joinWith: string}} opts
 * @returns {string}
 */
function _rewriteBackreferences(regexps, { joinWith }) {
  // Running total of capture groups opened so far across all expressions.
  let numCaptures = 0;

  const rewriteOne = (regex) => {
    // The wrapper group added around this expression counts as a capture.
    numCaptures += 1;
    // Every backreference in this expression shifts by the number of
    // groups opened before it, including its own wrapper.
    const offset = numCaptures;
    let remaining = source(regex);
    let rewritten = '';

    while (remaining.length > 0) {
      const match = BACKREF_RE.exec(remaining);
      if (!match) {
        rewritten += remaining;
        break;
      }
      const [token, backrefDigits] = match;
      rewritten += remaining.substring(0, match.index);
      remaining = remaining.substring(match.index + token.length);

      if (token[0] === '\\' && backrefDigits) {
        // Adjust the backreference.
        rewritten += '\\' + String(Number(backrefDigits) + offset);
      } else {
        rewritten += token;
        // Only a bare '(' is counted; '(?' tokens are not.
        if (token === '(') {
          numCaptures++;
        }
      }
    }
    return `(${rewritten})`;
  };

  return regexps.map((regex) => rewriteOne(regex)).join(joinWith);
}
export { _rewriteBackreferences, anyNumberOfTimes, concat, countMatchGroups, either, escape, lookahead, optional, source, startsWith }; | |