File size: 5,631 Bytes
bc20498 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
// translate the various posix character classes into unicode properties
// this works across all unicode locales
// { <posix class>: [<translation>, /u flag required, negated]
const posixClasses = {
'[:alnum:]': ['\\p{L}\\p{Nl}\\p{Nd}', true],
'[:alpha:]': ['\\p{L}\\p{Nl}', true],
'[:ascii:]': ['\\x' + '00-\\x' + '7f', false],
'[:blank:]': ['\\p{Zs}\\t', true],
'[:cntrl:]': ['\\p{Cc}', true],
'[:digit:]': ['\\p{Nd}', true],
'[:graph:]': ['\\p{Z}\\p{C}', true, true],
'[:lower:]': ['\\p{Ll}', true],
'[:print:]': ['\\p{C}', true],
'[:punct:]': ['\\p{P}', true],
'[:space:]': ['\\p{Z}\\t\\r\\n\\v\\f', true],
'[:upper:]': ['\\p{Lu}', true],
'[:word:]': ['\\p{L}\\p{Nl}\\p{Nd}\\p{Pc}', true],
'[:xdigit:]': ['A-Fa-f0-9', false],
};
// only need to escape a few things inside of brace expressions
// escapes: [ \ ] -
const braceEscape = (s) => s.replace(/[[\]\\-]/g, '\\$&');
// escape all regexp magic characters
const regexpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
// everything has already been escaped, we just have to join
const rangesToString = (ranges) => ranges.join('');
// takes a glob string at a posix brace expression, and returns
// an equivalent regular expression source, and boolean indicating
// whether the /u flag needs to be applied, and the number of chars
// consumed to parse the character class.
// This also removes out of order ranges, and returns ($.) if the
// entire class just no good.
export const parseClass = (glob, position) => {
const pos = position;
/* c8 ignore start */
if (glob.charAt(pos) !== '[') {
throw new Error('not in a brace expression');
}
/* c8 ignore stop */
const ranges = [];
const negs = [];
let i = pos + 1;
let sawStart = false;
let uflag = false;
let escaping = false;
let negate = false;
let endPos = pos;
let rangeStart = '';
WHILE: while (i < glob.length) {
const c = glob.charAt(i);
if ((c === '!' || c === '^') && i === pos + 1) {
negate = true;
i++;
continue;
}
if (c === ']' && sawStart && !escaping) {
endPos = i + 1;
break;
}
sawStart = true;
if (c === '\\') {
if (!escaping) {
escaping = true;
i++;
continue;
}
// escaped \ char, fall through and treat like normal char
}
if (c === '[' && !escaping) {
// either a posix class, a collation equivalent, or just a [
for (const [cls, [unip, u, neg]] of Object.entries(posixClasses)) {
if (glob.startsWith(cls, i)) {
// invalid, [a-[] is fine, but not [a-[:alpha]]
if (rangeStart) {
return ['$.', false, glob.length - pos, true];
}
i += cls.length;
if (neg)
negs.push(unip);
else
ranges.push(unip);
uflag = uflag || u;
continue WHILE;
}
}
}
// now it's just a normal character, effectively
escaping = false;
if (rangeStart) {
// throw this range away if it's not valid, but others
// can still match.
if (c > rangeStart) {
ranges.push(braceEscape(rangeStart) + '-' + braceEscape(c));
}
else if (c === rangeStart) {
ranges.push(braceEscape(c));
}
rangeStart = '';
i++;
continue;
}
// now might be the start of a range.
// can be either c-d or c-] or c<more...>] or c] at this point
if (glob.startsWith('-]', i + 1)) {
ranges.push(braceEscape(c + '-'));
i += 2;
continue;
}
if (glob.startsWith('-', i + 1)) {
rangeStart = c;
i += 2;
continue;
}
// not the start of a range, just a single character
ranges.push(braceEscape(c));
i++;
}
if (endPos < i) {
// didn't see the end of the class, not a valid class,
// but might still be valid as a literal match.
return ['', false, 0, false];
}
// if we got no ranges and no negates, then we have a range that
// cannot possibly match anything, and that poisons the whole glob
if (!ranges.length && !negs.length) {
return ['$.', false, glob.length - pos, true];
}
// if we got one positive range, and it's a single character, then that's
// not actually a magic pattern, it's just that one literal character.
// we should not treat that as "magic", we should just return the literal
// character. [_] is a perfectly valid way to escape glob magic chars.
if (negs.length === 0 &&
ranges.length === 1 &&
/^\\?.$/.test(ranges[0]) &&
!negate) {
const r = ranges[0].length === 2 ? ranges[0].slice(-1) : ranges[0];
return [regexpEscape(r), false, endPos - pos, false];
}
const sranges = '[' + (negate ? '^' : '') + rangesToString(ranges) + ']';
const snegs = '[' + (negate ? '' : '^') + rangesToString(negs) + ']';
const comb = ranges.length && negs.length
? '(' + sranges + '|' + snegs + ')'
: ranges.length
? sranges
: snegs;
return [comb, uflag, endPos - pos, true];
};
//# sourceMappingURL=brace-expressions.js.map |