File size: 22,532 Bytes
bc20498 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 |
// parse a single path portion
import { parseClass } from './brace-expressions.js';
import { unescape } from './unescape.js';
const types = new Set(['!', '?', '+', '*', '@']);
const isExtglobType = (c) => types.has(c);
// Patterns that get prepended to bind to the start of either the
// entire string, or just a single path portion, to prevent dots
// and/or traversal patterns, when needed.
// Exts don't need the ^ or / bit, because the root binds that already.
const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))';
const startNoDot = '(?!\\.)';
// characters that indicate a start of pattern needs the "no dots" bit,
// because a dot *might* be matched. ( is not in the list, because in
// the case of a child extglob, it will handle the prevention itself.
const addPatternStart = new Set(['[', '.']);
// cases where traversal is A-OK, no dot prevention needed
const justDots = new Set(['..', '.']);
const reSpecials = new Set('().*{}+?[]^$\\!');
const regExpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
// any single thing other than /
const qmark = '[^/]';
// * => any number of characters
const star = qmark + '*?';
// use + when we need to ensure that *something* matches, because the * is
// the only thing in the path portion.
const starNoEmpty = qmark + '+?';
// remove the \ chars that we added if we end up doing a nonmagic compare
// const deslash = (s: string) => s.replace(/\\(.)/g, '$1')
export class AST {
type;
#root;
#hasMagic;
#uflag = false;
#parts = [];
#parent;
#parentIndex;
#negs;
#filledNegs = false;
#options;
#toString;
// set to true if it's an extglob with no children
// (which really means one child of '')
#emptyExt = false;
constructor(type, parent, options = {}) {
this.type = type;
// extglobs are inherently magical
if (type)
this.#hasMagic = true;
this.#parent = parent;
this.#root = this.#parent ? this.#parent.#root : this;
this.#options = this.#root === this ? options : this.#root.#options;
this.#negs = this.#root === this ? [] : this.#root.#negs;
if (type === '!' && !this.#root.#filledNegs)
this.#negs.push(this);
this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0;
}
get hasMagic() {
/* c8 ignore start */
if (this.#hasMagic !== undefined)
return this.#hasMagic;
/* c8 ignore stop */
for (const p of this.#parts) {
if (typeof p === 'string')
continue;
if (p.type || p.hasMagic)
return (this.#hasMagic = true);
}
// note: will be undefined until we generate the regexp src and find out
return this.#hasMagic;
}
// reconstructs the pattern
toString() {
if (this.#toString !== undefined)
return this.#toString;
if (!this.type) {
return (this.#toString = this.#parts.map(p => String(p)).join(''));
}
else {
return (this.#toString =
this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')');
}
}
#fillNegs() {
/* c8 ignore start */
if (this !== this.#root)
throw new Error('should only call on root');
if (this.#filledNegs)
return this;
/* c8 ignore stop */
// call toString() once to fill this out
this.toString();
this.#filledNegs = true;
let n;
while ((n = this.#negs.pop())) {
if (n.type !== '!')
continue;
// walk up the tree, appending everthing that comes AFTER parentIndex
let p = n;
let pp = p.#parent;
while (pp) {
for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) {
for (const part of n.#parts) {
/* c8 ignore start */
if (typeof part === 'string') {
throw new Error('string part in extglob AST??');
}
/* c8 ignore stop */
part.copyIn(pp.#parts[i]);
}
}
p = pp;
pp = p.#parent;
}
}
return this;
}
push(...parts) {
for (const p of parts) {
if (p === '')
continue;
/* c8 ignore start */
if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
throw new Error('invalid part: ' + p);
}
/* c8 ignore stop */
this.#parts.push(p);
}
}
toJSON() {
const ret = this.type === null
? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON()))
: [this.type, ...this.#parts.map(p => p.toJSON())];
if (this.isStart() && !this.type)
ret.unshift([]);
if (this.isEnd() &&
(this === this.#root ||
(this.#root.#filledNegs && this.#parent?.type === '!'))) {
ret.push({});
}
return ret;
}
isStart() {
if (this.#root === this)
return true;
// if (this.type) return !!this.#parent?.isStart()
if (!this.#parent?.isStart())
return false;
if (this.#parentIndex === 0)
return true;
// if everything AHEAD of this is a negation, then it's still the "start"
const p = this.#parent;
for (let i = 0; i < this.#parentIndex; i++) {
const pp = p.#parts[i];
if (!(pp instanceof AST && pp.type === '!')) {
return false;
}
}
return true;
}
isEnd() {
if (this.#root === this)
return true;
if (this.#parent?.type === '!')
return true;
if (!this.#parent?.isEnd())
return false;
if (!this.type)
return this.#parent?.isEnd();
// if not root, it'll always have a parent
/* c8 ignore start */
const pl = this.#parent ? this.#parent.#parts.length : 0;
/* c8 ignore stop */
return this.#parentIndex === pl - 1;
}
copyIn(part) {
if (typeof part === 'string')
this.push(part);
else
this.push(part.clone(this));
}
clone(parent) {
const c = new AST(this.type, parent);
for (const p of this.#parts) {
c.copyIn(p);
}
return c;
}
static #parseAST(str, ast, pos, opt) {
let escaping = false;
let inBrace = false;
let braceStart = -1;
let braceNeg = false;
if (ast.type === null) {
// outside of a extglob, append until we find a start
let i = pos;
let acc = '';
while (i < str.length) {
const c = str.charAt(i++);
// still accumulate escapes at this point, but we do ignore
// starts that are escaped
if (escaping || c === '\\') {
escaping = !escaping;
acc += c;
continue;
}
if (inBrace) {
if (i === braceStart + 1) {
if (c === '^' || c === '!') {
braceNeg = true;
}
}
else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
inBrace = false;
}
acc += c;
continue;
}
else if (c === '[') {
inBrace = true;
braceStart = i;
braceNeg = false;
acc += c;
continue;
}
if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
ast.push(acc);
acc = '';
const ext = new AST(c, ast);
i = AST.#parseAST(str, ext, i, opt);
ast.push(ext);
continue;
}
acc += c;
}
ast.push(acc);
return i;
}
// some kind of extglob, pos is at the (
// find the next | or )
let i = pos + 1;
let part = new AST(null, ast);
const parts = [];
let acc = '';
while (i < str.length) {
const c = str.charAt(i++);
// still accumulate escapes at this point, but we do ignore
// starts that are escaped
if (escaping || c === '\\') {
escaping = !escaping;
acc += c;
continue;
}
if (inBrace) {
if (i === braceStart + 1) {
if (c === '^' || c === '!') {
braceNeg = true;
}
}
else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
inBrace = false;
}
acc += c;
continue;
}
else if (c === '[') {
inBrace = true;
braceStart = i;
braceNeg = false;
acc += c;
continue;
}
if (isExtglobType(c) && str.charAt(i) === '(') {
part.push(acc);
acc = '';
const ext = new AST(c, part);
part.push(ext);
i = AST.#parseAST(str, ext, i, opt);
continue;
}
if (c === '|') {
part.push(acc);
acc = '';
parts.push(part);
part = new AST(null, ast);
continue;
}
if (c === ')') {
if (acc === '' && ast.#parts.length === 0) {
ast.#emptyExt = true;
}
part.push(acc);
acc = '';
ast.push(...parts, part);
return i;
}
acc += c;
}
// unfinished extglob
// if we got here, it was a malformed extglob! not an extglob, but
// maybe something else in there.
ast.type = null;
ast.#hasMagic = undefined;
ast.#parts = [str.substring(pos - 1)];
return i;
}
static fromGlob(pattern, options = {}) {
const ast = new AST(null, undefined, options);
AST.#parseAST(pattern, ast, 0, options);
return ast;
}
// returns the regular expression if there's magic, or the unescaped
// string if not.
toMMPattern() {
// should only be called on root
/* c8 ignore start */
if (this !== this.#root)
return this.#root.toMMPattern();
/* c8 ignore stop */
const glob = this.toString();
const [re, body, hasMagic, uflag] = this.toRegExpSource();
// if we're in nocase mode, and not nocaseMagicOnly, then we do
// still need a regular expression if we have to case-insensitively
// match capital/lowercase characters.
const anyMagic = hasMagic ||
this.#hasMagic ||
(this.#options.nocase &&
!this.#options.nocaseMagicOnly &&
glob.toUpperCase() !== glob.toLowerCase());
if (!anyMagic) {
return body;
}
const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : '');
return Object.assign(new RegExp(`^${re}$`, flags), {
_src: re,
_glob: glob,
});
}
// returns the string match, the regexp source, whether there's magic
// in the regexp (so a regular expression is required) and whether or
// not the uflag is needed for the regular expression (for posix classes)
// TODO: instead of injecting the start/end at this point, just return
// the BODY of the regexp, along with the start/end portions suitable
// for binding the start/end in either a joined full-path makeRe context
// (where we bind to (^|/), or a standalone matchPart context (where
// we bind to ^, and not /). Otherwise slashes get duped!
//
// In part-matching mode, the start is:
// - if not isStart: nothing
// - if traversal possible, but not allowed: ^(?!\.\.?$)
// - if dots allowed or not possible: ^
// - if dots possible and not allowed: ^(?!\.)
// end is:
// - if not isEnd(): nothing
// - else: $
//
// In full-path matching mode, we put the slash at the START of the
// pattern, so start is:
// - if first pattern: same as part-matching mode
// - if not isStart(): nothing
// - if traversal possible, but not allowed: /(?!\.\.?(?:$|/))
// - if dots allowed or not possible: /
// - if dots possible and not allowed: /(?!\.)
// end is:
// - if last pattern, same as part-matching mode
// - else nothing
//
// Always put the (?:$|/) on negated tails, though, because that has to be
// there to bind the end of the negated pattern portion, and it's easier to
// just stick it in now rather than try to inject it later in the middle of
// the pattern.
//
// We can just always return the same end, and leave it up to the caller
// to know whether it's going to be used joined or in parts.
// And, if the start is adjusted slightly, can do the same there:
// - if not isStart: nothing
// - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$)
// - if dots allowed or not possible: (?:/|^)
// - if dots possible and not allowed: (?:/|^)(?!\.)
//
// But it's better to have a simpler binding without a conditional, for
// performance, so probably better to return both start options.
//
// Then the caller just ignores the end if it's not the first pattern,
// and the start always gets applied.
//
// But that's always going to be $ if it's the ending pattern, or nothing,
// so the caller can just attach $ at the end of the pattern when building.
//
// So the todo is:
// - better detect what kind of start is needed
// - return both flavors of starting pattern
// - attach $ at the end of the pattern when creating the actual RegExp
//
// Ah, but wait, no, that all only applies to the root when the first pattern
// is not an extglob. If the first pattern IS an extglob, then we need all
// that dot prevention biz to live in the extglob portions, because eg
// +(*|.x*) can match .xy but not .yx.
//
// So, return the two flavors if it's #root and the first child is not an
// AST, otherwise leave it to the child AST to handle it, and there,
// use the (?:^|/) style of start binding.
//
// Even simplified further:
// - Since the start for a join is eg /(?!\.) and the start for a part
// is ^(?!\.), we can just prepend (?!\.) to the pattern (either root
// or start or whatever) and prepend ^ or / at the Regexp construction.
toRegExpSource(allowDot) {
const dot = allowDot ?? !!this.#options.dot;
if (this.#root === this)
this.#fillNegs();
if (!this.type) {
const noEmpty = this.isStart() && this.isEnd();
const src = this.#parts
.map(p => {
const [re, _, hasMagic, uflag] = typeof p === 'string'
? AST.#parseGlob(p, this.#hasMagic, noEmpty)
: p.toRegExpSource(allowDot);
this.#hasMagic = this.#hasMagic || hasMagic;
this.#uflag = this.#uflag || uflag;
return re;
})
.join('');
let start = '';
if (this.isStart()) {
if (typeof this.#parts[0] === 'string') {
// this is the string that will match the start of the pattern,
// so we need to protect against dots and such.
// '.' and '..' cannot match unless the pattern is that exactly,
// even if it starts with . or dot:true is set.
const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]);
if (!dotTravAllowed) {
const aps = addPatternStart;
// check if we have a possibility of matching . or ..,
// and prevent that.
const needNoTrav =
// dots are allowed, and the pattern starts with [ or .
(dot && aps.has(src.charAt(0))) ||
// the pattern starts with \., and then [ or .
(src.startsWith('\\.') && aps.has(src.charAt(2))) ||
// the pattern starts with \.\., and then [ or .
(src.startsWith('\\.\\.') && aps.has(src.charAt(4)));
// no need to prevent dots if it can't match a dot, or if a
// sub-pattern will be preventing it anyway.
const needNoDot = !dot && !allowDot && aps.has(src.charAt(0));
start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : '';
}
}
}
// append the "end of path portion" pattern to negation tails
let end = '';
if (this.isEnd() &&
this.#root.#filledNegs &&
this.#parent?.type === '!') {
end = '(?:$|\\/)';
}
const final = start + src + end;
return [
final,
unescape(src),
(this.#hasMagic = !!this.#hasMagic),
this.#uflag,
];
}
// We need to calculate the body *twice* if it's a repeat pattern
// at the start, once in nodot mode, then again in dot mode, so a
// pattern like *(?) can match 'x.y'
const repeated = this.type === '*' || this.type === '+';
// some kind of extglob
const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
let body = this.#partsToRegExp(dot);
if (this.isStart() && this.isEnd() && !body && this.type !== '!') {
// invalid extglob, has to at least be *something* present, if it's
// the entire path portion.
const s = this.toString();
this.#parts = [s];
this.type = null;
this.#hasMagic = undefined;
return [s, unescape(this.toString()), false, false];
}
// XXX abstract out this map method
let bodyDotAllowed = !repeated || allowDot || dot || !startNoDot
? ''
: this.#partsToRegExp(true);
if (bodyDotAllowed === body) {
bodyDotAllowed = '';
}
if (bodyDotAllowed) {
body = `(?:${body})(?:${bodyDotAllowed})*?`;
}
// an empty !() is exactly equivalent to a starNoEmpty
let final = '';
if (this.type === '!' && this.#emptyExt) {
final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty;
}
else {
const close = this.type === '!'
? // !() must match something,but !(x) can match ''
'))' +
(this.isStart() && !dot && !allowDot ? startNoDot : '') +
star +
')'
: this.type === '@'
? ')'
: this.type === '?'
? ')?'
: this.type === '+' && bodyDotAllowed
? ')'
: this.type === '*' && bodyDotAllowed
? `)?`
: `)${this.type}`;
final = start + body + close;
}
return [
final,
unescape(body),
(this.#hasMagic = !!this.#hasMagic),
this.#uflag,
];
}
#partsToRegExp(dot) {
return this.#parts
.map(p => {
// extglob ASTs should only contain parent ASTs
/* c8 ignore start */
if (typeof p === 'string') {
throw new Error('string type in extglob ast??');
}
/* c8 ignore stop */
// can ignore hasMagic, because extglobs are already always magic
const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot);
this.#uflag = this.#uflag || uflag;
return re;
})
.filter(p => !(this.isStart() && this.isEnd()) || !!p)
.join('|');
}
static #parseGlob(glob, hasMagic, noEmpty = false) {
let escaping = false;
let re = '';
let uflag = false;
for (let i = 0; i < glob.length; i++) {
const c = glob.charAt(i);
if (escaping) {
escaping = false;
re += (reSpecials.has(c) ? '\\' : '') + c;
continue;
}
if (c === '\\') {
if (i === glob.length - 1) {
re += '\\\\';
}
else {
escaping = true;
}
continue;
}
if (c === '[') {
const [src, needUflag, consumed, magic] = parseClass(glob, i);
if (consumed) {
re += src;
uflag = uflag || needUflag;
i += consumed - 1;
hasMagic = hasMagic || magic;
continue;
}
}
if (c === '*') {
if (noEmpty && glob === '*')
re += starNoEmpty;
else
re += star;
hasMagic = true;
continue;
}
if (c === '?') {
re += qmark;
hasMagic = true;
continue;
}
re += regExpEscape(c);
}
return [re, unescape(glob), !!hasMagic, uflag];
}
}
//# sourceMappingURL=ast.js.map |