DuyTa
/

Graduation

Model card Files Files and versions Community

Graduation / ui /node_modules /css-tree /cjs /tokenizer /index.cjs

DuyTa's picture

Upload folder using huggingface_hub

bc20498 verified 10 months ago

23.7 kB

	'use strict';

	const types = require('./types.cjs');
	const charCodeDefinitions = require('./char-code-definitions.cjs');
	const utils = require('./utils.cjs');
	const names = require('./names.cjs');
	const OffsetToLocation = require('./OffsetToLocation.cjs');
	const TokenStream = require('./TokenStream.cjs');

	function tokenize(source, onToken) {
	/**
	 * Read the UTF-16 code unit of `source` at a given position.
	 *
	 * @param {number} offset - Index into `source`.
	 * @returns {number} The code unit at `offset` while `offset < sourceLength`,
	 *     otherwise 0 (used by the callers as an end-of-input marker).
	 */
	function getCharCode(offset) {
	    if (offset < sourceLength) {
	        return source.charCodeAt(offset);
	    }

	    return 0;
	}

	// § 4.3.3. Consume a numeric token
	/**
	 * Consume a numeric token that begins at the current `offset`.
	 *
	 * Moves `offset` past the token and sets `type` to `types.Dimension`,
	 * `types.Percentage` or `types.Number`.
	 */
	function consumeNumericToken() {
	    // Step over the number itself first.
	    offset = utils.consumeNumber(source, offset);

	    // Would the next three code points start an identifier (i.e. a unit)?
	    const unitFollows = charCodeDefinitions.isIdentifierStart(
	        getCharCode(offset),
	        getCharCode(offset + 1),
	        getCharCode(offset + 2)
	    );

	    if (unitFollows) {
	        // Number followed by a name: a dimension; the name is part of the token.
	        type = types.Dimension;
	        offset = utils.consumeName(source, offset);
	    } else if (getCharCode(offset) === 0x0025) {
	        // Number followed by U+0025 PERCENTAGE SIGN (%): a percentage; consume the sign.
	        type = types.Percentage;
	        offset += 1;
	    } else {
	        // Nothing attached: a plain number.
	        type = types.Number;
	    }
	}

	// § 4.3.4. Consume an ident-like token
	/**
	 * Consume an ident-like token that begins at the current `offset`.
	 *
	 * Moves `offset` past the token and sets `type` to `types.Function` or
	 * `types.Ident`; for an unquoted `url(` value the work is delegated to
	 * `consumeUrlToken()`, which sets `type` itself.
	 */
	function consumeIdentLikeToken() {
	    const nameStartOffset = offset;

	    // Consume a name, and let string be the result.
	    offset = utils.consumeName(source, offset);

	    // If string's value is an ASCII case-insensitive match for "url",
	    // and the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
	    if (utils.cmpStr(source, nameStartOffset, offset, 'url') && getCharCode(offset) === 0x0028) {
	        // Look past any whitespace that follows the parenthesis.
	        offset = utils.findWhiteSpaceEnd(source, offset + 1);

	        const firstValueCode = getCharCode(offset);

	        // If the value starts with U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE ('),
	        // possibly after whitespace, this is a <function-token>.
	        if (firstValueCode === 0x0022 || firstValueCode === 0x0027) {
	            type = types.Function;
	            // Rewind so the token covers only the 3-letter name plus "(";
	            // the skipped whitespace and the quoted string are left for later tokens.
	            offset = nameStartOffset + 4;
	            return;
	        }

	        // Otherwise, consume a url token, and return it.
	        consumeUrlToken();
	        return;
	    }

	    // Otherwise, if the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
	    // Create a <function-token> with its value set to string and return it.
	    if (getCharCode(offset) === 0x0028) {
	        type = types.Function;
	        offset++;
	        return;
	    }

	    // Otherwise, create an <ident-token> with its value set to string and return it.
	    type = types.Ident;
	}

	// § 4.3.5. Consume a string token
	/**
	 * Consume a string token.
	 *
	 * Sets `type` to `types.String`, or to `types.BadString` when an unescaped
	 * newline is met, and leaves `offset` just past the consumed text.
	 *
	 * @param {number} [endingCodePoint] - Code point that terminates the string.
	 *     When omitted (or falsy), the code at the current `offset` is consumed
	 *     and used as the terminator.
	 */
	function consumeStringToken(endingCodePoint) {
	    // Without an explicit terminator, the current input code point is the opening quote.
	    const closingCode = endingCodePoint || getCharCode(offset++);

	    // Start out as a regular <string-token>.
	    type = types.String;

	    // Walk the input one position at a time until the terminator or the end of input.
	    while (offset < source.length) {
	        const current = source.charCodeAt(offset);
	        const category = charCodeDefinitions.charCodeCategory(current);

	        if (category === closingCode) {
	            // Terminator found: include it in the token and stop.
	            offset++;
	            return;
	        }

	        if (category === charCodeDefinitions.WhiteSpaceCategory) {
	            if (charCodeDefinitions.isNewline(current)) {
	                // Unescaped newline is a parse error: the token becomes a
	                // <bad-string-token> that ends right after the newline.
	                offset += utils.getNewlineLength(source, offset, current);
	                type = types.BadString;
	                return;
	            }
	        } else if (category === 0x005C && offset !== source.length - 1) {
	            // U+005C REVERSE SOLIDUS (\) that is not the very last character.
	            const following = getCharCode(offset + 1);

	            if (charCodeDefinitions.isNewline(following)) {
	                // Escaped newline: skip over it.
	                offset += utils.getNewlineLength(source, offset + 1, following);
	            } else if (charCodeDefinitions.isValidEscape(current, following)) {
	                // Valid escape: jump to its last character (the increment below
	                // moves past it).
	                offset = utils.consumeEscaped(source, offset) - 1;
	            }
	        }

	        // Anything else is simply part of the string.
	        offset++;
	    }
	}

	// § 4.3.6. Consume a url token
	// Note: This algorithm assumes that the initial "url(" has already been consumed.
	// This algorithm also assumes that it’s being called to consume an "unquoted" value, like url(foo).
	// A quoted value, like url("foo"), is parsed as a <function-token>. Consume an ident-like token
	// automatically handles this distinction; this algorithm shouldn’t be called directly otherwise.
	/**
	 * Consume the remainder of an unquoted url token.
	 *
	 * Expects `offset` to point just past the initial "url(". Moves `offset` past
	 * the token and sets `type` to `types.Url`, or to `types.BadUrl` when the
	 * value is malformed.
	 */
	function consumeUrlToken() {
	    // Initially create a <url-token> with its value set to the empty string.
	    type = types.Url;

	    // Consume as much whitespace as possible.
	    offset = utils.findWhiteSpaceEnd(source, offset);

	    // Repeatedly consume the next input code point from the stream:
	    for (; offset < source.length; offset++) {
	        const code = source.charCodeAt(offset);

	        switch (charCodeDefinitions.charCodeCategory(code)) {
	            // U+0029 RIGHT PARENTHESIS ())
	            case 0x0029:
	                // Return the <url-token>.
	                offset++;
	                return;

	            // Reaching the end of input simply ends the loop; the token stays a <url-token>.

	            // whitespace
	            case charCodeDefinitions.WhiteSpaceCategory:
	                // Consume as much whitespace as possible.
	                offset = utils.findWhiteSpaceEnd(source, offset);

	                // If the next input code point is U+0029 RIGHT PARENTHESIS ()) or EOF,
	                // consume it and return the <url-token>
	                // (if EOF was encountered, this is a parse error);
	                if (getCharCode(offset) === 0x0029 || offset >= source.length) {
	                    // Only a real ")" is consumed; at EOF there is nothing left to step over.
	                    if (offset < source.length) {
	                        offset++;
	                    }
	                    return;
	                }

	                // otherwise, consume the remnants of a bad url, create a <bad-url-token>,
	                // and return it.
	                offset = utils.consumeBadUrlRemnants(source, offset);
	                type = types.BadUrl;
	                return;

	            // U+0022 QUOTATION MARK (")
	            // U+0027 APOSTROPHE (')
	            // U+0028 LEFT PARENTHESIS (()
	            // non-printable code point
	            case 0x0022:
	            case 0x0027:
	            case 0x0028:
	            case charCodeDefinitions.NonPrintableCategory:
	                // This is a parse error. Consume the remnants of a bad url,
	                // create a <bad-url-token>, and return it.
	                offset = utils.consumeBadUrlRemnants(source, offset);
	                type = types.BadUrl;
	                return;

	            // U+005C REVERSE SOLIDUS (\)
	            case 0x005C:
	                // If the stream starts with a valid escape, consume an escaped code point;
	                // the "- 1" compensates for the loop's own increment.
	                if (charCodeDefinitions.isValidEscape(code, getCharCode(offset + 1))) {
	                    offset = utils.consumeEscaped(source, offset) - 1;
	                    break;
	                }

	                // Otherwise, this is a parse error. Consume the remnants of a bad url,
	                // create a <bad-url-token>, and return it.
	                offset = utils.consumeBadUrlRemnants(source, offset);
	                type = types.BadUrl;
	                return;

	            // anything else
	            // The current input code point is part of the <url-token>'s value.
	        }
	    }
	}

	// Main body of tokenize(): normalizes `source`, then walks it once and reports
	// every token to `onToken(type, start, end)`, where `start`/`end` are offsets
	// into the normalized string and `end` is exclusive.

	// ensure source is a string (any falsy input becomes the empty string)
	source = String(source || '');

	const sourceLength = source.length;
	// NOTE(review): the result of isBOM() is used directly as the first offset,
	// so it presumably returns the BOM length (0 or 1) — confirm in char-code-definitions.cjs.
	let start = charCodeDefinitions.isBOM(getCharCode(0));
	let offset = start;
	let type;

	// https://drafts.csswg.org/css-syntax-3/#consume-token
	// § 4.3.1. Consume a token
	while (offset < sourceLength) {
	    const code = source.charCodeAt(offset);

	    switch (charCodeDefinitions.charCodeCategory(code)) {
	        // whitespace
	        case charCodeDefinitions.WhiteSpaceCategory:
	            // Consume as much whitespace as possible. Return a <whitespace-token>.
	            type = types.WhiteSpace;
	            offset = utils.findWhiteSpaceEnd(source, offset + 1);
	            break;

	        // U+0022 QUOTATION MARK (")
	        case 0x0022:
	            // Consume a string token and return it.
	            consumeStringToken();
	            break;

	        // U+0023 NUMBER SIGN (#)
	        case 0x0023:
	            // If the next input code point is a name code point or the next two input code points are a valid escape, then:
	            if (charCodeDefinitions.isName(getCharCode(offset + 1)) ||
	                charCodeDefinitions.isValidEscape(getCharCode(offset + 1), getCharCode(offset + 2))) {
	                // Create a <hash-token>.
	                type = types.Hash;

	                // TODO: the spec also sets the <hash-token>'s type flag to "id" when the
	                // next 3 input code points would start an identifier; no flag is set here.

	                // Consume a name, and set the <hash-token>'s value to the returned string.
	                offset = utils.consumeName(source, offset + 1);

	                // Return the <hash-token>.
	            } else {
	                // Otherwise, return a <delim-token> with its value set to the current input code point.
	                type = types.Delim;
	                offset++;
	            }

	            break;

	        // U+0027 APOSTROPHE (')
	        case 0x0027:
	            // Consume a string token and return it.
	            consumeStringToken();
	            break;

	        // U+0028 LEFT PARENTHESIS (()
	        case 0x0028:
	            // Return a <(-token>.
	            type = types.LeftParenthesis;
	            offset++;
	            break;

	        // U+0029 RIGHT PARENTHESIS ())
	        case 0x0029:
	            // Return a <)-token>.
	            type = types.RightParenthesis;
	            offset++;
	            break;

	        // U+002B PLUS SIGN (+)
	        case 0x002B:
	            // If the input stream starts with a number, ...
	            if (charCodeDefinitions.isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
	                // ... reconsume the current input code point, consume a numeric token, and return it.
	                consumeNumericToken();
	            } else {
	                // Otherwise, return a <delim-token> with its value set to the current input code point.
	                type = types.Delim;
	                offset++;
	            }
	            break;

	        // U+002C COMMA (,)
	        case 0x002C:
	            // Return a <comma-token>.
	            type = types.Comma;
	            offset++;
	            break;

	        // U+002D HYPHEN-MINUS (-)
	        case 0x002D:
	            if (charCodeDefinitions.isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
	                // If the input stream starts with a number, reconsume the current input code point,
	                // consume a numeric token, and return it.
	                consumeNumericToken();
	            } else if (getCharCode(offset + 1) === 0x002D &&
	                       getCharCode(offset + 2) === 0x003E) {
	                // Otherwise, if the next 2 input code points are U+002D HYPHEN-MINUS
	                // U+003E GREATER-THAN SIGN (->), consume them and return a <CDC-token>.
	                type = types.CDC;
	                offset = offset + 3;
	            } else if (charCodeDefinitions.isIdentifierStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
	                // Otherwise, if the input stream starts with an identifier, reconsume the current
	                // input code point, consume an ident-like token, and return it.
	                consumeIdentLikeToken();
	            } else {
	                // Otherwise, return a <delim-token> with its value set to the current input code point.
	                type = types.Delim;
	                offset++;
	            }
	            break;

	        // U+002E FULL STOP (.)
	        case 0x002E:
	            // If the input stream starts with a number, ...
	            if (charCodeDefinitions.isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
	                // ... reconsume the current input code point, consume a numeric token, and return it.
	                consumeNumericToken();
	            } else {
	                // Otherwise, return a <delim-token> with its value set to the current input code point.
	                type = types.Delim;
	                offset++;
	            }

	            break;

	        // U+002F SOLIDUS (/)
	        case 0x002F:
	            // If the next input code point is U+002A ASTERISK (*), i.e. the input starts with "/*", ...
	            if (getCharCode(offset + 1) === 0x002A) {
	                // ... consume everything up to and including the first U+002A ASTERISK (*)
	                // followed by a U+002F SOLIDUS (/), or up to the end of input.
	                type = types.Comment;

	                const commentClose = source.indexOf('*/', offset + 2);

	                // An unterminated comment runs to the end of the source.
	                offset = commentClose === -1 ? source.length : commentClose + 2;
	            } else {
	                // Otherwise, return a <delim-token> with its value set to the current input code point.
	                type = types.Delim;
	                offset++;
	            }
	            break;

	        // U+003A COLON (:)
	        case 0x003A:
	            // Return a <colon-token>.
	            type = types.Colon;
	            offset++;
	            break;

	        // U+003B SEMICOLON (;)
	        case 0x003B:
	            // Return a <semicolon-token>.
	            type = types.Semicolon;
	            offset++;
	            break;

	        // U+003C LESS-THAN SIGN (<)
	        case 0x003C:
	            // If the next 3 input code points are U+0021 EXCLAMATION MARK U+002D HYPHEN-MINUS U+002D HYPHEN-MINUS (!--), ...
	            if (getCharCode(offset + 1) === 0x0021 &&
	                getCharCode(offset + 2) === 0x002D &&
	                getCharCode(offset + 3) === 0x002D) {
	                // ... consume them and return a <CDO-token>.
	                type = types.CDO;
	                offset = offset + 4;
	            } else {
	                // Otherwise, return a <delim-token> with its value set to the current input code point.
	                type = types.Delim;
	                offset++;
	            }

	            break;

	        // U+0040 COMMERCIAL AT (@)
	        case 0x0040:
	            // If the next 3 input code points would start an identifier, ...
	            if (charCodeDefinitions.isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
	                // ... consume a name, create an <at-keyword-token> with its value set to the returned value, and return it.
	                type = types.AtKeyword;
	                offset = utils.consumeName(source, offset + 1);
	            } else {
	                // Otherwise, return a <delim-token> with its value set to the current input code point.
	                type = types.Delim;
	                offset++;
	            }

	            break;

	        // U+005B LEFT SQUARE BRACKET ([)
	        case 0x005B:
	            // Return a <[-token>.
	            type = types.LeftSquareBracket;
	            offset++;
	            break;

	        // U+005C REVERSE SOLIDUS (\)
	        case 0x005C:
	            // If the input stream starts with a valid escape, ...
	            if (charCodeDefinitions.isValidEscape(code, getCharCode(offset + 1))) {
	                // ... reconsume the current input code point, consume an ident-like token, and return it.
	                consumeIdentLikeToken();
	            } else {
	                // Otherwise, this is a parse error. Return a <delim-token> with its value set to the current input code point.
	                type = types.Delim;
	                offset++;
	            }
	            break;

	        // U+005D RIGHT SQUARE BRACKET (])
	        case 0x005D:
	            // Return a <]-token>.
	            type = types.RightSquareBracket;
	            offset++;
	            break;

	        // U+007B LEFT CURLY BRACKET ({)
	        case 0x007B:
	            // Return a <{-token>.
	            type = types.LeftCurlyBracket;
	            offset++;
	            break;

	        // U+007D RIGHT CURLY BRACKET (})
	        case 0x007D:
	            // Return a <}-token>.
	            type = types.RightCurlyBracket;
	            offset++;
	            break;

	        // digit
	        case charCodeDefinitions.DigitCategory:
	            // Reconsume the current input code point, consume a numeric token, and return it.
	            consumeNumericToken();
	            break;

	        // name-start code point
	        case charCodeDefinitions.NameStartCategory:
	            // Reconsume the current input code point, consume an ident-like token, and return it.
	            consumeIdentLikeToken();
	            break;

	        // No case for EOF: the loop condition stops at the end of input and
	        // no token is reported for it.

	        // anything else
	        default:
	            // Return a <delim-token> with its value set to the current input code point.
	            type = types.Delim;
	            offset++;
	            break;
	    }

	    // put token to stream: [start, offset) is the range of the token just consumed
	    onToken(type, start, offset);

	    // the next token begins where this one ended
	    start = offset;
	}
	}

	// Public API of the tokenizer module.
	// NOTE(review): these are presumably kept as explicit, static `exports.name = …`
	// assignments so tooling can detect the named exports — confirm before
	// refactoring them into a loop.

	// Token type constants, re-exported one by one from ./types.cjs.
	exports.AtKeyword = types.AtKeyword;
	exports.BadString = types.BadString;
	exports.BadUrl = types.BadUrl;
	exports.CDC = types.CDC;
	exports.CDO = types.CDO;
	exports.Colon = types.Colon;
	exports.Comma = types.Comma;
	exports.Comment = types.Comment;
	exports.Delim = types.Delim;
	exports.Dimension = types.Dimension;
	exports.EOF = types.EOF;
	exports.Function = types.Function;
	exports.Hash = types.Hash;
	exports.Ident = types.Ident;
	exports.LeftCurlyBracket = types.LeftCurlyBracket;
	exports.LeftParenthesis = types.LeftParenthesis;
	exports.LeftSquareBracket = types.LeftSquareBracket;
	exports.Number = types.Number;
	exports.Percentage = types.Percentage;
	exports.RightCurlyBracket = types.RightCurlyBracket;
	exports.RightParenthesis = types.RightParenthesis;
	exports.RightSquareBracket = types.RightSquareBracket;
	exports.Semicolon = types.Semicolon;
	exports.String = types.String;
	exports.Url = types.Url;
	exports.WhiteSpace = types.WhiteSpace;
	// The whole ./types.cjs module object, under a single name.
	exports.tokenTypes = types;
	// Character category constants and the classifier, from ./char-code-definitions.cjs.
	exports.DigitCategory = charCodeDefinitions.DigitCategory;
	exports.EofCategory = charCodeDefinitions.EofCategory;
	exports.NameStartCategory = charCodeDefinitions.NameStartCategory;
	exports.NonPrintableCategory = charCodeDefinitions.NonPrintableCategory;
	exports.WhiteSpaceCategory = charCodeDefinitions.WhiteSpaceCategory;
	exports.charCodeCategory = charCodeDefinitions.charCodeCategory;
	// Character predicates, from ./char-code-definitions.cjs.
	exports.isBOM = charCodeDefinitions.isBOM;
	exports.isDigit = charCodeDefinitions.isDigit;
	exports.isHexDigit = charCodeDefinitions.isHexDigit;
	exports.isIdentifierStart = charCodeDefinitions.isIdentifierStart;
	exports.isLetter = charCodeDefinitions.isLetter;
	exports.isLowercaseLetter = charCodeDefinitions.isLowercaseLetter;
	exports.isName = charCodeDefinitions.isName;
	exports.isNameStart = charCodeDefinitions.isNameStart;
	exports.isNewline = charCodeDefinitions.isNewline;
	exports.isNonAscii = charCodeDefinitions.isNonAscii;
	exports.isNonPrintable = charCodeDefinitions.isNonPrintable;
	exports.isNumberStart = charCodeDefinitions.isNumberStart;
	exports.isUppercaseLetter = charCodeDefinitions.isUppercaseLetter;
	exports.isValidEscape = charCodeDefinitions.isValidEscape;
	exports.isWhiteSpace = charCodeDefinitions.isWhiteSpace;
	// Scanning and comparison helpers, from ./utils.cjs.
	exports.cmpChar = utils.cmpChar;
	exports.cmpStr = utils.cmpStr;
	exports.consumeBadUrlRemnants = utils.consumeBadUrlRemnants;
	exports.consumeEscaped = utils.consumeEscaped;
	exports.consumeName = utils.consumeName;
	exports.consumeNumber = utils.consumeNumber;
	exports.decodeEscaped = utils.decodeEscaped;
	exports.findDecimalNumberEnd = utils.findDecimalNumberEnd;
	exports.findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
	exports.findWhiteSpaceStart = utils.findWhiteSpaceStart;
	exports.getNewlineLength = utils.getNewlineLength;
	// The ./names.cjs module object.
	exports.tokenNames = names;
	// Members taken from ./OffsetToLocation.cjs and ./TokenStream.cjs.
	exports.OffsetToLocation = OffsetToLocation.OffsetToLocation;
	exports.TokenStream = TokenStream.TokenStream;
	// The tokenizer entry point defined in this file.
	exports.tokenize = tokenize;