DuyTa
/

Graduation

Model card Files Files and versions Community

Graduation / ui /node_modules /micromark-util-character /index.js

DuyTa's picture

Upload folder using huggingface_hub

bc20498 verified 10 months ago

7.05 kB

	/**
	* @typedef {import('micromark-util-types').Code} Code
	*/

	import {unicodePunctuationRegex} from './lib/unicode-punctuation-regex.js'

	/**
	 * Check whether the character code represents an ASCII alpha (`a` through
	 * `z`, case insensitive).
	 *
	 * An ASCII alpha is an ASCII upper alpha or an ASCII lower alpha.
	 *
	 * An ASCII upper alpha is a character in the inclusive range U+0041 (`A`)
	 * to U+005A (`Z`).
	 *
	 * An ASCII lower alpha is a character in the inclusive range U+0061 (`a`)
	 * to U+007A (`z`).
	 *
	 * The check is produced by `regexCheck`, so a `null` code never matches.
	 *
	 * @param code
	 *   Code.
	 * @returns
	 *   Whether it matches.
	 */
	export const asciiAlpha = regexCheck(/[A-Za-z]/)

	/**
	 * Check whether the character code represents an ASCII alphanumeric (`a`
	 * through `z`, case insensitive, or `0` through `9`).
	 *
	 * An ASCII alphanumeric is an ASCII digit (see `asciiDigit`) or an ASCII
	 * alpha (see `asciiAlpha`).
	 *
	 * The check is produced by `regexCheck`, so a `null` code never matches.
	 *
	 * @param code
	 *   Code.
	 * @returns
	 *   Whether it matches.
	 */
	export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/)

	/**
	 * Check whether the character code represents an ASCII atext.
	 *
	 * atext, as matched here, is an ASCII alphanumeric (see
	 * `asciiAlphanumeric`), or a character in the inclusive range U+0023 NUMBER
	 * SIGN (`#`) to U+0027 APOSTROPHE (`'`), U+002A ASTERISK (`*`), U+002B PLUS
	 * SIGN (`+`), the inclusive range U+002D DASH (`-`) to U+002F SLASH (`/`)
	 * (which includes U+002E DOT (`.`)), U+003D EQUALS TO (`=`), U+003F QUESTION
	 * MARK (`?`), the inclusive range U+005E CARET (`^`) to U+0060 GRAVE ACCENT
	 * (`` ` ``), or the inclusive range U+007B LEFT CURLY BRACE (`{`) to U+007E
	 * TILDE (`~`).
	 *
	 * NOTE(review): compared with `atext` in RFC 5322, this pattern additionally
	 * accepts U+002E DOT (`.`) and does not accept U+0021 EXCLAMATION MARK
	 * (`!`); confirm that callers rely on exactly this set before changing it.
	 *
	 * The check is produced by `regexCheck`, so a `null` code never matches.
	 *
	 * See:
	 * \[RFC5322]:
	 * [Internet Message Format](https://tools.ietf.org/html/rfc5322).
	 * P. Resnick.
	 * IETF.
	 *
	 * @param code
	 *   Code.
	 * @returns
	 *   Whether it matches.
	 */
	export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/)

	/**
	 * Check whether a character code is an ASCII control character.
	 *
	 * An ASCII control is a character in the inclusive range U+0000 NULL (NUL)
	 * to U+001F (US), or U+007F (DEL).
	 *
	 * Every code below 32 matches, so the negative codes that micromark uses
	 * for its special whitespace characters are reported as controls too.
	 * A `null` code never matches.
	 *
	 * @param {Code} code
	 *   Code.
	 * @returns {boolean}
	 *   Whether it matches.
	 */
	export function asciiControl(code) {
	  return (
	    // Special whitespace codes (which have negative values), C0 and Control
	    // character DEL
	    code !== null && (code < 32 || code === 127)
	  )
	}

	/**
	 * Check whether the character code represents an ASCII digit (`0` through
	 * `9`).
	 *
	 * An ASCII digit is a character in the inclusive range U+0030 (`0`) to
	 * U+0039 (`9`).
	 *
	 * The check is produced by `regexCheck`, so a `null` code never matches.
	 *
	 * @param code
	 *   Code.
	 * @returns
	 *   Whether it matches.
	 */
	export const asciiDigit = regexCheck(/\d/)

	/**
	 * Check whether the character code represents an ASCII hex digit (`a`
	 * through `f`, case insensitive, or `0` through `9`).
	 *
	 * An ASCII hex digit is an ASCII digit (see `asciiDigit`), an ASCII upper
	 * hex digit, or an ASCII lower hex digit.
	 *
	 * An ASCII upper hex digit is a character in the inclusive range U+0041
	 * (`A`) to U+0046 (`F`).
	 *
	 * An ASCII lower hex digit is a character in the inclusive range U+0061
	 * (`a`) to U+0066 (`f`).
	 *
	 * The check is produced by `regexCheck`, so a `null` code never matches.
	 *
	 * @param code
	 *   Code.
	 * @returns
	 *   Whether it matches.
	 */
	export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/)

	/**
	 * Check whether the character code represents ASCII punctuation.
	 *
	 * An ASCII punctuation is a character in the inclusive ranges U+0021
	 * EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040
	 * AT SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT
	 * (`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
	 *
	 * The check is produced by `regexCheck`, so a `null` code never matches.
	 *
	 * @param code
	 *   Code.
	 * @returns
	 *   Whether it matches.
	 */
	export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/)

	/**
	 * Tell whether a character code stands for a markdown line ending.
	 *
	 * Markdown line endings are the virtual characters M-0003 CARRIAGE RETURN
	 * LINE FEED (CRLF), M-0004 LINE FEED (LF), and M-0005 CARRIAGE RETURN (CR).
	 * In code terms: every non-`null` code below `-2` is a line ending.
	 *
	 * In micromark, the actual characters U+000A LINE FEED (LF) and U+000D
	 * CARRIAGE RETURN (CR) are replaced by these virtual characters, depending
	 * on whether they occurred together.
	 *
	 * @param {Code} code
	 *   Code.
	 * @returns {boolean}
	 *   Whether it matches.
	 */
	export function markdownLineEnding(code) {
	  // A `null` code is never a line ending.
	  if (code === null) {
	    return false
	  }

	  return code < -2
	}

	/**
	 * Check whether a character code is a markdown line ending (see
	 * `markdownLineEnding`) or markdown space (see `markdownSpace`).
	 *
	 * Every negative code matches, as does U+0020 SPACE (SP).
	 * A `null` code never matches.
	 *
	 * @param {Code} code
	 *   Code.
	 * @returns {boolean}
	 *   Whether it matches.
	 */
	export function markdownLineEndingOrSpace(code) {
	  return code !== null && (code < 0 || code === 32)
	}

	/**
	 * Check whether a character code is a markdown space.
	 *
	 * A markdown space is the concrete character U+0020 SPACE (SP) and the
	 * virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB
	 * (HT), that is, the codes `32`, `-1`, and `-2`.
	 *
	 * In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
	 * replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001
	 * VIRTUAL SPACE (VS) characters, depending on the column at which the tab
	 * occurred.
	 *
	 * @param {Code} code
	 *   Code.
	 * @returns {boolean}
	 *   Whether it matches.
	 */
	export function markdownSpace(code) {
	  return code === -2 || code === -1 || code === 32
	}

	// Size note: removing ASCII from the regex and using `asciiPunctuation` here
	// in fact adds to the bundle size.
	/**
	 * Check whether the character code represents Unicode punctuation.
	 *
	 * A Unicode punctuation is a character in the Unicode `Pc` (Punctuation,
	 * Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf`
	 * (Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po`
	 * (Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an ASCII
	 * punctuation (see `asciiPunctuation`).
	 *
	 * The exact set is whatever the imported `unicodePunctuationRegex` matches;
	 * that expression is defined in `./lib/unicode-punctuation-regex.js`.
	 * The check is produced by `regexCheck`, so a `null` code never matches.
	 *
	 * See:
	 * \[UNICODE]:
	 * [The Unicode Standard](https://www.unicode.org/versions/).
	 * Unicode Consortium.
	 *
	 * @param code
	 *   Code.
	 * @returns
	 *   Whether it matches.
	 */
	export const unicodePunctuation = regexCheck(unicodePunctuationRegex)

	/**
	 * Check whether the character code represents Unicode whitespace.
	 *
	 * Note that this does not handle micromark specific markdown whitespace
	 * characters (the negative, virtual codes).
	 * See `markdownLineEndingOrSpace` to check for those.
	 *
	 * A Unicode whitespace is a character in the Unicode `Zs` (Separator,
	 * Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
	 * U+000C (FF), or U+000D CARRIAGE RETURN (CR) (\[UNICODE]).
	 *
	 * The implementation tests against the JavaScript `\s` character class,
	 * which additionally matches U+000B LINE TABULATION, U+2028 LINE SEPARATOR,
	 * U+2029 PARAGRAPH SEPARATOR, and U+FEFF ZERO WIDTH NO-BREAK SPACE.
	 *
	 * The check is produced by `regexCheck`, so a `null` code never matches.
	 *
	 * See:
	 * \[UNICODE]:
	 * [The Unicode Standard](https://www.unicode.org/versions/).
	 * Unicode Consortium.
	 *
	 * @param code
	 *   Code.
	 * @returns
	 *   Whether it matches.
	 */
	export const unicodeWhitespace = regexCheck(/\s/)

	/**
	 * Create a code check from a regex.
	 *
	 * The returned check turns the code into a one-unit string with
	 * `String.fromCharCode` and tests that string against `regex`.
	 *
	 * `null` and negative codes never match.
	 * Negative codes must be rejected before the conversion because
	 * `String.fromCharCode` wraps them around to unrelated UTF-16 code units
	 * (for example, `-3` becomes U+FFFD), which could otherwise match `regex`
	 * by accident.
	 *
	 * @param {RegExp} regex
	 *   Expression that a single character is tested against.
	 * @returns {(code: Code) => boolean}
	 *   Check bound to `regex`.
	 */
	function regexCheck(regex) {
	  return check

	  /**
	   * Check whether a code matches the bound regex.
	   *
	   * @param {Code} code
	   *   Character code.
	   * @returns {boolean}
	   *   Whether the character code matches the bound regex.
	   */
	  function check(code) {
	    return code !== null && code > -1 && regex.test(String.fromCharCode(code))
	  }
	}