DuyTa
/

Graduation

Model card Files Files and versions Community

Graduation / ui /node_modules /katex /src /unicodeScripts.js

DuyTa's picture

Upload folder using huggingface_hub

bc20498 verified 10 months ago

3.97 kB

	// @flow

	/*
	* This file defines the Unicode scripts and script families that we
	* support. To add new scripts or families, just add a new entry to the
	* scriptData array below. Adding scripts to the scriptData array allows
	* characters from that script to appear in \text{} environments.
	*/

	/**
	* Each script or script family has a name and an array of blocks.
	* Each block is an array of two numbers which specify the start and
	* end points (inclusive) of a block of Unicode codepoints.
	*/
	type Script = {
	name: string;
	blocks: Array<Array<number>>;
	};

	/**
	* Unicode block data for the families of scripts we support in \text{}.
	* Scripts only need to appear here if they do not have font metrics.
	*/
	const scriptData: Array<Script> = [
	{
	// Latin characters beyond the Latin-1 characters we have metrics for.
	// Needed for Czech, Hungarian and Turkish text, for example.
	name: 'latin',
	blocks: [
	[0x0100, 0x024f], // Latin Extended-A and Latin Extended-B
	[0x0300, 0x036f], // Combining Diacritical marks
	],
	},
	{
	// The Cyrillic script used by Russian and related languages.
	// A Cyrillic subset used to be supported as explicitly defined
	// symbols in symbols.js
	name: 'cyrillic',
	blocks: [[0x0400, 0x04ff]],
	},
	{
	// Armenian
	name: 'armenian',
	blocks: [[0x0530, 0x058F]],
	},
	{
	// The Brahmic scripts of South and Southeast Asia
	// Devanagari (0900–097F)
	// Bengali (0980–09FF)
	// Gurmukhi (0A00–0A7F)
	// Gujarati (0A80–0AFF)
	// Oriya (0B00–0B7F)
	// Tamil (0B80–0BFF)
	// Telugu (0C00–0C7F)
	// Kannada (0C80–0CFF)
	// Malayalam (0D00–0D7F)
	// Sinhala (0D80–0DFF)
	// Thai (0E00–0E7F)
	// Lao (0E80–0EFF)
	// Tibetan (0F00–0FFF)
	// Myanmar (1000–109F)
	name: 'brahmic',
	blocks: [[0x0900, 0x109F]],
	},
	{
	name: 'georgian',
	blocks: [[0x10A0, 0x10ff]],
	},
	{
	// Chinese and Japanese.
	// The "k" in cjk is for Korean, but we've separated Korean out
	name: "cjk",
	blocks: [
	[0x3000, 0x30FF], // CJK symbols and punctuation, Hiragana, Katakana
	[0x4E00, 0x9FAF], // CJK ideograms
	[0xFF00, 0xFF60], // Fullwidth punctuation
	// TODO: add halfwidth Katakana and Romanji glyphs
	],
	},
	{
	// Korean
	name: 'hangul',
	blocks: [[0xAC00, 0xD7AF]],
	},
	];

	/**
	* Given a codepoint, return the name of the script or script family
	* it is from, or null if it is not part of a known block
	*/
	export function scriptFromCodepoint(codepoint: number): ?string {
	for (let i = 0; i < scriptData.length; i++) {
	const script = scriptData[i];
	for (let i = 0; i < script.blocks.length; i++) {
	const block = script.blocks[i];
	if (codepoint >= block[0] && codepoint <= block[1]) {
	return script.name;
	}
	}
	}
	return null;
	}

	/**
	* A flattened version of all the supported blocks in a single array.
	* This is an optimization to make supportedCodepoint() fast.
	*/
	const allBlocks: Array<number> = [];
	scriptData.forEach(s => s.blocks.forEach(b => allBlocks.push(...b)));

	/**
	* Given a codepoint, return true if it falls within one of the
	* scripts or script families defined above and false otherwise.
	*
	* Micro benchmarks shows that this is faster than
	* /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60\uAC00-\uD7AF\u0900-\u109F]/.test()
	* in Firefox, Chrome and Node.
	*/
	export function supportedCodepoint(codepoint: number): boolean {
	for (let i = 0; i < allBlocks.length; i += 2) {
	if (codepoint >= allBlocks[i] && codepoint <= allBlocks[i + 1]) {
	return true;
	}
	}
	return false;
	}