DuyTa
/

Graduation

Model card Files Files and versions Community

Graduation / ui /node_modules /katex /src /Parser.js

DuyTa's picture

Upload folder using huggingface_hub

bc20498 verified 10 months ago

37.6 kB

	// @flow
	/* eslint no-constant-condition:0 */
	import functions from "./functions";
	import MacroExpander, {implicitCommands} from "./MacroExpander";
	import symbols, {ATOMS, extraLatin} from "./symbols";
	import {validUnit} from "./units";
	import {supportedCodepoint} from "./unicodeScripts";
	import ParseError from "./ParseError";
	import {combiningDiacriticalMarksEndRegex} from "./Lexer";
	import Settings from "./Settings";
	import SourceLocation from "./SourceLocation";
	import {uSubsAndSups, unicodeSubRegEx} from "./unicodeSupOrSub";
	import {Token} from "./Token";

	// Pre-evaluate both modules as unicodeSymbols requires String.normalize()
	import unicodeAccents from /preval/ "./unicodeAccents";
	import unicodeSymbols from /preval/ "./unicodeSymbols";

	import type {ParseNode, AnyParseNode, SymbolParseNode, UnsupportedCmdParseNode}
	from "./parseNode";
	import type {Atom, Group} from "./symbols";
	import type {Mode, ArgType, BreakToken} from "./types";
	import type {FunctionContext, FunctionSpec} from "./defineFunction";
	import type {EnvSpec} from "./defineEnvironment";

	/**
	* This file contains the parser used to parse out a TeX expression from the
	* input. Since TeX isn't context-free, standard parsers don't work particularly
	* well.
	*
	* The strategy of this parser is as such:
	*
	* The main functions (the `.parse...` ones) take a position in the current
	* parse string to parse tokens from. The lexer (found in Lexer.js, stored at
	* this.gullet.lexer) also supports pulling out tokens at arbitrary places. When
	* individual tokens are needed at a position, the lexer is called to pull out a
	* token, which is then used.
	*
	* The parser has a property called "mode" indicating the mode that
	* the parser is currently in. Currently it has to be one of "math" or
	* "text", which denotes whether the current environment is a math-y
	* one or a text-y one (e.g. inside \text). Currently, this serves to
	* limit the functions which can be used in text mode.
	*
	* The main functions then return an object which contains the useful data that
	* was parsed at its given point, and a new position at the end of the parsed
	* data. The main functions can call each other and continue the parsing by
	* using the returned position as a new starting point.
	*
	* There are also extra `.handle...` functions, which pull out some reused
	* functionality into self-contained functions.
	*
	* The functions return ParseNodes.
	*/

	export default class Parser {
	mode: Mode;
	gullet: MacroExpander;
	settings: Settings;
	leftrightDepth: number;
	nextToken: ?Token;

	constructor(input: string, settings: Settings) {
	// Start in math mode
	this.mode = "math";
	// Create a new macro expander (gullet) and (indirectly via that) also a
	// new lexer (mouth) for this parser (stomach, in the language of TeX)
	this.gullet = new MacroExpander(input, settings, this.mode);
	// Store the settings for use in parsing
	this.settings = settings;
	// Count leftright depth (for \middle errors)
	this.leftrightDepth = 0;
	}

	/**
	* Checks a result to make sure it has the right type, and throws an
	* appropriate error otherwise.
	*/
	expect(text: string, consume?: boolean = true) {
	if (this.fetch().text !== text) {
	throw new ParseError(
	`Expected '${text}', got '${this.fetch().text}'`, this.fetch()
	);
	}
	if (consume) {
	this.consume();
	}
	}

	/**
	* Discards the current lookahead token, considering it consumed.
	*/
	consume() {
	// Clearing the cached lookahead makes the next fetch() pull a fresh token
	// from the gullet.
	this.nextToken = null;
	}

	/**
	* Return the current lookahead token, or if there isn't one (at the
	* beginning, or if the previous lookahead token was consume()d),
	* fetch the next token as the new lookahead token and return it.
	*/
	fetch(): Token {
	if (this.nextToken == null) {
	this.nextToken = this.gullet.expandNextToken();
	}
	return this.nextToken;
	}

	/**
	* Switches between "text" and "math" modes.
	*/
	switchMode(newMode: Mode) {
	// Record the mode on the parser and forward the same mode to the gullet.
	this.mode = newMode;
	this.gullet.switchMode(newMode);
	}

	/**
	* Main parsing function, which parses an entire input.
	*/
	parse(): AnyParseNode[] {
	if (!this.settings.globalGroup) {
	// Create a group namespace for the math expression.
	// (LaTeX creates a new group for every $...$, $$...$$, \[...\].)
	this.gullet.beginGroup();
	}

	// Use old \color behavior (same as LaTeX's \textcolor) if requested.
	// We do this within the group for the math expression, so it doesn't
	// pollute settings.macros.
	if (this.settings.colorIsTextColor) {
	this.gullet.macros.set("\\color", "\\textcolor");
	}

	try {
	// Try to parse the input
	const parse = this.parseExpression(false);

	// If we succeeded, make sure there's an EOF at the end
	this.expect("EOF");

	// End the group namespace for the expression
	if (!this.settings.globalGroup) {
	this.gullet.endGroup();
	}

	return parse;

	// Close any leftover groups in case of a parse error.
	} finally {
	this.gullet.endGroups();
	}
	}

	/**
	* Fully parse a separate sequence of tokens as a separate job.
	* Tokens should be specified in reverse order, as in a MacroDefinition.
	*/
	subparse(tokens: Token[]): AnyParseNode[] {
	// Save the next token from the current job.
	const oldToken = this.nextToken;
	this.consume();

	// Run the new job, terminating it with an excess '}'
	this.gullet.pushToken(new Token("}"));
	this.gullet.pushTokens(tokens);
	const parse = this.parseExpression(false);
	this.expect("}");

	// Restore the next token from the current job.
	this.nextToken = oldToken;

	return parse;
	}

	// Token texts at which parseExpression() stops collecting atoms.
	static endOfExpression: string[] = ["}", "\\endgroup", "\\end", "\\right", "&"];

	/**
	* Parses an "expression", which is a list of atoms.
	*
	* `breakOnInfix`: Should the parsing stop when we hit infix nodes? This
	* happens when functions have higher precedence than infix
	* nodes in implicit parses.
	*
	* `breakOnTokenText`: The text of the token that the expression should end
	* with, or `null` if something else should end the
	* expression.
	*/
	parseExpression(
	breakOnInfix: boolean,
	breakOnTokenText?: BreakToken,
	): AnyParseNode[] {
	const body = [];
	// Keep adding atoms to the body until we can't parse any more atoms (either
	// we reached the end, a }, or a \right)
	while (true) {
	// Ignore spaces in math mode
	if (this.mode === "math") {
	this.consumeSpaces();
	}
	const lex = this.fetch();
	if (Parser.endOfExpression.indexOf(lex.text) !== -1) {
	break;
	}
	if (breakOnTokenText && lex.text === breakOnTokenText) {
	break;
	}
	if (breakOnInfix && functions[lex.text] && functions[lex.text].infix) {
	break;
	}
	const atom = this.parseAtom(breakOnTokenText);
	if (!atom) {
	break;
	} else if (atom.type === "internal") {
	continue;
	}
	body.push(atom);
	}
	if (this.mode === "text") {
	this.formLigatures(body);
	}
	return this.handleInfixNodes(body);
	}

	/**
	* Rewrites infix operators such as \over with corresponding commands such
	* as \frac.
	*
	* There can only be one infix operator per group. If there's more than one
	* then the expression is ambiguous. This can be resolved by adding {}.
	*/
	handleInfixNodes(body: AnyParseNode[]): AnyParseNode[] {
	let overIndex = -1;
	let funcName;

	for (let i = 0; i < body.length; i++) {
	if (body[i].type === "infix") {
	if (overIndex !== -1) {
	throw new ParseError(
	"only one infix operator per group",
	body[i].token);
	}
	overIndex = i;
	funcName = body[i].replaceWith;
	}
	}

	if (overIndex !== -1 && funcName) {
	let numerNode;
	let denomNode;

	const numerBody = body.slice(0, overIndex);
	const denomBody = body.slice(overIndex + 1);

	if (numerBody.length === 1 && numerBody[0].type === "ordgroup") {
	numerNode = numerBody[0];
	} else {
	numerNode = {type: "ordgroup", mode: this.mode, body: numerBody};
	}

	if (denomBody.length === 1 && denomBody[0].type === "ordgroup") {
	denomNode = denomBody[0];
	} else {
	denomNode = {type: "ordgroup", mode: this.mode, body: denomBody};
	}

	let node;
	if (funcName === "\\\\abovefrac") {
	node = this.callFunction(funcName,
	[numerNode, body[overIndex], denomNode], []);
	} else {
	node = this.callFunction(funcName, [numerNode, denomNode], []);
	}
	return [node];
	} else {
	return body;
	}
	}

	/**
	* Handle a subscript or superscript with nice errors.
	*/
	handleSupSubscript(
	    name: string, // For error reporting.
	): AnyParseNode {
	    // The current lookahead is the script marker itself; remember it for
	    // the error message before discarding it.
	    const marker = this.fetch();
	    const markerText = marker.text;
	    this.consume();
	    // Spaces between the marker and its argument are ignored.
	    this.consumeSpaces();

	    const argument = this.parseGroup(name);
	    if (argument) {
	        return argument;
	    }
	    throw new ParseError(
	        `Expected group after '${markerText}'`,
	        marker
	    );
	}

	/**
	* Converts the textual input of an unsupported command into a text node
	* contained within a color node whose color is determined by errorColor
	*/
	formatUnsupportedCmd(text: string): UnsupportedCmdParseNode {
	const textordArray = [];

	for (let i = 0; i < text.length; i++) {
	textordArray.push({type: "textord", mode: "text", text: text[i]});
	}

	const textNode = {
	type: "text",
	mode: this.mode,
	body: textordArray,
	};

	const colorNode = {
	type: "color",
	mode: this.mode,
	color: this.settings.errorColor,
	body: [textNode],
	};

	return colorNode;
	}

	/**
	* Parses a group with optional super/subscripts.
	*/
	parseAtom(breakOnTokenText?: BreakToken): ?AnyParseNode {
	// The body of an atom is an implicit group, so that things like
	// \left(x\right)^2 work correctly.
	const base = this.parseGroup("atom", breakOnTokenText);

	// In text mode, we don't have superscripts or subscripts
	if (this.mode === "text") {
	return base;
	}

	// Note that base may be empty (i.e. null) at this point.

	let superscript;
	let subscript;
	while (true) {
	// Guaranteed in math mode, so eat any spaces first.
	this.consumeSpaces();

	// Lex the first token
	const lex = this.fetch();

	if (lex.text === "\\limits" \|\| lex.text === "\\nolimits") {
	// We got a limit control
	if (base && base.type === "op") {
	const limits = lex.text === "\\limits";
	base.limits = limits;
	base.alwaysHandleSupSub = true;
	} else if (base && base.type === "operatorname") {
	if (base.alwaysHandleSupSub) {
	base.limits = lex.text === "\\limits";
	}
	} else {
	throw new ParseError(
	"Limit controls must follow a math operator",
	lex);
	}
	this.consume();
	} else if (lex.text === "^") {
	// We got a superscript start
	if (superscript) {
	throw new ParseError("Double superscript", lex);
	}
	superscript = this.handleSupSubscript("superscript");
	} else if (lex.text === "_") {
	// We got a subscript start
	if (subscript) {
	throw new ParseError("Double subscript", lex);
	}
	subscript = this.handleSupSubscript("subscript");
	} else if (lex.text === "'") {
	// We got a prime
	if (superscript) {
	throw new ParseError("Double superscript", lex);
	}
	const prime = {type: "textord", mode: this.mode, text: "\\prime"};

	// Many primes can be grouped together, so we handle this here
	const primes = [prime];
	this.consume();
	// Keep lexing tokens until we get something that's not a prime
	while (this.fetch().text === "'") {
	// For each one, add another prime to the list
	primes.push(prime);
	this.consume();
	}
	// If there's a superscript following the primes, combine that
	// superscript in with the primes.
	if (this.fetch().text === "^") {
	primes.push(this.handleSupSubscript("superscript"));
	}
	// Put everything into an ordgroup as the superscript
	superscript = {type: "ordgroup", mode: this.mode, body: primes};
	} else if (uSubsAndSups[lex.text]) {
	// A Unicode subscript or superscript character.
	// We treat these similarly to the unicode-math package.
	// So we render a string of Unicode (sub\|super)scripts the
	// same as a (sub\|super)script of regular characters.
	const isSub = unicodeSubRegEx.test(lex.text);
	const subsupTokens = [];
	subsupTokens.push(new Token(uSubsAndSups[lex.text]));
	this.consume();
	// Continue fetching tokens to fill out the string.
	while (true) {
	const token = this.fetch().text;
	if (!(uSubsAndSups[token])) { break; }
	if (unicodeSubRegEx.test(token) !== isSub) { break; }
	subsupTokens.unshift(new Token(uSubsAndSups[token]));
	this.consume();
	}
	// Now create a (sub\|super)script.
	const body = this.subparse(subsupTokens);
	if (isSub) {
	subscript = {type: "ordgroup", mode: "math", body};
	} else {
	superscript = {type: "ordgroup", mode: "math", body};
	}
	} else {
	// If it wasn't ^, _, or ', stop parsing super/subscripts
	break;
	}
	}

	// Base must be set if superscript or subscript are set per logic above,
	// but need to check here for type check to pass.
	if (superscript \|\| subscript) {
	// If we got either a superscript or subscript, create a supsub
	return {
	type: "supsub",
	mode: this.mode,
	base: base,
	sup: superscript,
	sub: subscript,
	};
	} else {
	// Otherwise return the original body
	return base;
	}
	}

	/**
	* Parses an entire function, including its base and all of its arguments.
	*/
	parseFunction(
	breakOnTokenText?: BreakToken,
	name?: string, // For determining its context
	): ?AnyParseNode {
	const token = this.fetch();
	const func = token.text;
	const funcData = functions[func];
	if (!funcData) {
	return null;
	}
	this.consume(); // consume command token

	if (name && name !== "atom" && !funcData.allowedInArgument) {
	throw new ParseError(
	"Got function '" + func + "' with no arguments" +
	(name ? " as " + name : ""), token);
	} else if (this.mode === "text" && !funcData.allowedInText) {
	throw new ParseError(
	"Can't use function '" + func + "' in text mode", token);
	} else if (this.mode === "math" && funcData.allowedInMath === false) {
	throw new ParseError(
	"Can't use function '" + func + "' in math mode", token);
	}

	const {args, optArgs} = this.parseArguments(func, funcData);
	return this.callFunction(func, args, optArgs, token, breakOnTokenText);
	}

	/**
	* Call a function handler with a suitable context and arguments.
	*/
	callFunction(
	name: string,
	args: AnyParseNode[],
	optArgs: (?AnyParseNode)[],
	token?: Token,
	breakOnTokenText?: BreakToken,
	): AnyParseNode {
	const context: FunctionContext = {
	funcName: name,
	parser: this,
	token,
	breakOnTokenText,
	};
	const func = functions[name];
	if (func && func.handler) {
	return func.handler(context, args, optArgs);
	} else {
	throw new ParseError(`No function handler for ${name}`);
	}
	}

	/**
	* Parses the arguments of a function or environment
	*/
	parseArguments(
	func: string, // Should look like "\name" or "\begin{name}".
	funcData: FunctionSpec<> \| EnvSpec<>,
	): {
	args: AnyParseNode[],
	optArgs: (?AnyParseNode)[],
	} {
	const totalArgs = funcData.numArgs + funcData.numOptionalArgs;
	if (totalArgs === 0) {
	return {args: [], optArgs: []};
	}

	const args = [];
	const optArgs = [];

	for (let i = 0; i < totalArgs; i++) {
	let argType = funcData.argTypes && funcData.argTypes[i];
	const isOptional = i < funcData.numOptionalArgs;

	if ((funcData.primitive && argType == null) \|\|
	// \sqrt expands into primitive if optional argument doesn't exist
	(funcData.type === "sqrt" && i === 1 && optArgs[0] == null)) {
	argType = "primitive";
	}

	const arg = this.parseGroupOfType(`argument to '${func}'`,
	argType, isOptional);
	if (isOptional) {
	optArgs.push(arg);
	} else if (arg != null) {
	args.push(arg);
	} else { // should be unreachable
	throw new ParseError("Null argument, please report this as a bug");
	}
	}

	return {args, optArgs};
	}

	/**
	* Parses a group when the mode is changing.
	*/
	parseGroupOfType(
	name: string,
	type: ?ArgType,
	optional: boolean,
	): ?AnyParseNode {
	switch (type) {
	case "color":
	return this.parseColorGroup(optional);
	case "size":
	return this.parseSizeGroup(optional);
	case "url":
	return this.parseUrlGroup(optional);
	case "math":
	case "text":
	return this.parseArgumentGroup(optional, type);
	case "hbox": {
	// hbox argument type wraps the argument in the equivalent of
	// \hbox, which is like \text but switching to \textstyle size.
	const group = this.parseArgumentGroup(optional, "text");
	return group != null ? {
	type: "styling",
	mode: group.mode,
	body: [group],
	style: "text", // simulate \textstyle
	} : null;
	}
	case "raw": {
	const token = this.parseStringGroup("raw", optional);
	return token != null ? {
	type: "raw",
	mode: "text",
	string: token.text,
	} : null;
	}
	case "primitive": {
	if (optional) {
	throw new ParseError("A primitive argument cannot be optional");
	}
	const group = this.parseGroup(name);
	if (group == null) {
	throw new ParseError("Expected group as " + name, this.fetch());
	}
	return group;
	}
	case "original":
	case null:
	case undefined:
	return this.parseArgumentGroup(optional);
	default:
	throw new ParseError(
	"Unknown group type as " + name, this.fetch());
	}
	}

	/**
	* Discard any space tokens, fetching the next non-space token.
	*/
	consumeSpaces() {
	while (this.fetch().text === " ") {
	this.consume();
	}
	}

	/**
	* Parses a group, essentially returning the string formed by the
	* brace-enclosed tokens plus some position information.
	*/
	parseStringGroup(
	    modeName: ArgType, // Used to describe the mode in error messages.
	    optional: boolean,
	): ?Token {
	    const argToken = this.gullet.scanArgument(optional);
	    // No argument present (only possible result for a missing optional).
	    if (argToken == null) {
	        return null;
	    }
	    // Concatenate the text of every token up to the "EOF" token that
	    // terminates the scanned argument.
	    let collected = "";
	    for (let tok = this.fetch(); tok.text !== "EOF"; tok = this.fetch()) {
	        collected += tok.text;
	        this.consume();
	    }
	    this.consume(); // consume the end of the argument
	    // The argument token is reused as the carrier of the result string.
	    argToken.text = collected;
	    return argToken;
	}

	/**
	* Parses a regex-delimited group: the largest sequence of tokens
	* whose concatenated strings match `regex`. Returns the string
	* formed by the tokens plus some position information.
	*/
	parseRegexGroup(
	regex: RegExp,
	modeName: string, // Used to describe the mode in error messages.
	): Token {
	const firstToken = this.fetch();
	let lastToken = firstToken;
	let str = "";
	let nextToken;
	while ((nextToken = this.fetch()).text !== "EOF" &&
	regex.test(str + nextToken.text)) {
	lastToken = nextToken;
	str += lastToken.text;
	this.consume();
	}
	if (str === "") {
	throw new ParseError(
	"Invalid " + modeName + ": '" + firstToken.text + "'",
	firstToken);
	}
	return firstToken.range(lastToken, str);
	}

	/**
	* Parses a color description.
	*/
	parseColorGroup(optional: boolean): ?ParseNode<"color-token"> {
	const res = this.parseStringGroup("color", optional);
	if (res == null) {
	return null;
	}
	const match = (/^(#[a-f0-9]{3}\|#?[a-f0-9]{6}\|[a-z]+)$/i).exec(res.text);
	if (!match) {
	throw new ParseError("Invalid color: '" + res.text + "'", res);
	}
	let color = match[0];
	if (/^[0-9a-f]{6}$/i.test(color)) {
	// We allow a 6-digit HTML color spec without a leading "#".
	// This follows the xcolor package's HTML color model.
	// Predefined color names are all missed by this RegEx pattern.
	color = "#" + color;
	}
	return {
	type: "color-token",
	mode: this.mode,
	color,
	};
	}

	/**
	* Parses a size specification, consisting of magnitude and unit.
	*/
	parseSizeGroup(optional: boolean): ?ParseNode<"size"> {
	let res;
	let isBlank = false;
	// don't expand before parseStringGroup
	this.gullet.consumeSpaces();
	if (!optional && this.gullet.future().text !== "{") {
	res = this.parseRegexGroup(
	/^[-+]? (?:$\|\d+\|\d+\.\d\|\.\d) [a-z]{0,2} *$/, "size");
	} else {
	res = this.parseStringGroup("size", optional);
	}
	if (!res) {
	return null;
	}
	if (!optional && res.text.length === 0) {
	// Because we've tested for what is !optional, this block won't
	// affect \kern, \hspace, etc. It will capture the mandatory arguments
	// to \genfrac and \above.
	res.text = "0pt"; // Enable \above{}
	isBlank = true; // This is here specifically for \genfrac
	}
	const match = (/([-+]?) (\d+(?:\.\d)?\|\.\d+) *([a-z]{2})/).exec(res.text);
	if (!match) {
	throw new ParseError("Invalid size: '" + res.text + "'", res);
	}
	const data = {
	number: +(match[1] + match[2]), // sign + magnitude, cast to number
	unit: match[3],
	};
	if (!validUnit(data)) {
	throw new ParseError("Invalid unit: '" + data.unit + "'", res);
	}
	return {
	type: "size",
	mode: this.mode,
	value: data,
	isBlank,
	};
	}

	/**
	* Parses a URL, checking escaped letters and allowed protocols,
	* and setting the catcode of % as an active character (as in \hyperref).
	*/
	parseUrlGroup(optional: boolean): ?ParseNode<"url"> {
	this.gullet.lexer.setCatcode("%", 13); // active character
	this.gullet.lexer.setCatcode("~", 12); // other character
	const res = this.parseStringGroup("url", optional);
	this.gullet.lexer.setCatcode("%", 14); // comment character
	this.gullet.lexer.setCatcode("~", 13); // active character
	if (res == null) {
	return null;
	}
	// hyperref package allows backslashes alone in href, but doesn't
	// generate valid links in such cases; we interpret this as
	// "undefined" behaviour, and keep them as-is. Some browser will
	// replace backslashes with forward slashes.
	const url = res.text.replace(/\\([#$%&~_^{}])/g, '$1');
	return {
	type: "url",
	mode: this.mode,
	url,
	};
	}

	/**
	* Parses an argument with the mode specified.
	*/
	parseArgumentGroup(optional: boolean, mode?: Mode): ?ParseNode<"ordgroup"> {
	const argToken = this.gullet.scanArgument(optional);
	if (argToken == null) {
	return null;
	}
	const outerMode = this.mode;
	if (mode) { // Switch to specified mode
	this.switchMode(mode);
	}

	this.gullet.beginGroup();
	const expression = this.parseExpression(false, "EOF");
	// TODO: find an alternative way to denote the end
	this.expect("EOF"); // expect the end of the argument
	this.gullet.endGroup();
	const result = {
	type: "ordgroup",
	mode: this.mode,
	loc: argToken.loc,
	body: expression,
	};

	if (mode) { // Switch mode back
	this.switchMode(outerMode);
	}
	return result;
	}

	/**
	* Parses an ordinary group, which is either a single nucleus (like "x")
	* or an expression in braces (like "{x+y}") or an implicit group, a group
	* that starts at the current position, and ends right before a higher explicit
	* group ends, or at EOF.
	*/
	parseGroup(
	    name: string, // For error reporting.
	    breakOnTokenText?: BreakToken,
	): ?AnyParseNode {
	    const firstToken = this.fetch();
	    const text = firstToken.text;

	    let result;
	    // Try to parse an open brace or \begingroup
	    if (text === "{" || text === "\\begingroup") {
	        this.consume();
	        // Each opener has exactly one matching closer.
	        const groupEnd = text === "{" ? "}" : "\\endgroup";

	        this.gullet.beginGroup();
	        // If we get a brace, parse an expression
	        const expression = this.parseExpression(false, groupEnd);
	        // Grab the closing token before expect() consumes it, so the
	        // node's location can span the whole group.
	        const lastToken = this.fetch();
	        this.expect(groupEnd); // Check that we got a matching closing brace
	        this.gullet.endGroup();
	        result = {
	            type: "ordgroup",
	            mode: this.mode,
	            loc: SourceLocation.range(firstToken, lastToken),
	            body: expression,
	            // A group formed by \begingroup...\endgroup is a semi-simple group
	            // which doesn't affect spacing in math mode, i.e., is transparent.
	            // https://tex.stackexchange.com/questions/1930/when-should-one-
	            // use-begingroup-instead-of-bgroup
	            semisimple: text === "\\begingroup" || undefined,
	        };
	    } else {
	        // If there exists a function with this name, parse the function.
	        // Otherwise, just return a nucleus
	        result = this.parseFunction(breakOnTokenText, name) ||
	            this.parseSymbol();
	        // A backslash command that is neither a function, a symbol, nor an
	        // implicit command is undefined: either throw, or render its text
	        // in the error color.
	        if (result == null && text[0] === "\\" &&
	            !implicitCommands.hasOwnProperty(text)) {
	            if (this.settings.throwOnError) {
	                throw new ParseError(
	                    "Undefined control sequence: " + text, firstToken);
	            }
	            result = this.formatUnsupportedCmd(text);
	            this.consume();
	        }
	    }
	    return result;
	}

	/**
	* Form ligature-like combinations of characters for text mode.
	* This includes inputs like "--", "---", "``" and "''".
	* The result will simply replace multiple textord nodes with a single
	* character in each value by a single textord node having multiple
	* characters in its value. The representation is still ASCII source.
	* The group will be modified in place.
	*/
	formLigatures(group: AnyParseNode[]) {
	let n = group.length - 1;
	for (let i = 0; i < n; ++i) {
	const a = group[i];
	// $FlowFixMe: Not every node type has a `text` property.
	const v = a.text;
	if (v === "-" && group[i + 1].text === "-") {
	if (i + 1 < n && group[i + 2].text === "-") {
	group.splice(i, 3, {
	type: "textord",
	mode: "text",
	loc: SourceLocation.range(a, group[i + 2]),
	text: "---",
	});
	n -= 2;
	} else {
	group.splice(i, 2, {
	type: "textord",
	mode: "text",
	loc: SourceLocation.range(a, group[i + 1]),
	text: "--",
	});
	n -= 1;
	}
	}
	if ((v === "'" \|\| v === "`") && group[i + 1].text === v) {
	group.splice(i, 2, {
	type: "textord",
	mode: "text",
	loc: SourceLocation.range(a, group[i + 1]),
	text: v + v,
	});
	n -= 1;
	}
	}
	}

	/**
	* Parse a single symbol out of the string. Here, we handle single character
	* symbols and special functions like \verb.
	*/
	parseSymbol(): ?AnyParseNode {
	const nucleus = this.fetch();
	let text = nucleus.text;

	if (/^\\verb[^a-zA-Z]/.test(text)) {
	this.consume();
	let arg = text.slice(5);
	const star = (arg.charAt(0) === "*");
	if (star) {
	arg = arg.slice(1);
	}
	// Lexer's tokenRegex is constructed to always have matching
	// first/last characters.
	if (arg.length < 2 \|\| arg.charAt(0) !== arg.slice(-1)) {
	throw new ParseError(`\\verb assertion failed --
	please report what input caused this bug`);
	}
	arg = arg.slice(1, -1); // remove first and last char
	return {
	type: "verb",
	mode: "text",
	body: arg,
	star,
	};
	}
	// At this point, we should have a symbol, possibly with accents.
	// First expand any accented base symbol according to unicodeSymbols.
	if (unicodeSymbols.hasOwnProperty(text[0]) &&
	!symbols[this.mode][text[0]]) {
	// This behavior is not strict (XeTeX-compatible) in math mode.
	if (this.settings.strict && this.mode === "math") {
	this.settings.reportNonstrict("unicodeTextInMathMode",
	`Accented Unicode text character "${text[0]}" used in ` +
	`math mode`, nucleus);
	}
	text = unicodeSymbols[text[0]] + text.slice(1);
	}
	// Strip off any combining characters
	const match = combiningDiacriticalMarksEndRegex.exec(text);
	if (match) {
	text = text.substring(0, match.index);
	if (text === 'i') {
	text = '\u0131'; // dotless i, in math and text mode
	} else if (text === 'j') {
	text = '\u0237'; // dotless j, in math and text mode
	}
	}
	// Recognize base symbol
	let symbol: AnyParseNode;
	if (symbols[this.mode][text]) {
	if (this.settings.strict && this.mode === 'math' &&
	extraLatin.indexOf(text) >= 0) {
	this.settings.reportNonstrict("unicodeTextInMathMode",
	`Latin-1/Unicode text character "${text[0]}" used in ` +
	`math mode`, nucleus);
	}
	const group: Group = symbols[this.mode][text].group;
	const loc = SourceLocation.range(nucleus);
	let s: SymbolParseNode;
	if (ATOMS.hasOwnProperty(group)) {
	// $FlowFixMe
	const family: Atom = group;
	s = {
	type: "atom",
	mode: this.mode,
	family,
	loc,
	text,
	};
	} else {
	// $FlowFixMe
	s = {
	type: group,
	mode: this.mode,
	loc,
	text,
	};
	}
	// $FlowFixMe
	symbol = s;
	} else if (text.charCodeAt(0) >= 0x80) { // no symbol for e.g. ^
	if (this.settings.strict) {
	if (!supportedCodepoint(text.charCodeAt(0))) {
	this.settings.reportNonstrict("unknownSymbol",
	`Unrecognized Unicode character "${text[0]}"` +
	` (${text.charCodeAt(0)})`, nucleus);
	} else if (this.mode === "math") {
	this.settings.reportNonstrict("unicodeTextInMathMode",
	`Unicode text character "${text[0]}" used in math mode`,
	nucleus);
	}
	}
	// All nonmathematical Unicode characters are rendered as if they
	// are in text mode (wrapped in \text) because that's what it
	// takes to render them in LaTeX. Setting `mode: this.mode` is
	// another natural choice (the user requested math mode), but
	// this makes it more difficult for getCharacterMetrics() to
	// distinguish Unicode characters without metrics and those for
	// which we want to simulate the letter M.
	symbol = {
	type: "textord",
	mode: "text",
	loc: SourceLocation.range(nucleus),
	text,
	};
	} else {
	return null; // EOF, ^, _, {, }, etc.
	}
	this.consume();
	// Transform combining characters into accents
	if (match) {
	for (let i = 0; i < match[0].length; i++) {
	const accent: string = match[0][i];
	if (!unicodeAccents[accent]) {
	throw new ParseError(`Unknown accent ' ${accent}'`, nucleus);
	}
	const command = unicodeAccents[accent][this.mode] \|\|
	unicodeAccents[accent].text;
	if (!command) {
	throw new ParseError(
	`Accent ${accent} unsupported in ${this.mode} mode`,
	nucleus);
	}
	symbol = {
	type: "accent",
	mode: this.mode,
	loc: SourceLocation.range(nucleus),
	label: command,
	isStretchy: false,
	isShifty: true,
	// $FlowFixMe
	base: symbol,
	};
	}
	}
	// $FlowFixMe
	return symbol;
	}
	}