#!/usr/bin/env python3
# Generate token-related files from Grammar/Tokens:
#
#   Include/token.h
#   Parser/token.c
#   Doc/library/token-list.inc
#   Lib/token.py

NT_OFFSET = 256

def load_tokens(path):
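    """Parse a Grammar/Tokens-style file.

    Return a triple (tok_names, ERRORTOKEN, string_to_tok): the token
    names in declaration order, the numeric value of ERRORTOKEN, and a
    mapping from exact token strings (e.g. '+=') to their token values.
    """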
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip trailing comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                # token strings are quoted in the Tokens file; eval() strips the quotes
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
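    """Write content to file, but only if it changed.

    Return True if the file was (re)written, False if it already
    contained exactly this content.
    """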
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


# Template for Include/token.h: %s receives the token #defines,
# followed by the N_TOKENS and NT_OFFSET values.
token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS %d
#define NT_OFFSET %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x) ((x) < NT_OFFSET)
#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
#define ISEOF(x) ((x) == ENDMARKER)
#define ISWHITESPACE(x) ((x) == ENDMARKER || \\
                         (x) == NEWLINE || \\
                         (x) == INDENT || \\
                         (x) == DEDENT)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
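    """Regenerate the token #define header (default outfile: Include/token.h)."""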
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
            )):
        print("%s regenerated from %s" % (outfile, infile))


# Template for Parser/token.c: the %s slots receive the token name table
# and the switch bodies for the one-, two- and three-character helpers.
token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
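    """Return C code for a nested switch matching characters to token names.

    mapping maps a character either to a token name (a leaf) or to
    another dict keyed by the next character; n is the index of the
    character being switched on (c1, c2, ...) and sets the indentation.
    """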
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)


def make_c(infile, outfile='Parser/token.c'):
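    """Regenerate the token name table and PyToken_*Chars() helpers (default outfile: Parser/token.c)."""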
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # '<>' is an alternate spelling of '!=' that the generated helpers must also recognize
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
            )):
        print("%s regenerated from %s" % (outfile, infile))


# Template for Doc/library/token-list.inc: %s receives the .. data:: entries.
token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
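    """Regenerate the reST list of token constants (default outfile: Doc/library/token-list.inc)."""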
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


# Template for Lib/token.py: filled with the constant definitions,
# N_TOKENS, NT_OFFSET and the EXACT_TOKEN_TYPES entries.
token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
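    """Regenerate the Python token module (default outfile: Lib/token.py)."""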
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
            )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
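    """Dispatch to make_<op>(infile, *args).

    op selects one of the generators above ('h', 'c', 'rst' or 'py');
    any extra argument overrides that generator's default output path,
    for example:

        python3 Tools/scripts/generate_token.py py Grammar/Tokens Lib/token.py
    """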
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])