#!/usr/bin/env python3
# Generate token-related files from Grammar/Tokens:
#
#   Include/token.h
#   Parser/token.c
#   Doc/library/token-list.inc
#   Lib/token.py

NT_OFFSET = 256

def load_tokens(path):
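    """Parse a Grammar/Tokens-style file.

    Return a triple (tok_names, ERRORTOKEN, string_to_tok): the token
    names in declaration order, the numeric value of ERRORTOKEN, and a
    mapping from exact token strings (e.g. '+=') to their token values.
    """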
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip trailing comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                # token strings are quoted in the Tokens file; eval() strips the quotes
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
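    """Write content to file, but only if it changed.

    Return True if the file was (re)written, False if it already
    contained exactly this content.
    """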
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


# Template for Include/token.h: %s receives the token #defines,
# followed by the N_TOKENS and NT_OFFSET values.
token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS %d
#define NT_OFFSET %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x) ((x) < NT_OFFSET)
#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
#define ISEOF(x) ((x) == ENDMARKER)
#define ISWHITESPACE(x) ((x) == ENDMARKER || \\
                         (x) == NEWLINE || \\
                         (x) == INDENT || \\
                         (x) == DEDENT)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
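    """Regenerate the token #define header (default outfile: Include/token.h)."""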
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
            )):
        print("%s regenerated from %s" % (outfile, infile))


# Template for Parser/token.c: the %s slots receive the token name table
# and the switch bodies for the one-, two- and three-character helpers.
token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
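    """Return C code for a nested switch matching characters to token names.

    mapping maps a character either to a token name (a leaf) or to
    another dict keyed by the next character; n is the index of the
    character being switched on (c1, c2, ...) and sets the indentation.
    """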
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)


def make_c(infile, outfile='Parser/token.c'):
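    """Regenerate the token name table and PyToken_*Chars() helpers (default outfile: Parser/token.c)."""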
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # '<>' is an alternate spelling of '!=' that the generated helpers must also recognize
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
            )):
        print("%s regenerated from %s" % (outfile, infile))


# Template for Doc/library/token-list.inc: %s receives the .. data:: entries.
token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
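    """Regenerate the reST list of token constants (default outfile: Doc/library/token-list.inc)."""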
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


# Template for Lib/token.py: filled with the constant definitions,
# N_TOKENS, NT_OFFSET and the EXACT_TOKEN_TYPES entries.
token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
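    """Regenerate the Python token module (default outfile: Lib/token.py)."""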
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
            )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
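    """Dispatch to make_<op>(infile, *args).

    op selects one of the generators above ('h', 'c', 'rst' or 'py');
    any extra argument overrides that generator's default output path,
    for example:

        python3 Tools/scripts/generate_token.py py Grammar/Tokens Lib/token.py
    """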
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])