|
|
|
|
|
|
|
"""Pattern compiler. |
|
|
|
The grammar is taken from PatternGrammar.txt. |
|
|
|
The compiler compiles a pattern to a pytree.*Pattern instance. |
|
""" |
|
|
|
__author__ = "Guido van Rossum <[email protected]>" |
|
|
|
|
|
import io |
|
|
|
|
|
from .pgen2 import driver, literals, token, tokenize, parse, grammar |
|
|
|
|
|
from . import pytree |
|
from . import pygram |
|
|
|
|
|
class PatternSyntaxError(Exception): |
|
pass |
|
|
|
|
|
def tokenize_wrapper(input): |
|
"""Tokenizes a string suppressing significant whitespace.""" |
|
skip = {token.NEWLINE, token.INDENT, token.DEDENT} |
|
tokens = tokenize.generate_tokens(io.StringIO(input).readline) |
|
for quintuple in tokens: |
|
type, value, start, end, line_text = quintuple |
|
if type not in skip: |
|
yield quintuple |
|
|
|
|
|
class PatternCompiler(object): |
|
|
|
def __init__(self, grammar_file=None): |
|
"""Initializer. |
|
|
|
Takes an optional alternative filename for the pattern grammar. |
|
""" |
|
if grammar_file is None: |
|
self.grammar = pygram.pattern_grammar |
|
self.syms = pygram.pattern_symbols |
|
else: |
|
self.grammar = driver.load_grammar(grammar_file) |
|
self.syms = pygram.Symbols(self.grammar) |
|
self.pygrammar = pygram.python_grammar |
|
self.pysyms = pygram.python_symbols |
|
self.driver = driver.Driver(self.grammar, convert=pattern_convert) |
|
|
|
def compile_pattern(self, input, debug=False, with_tree=False): |
|
"""Compiles a pattern string to a nested pytree.*Pattern object.""" |
|
tokens = tokenize_wrapper(input) |
|
try: |
|
root = self.driver.parse_tokens(tokens, debug=debug) |
|
except parse.ParseError as e: |
|
raise PatternSyntaxError(str(e)) from None |
|
if with_tree: |
|
return self.compile_node(root), root |
|
else: |
|
return self.compile_node(root) |
|
|
|
def compile_node(self, node): |
|
"""Compiles a node, recursively. |
|
|
|
This is one big switch on the node type. |
|
""" |
|
|
|
|
|
if node.type == self.syms.Matcher: |
|
node = node.children[0] |
|
|
|
if node.type == self.syms.Alternatives: |
|
|
|
alts = [self.compile_node(ch) for ch in node.children[::2]] |
|
if len(alts) == 1: |
|
return alts[0] |
|
p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) |
|
return p.optimize() |
|
|
|
if node.type == self.syms.Alternative: |
|
units = [self.compile_node(ch) for ch in node.children] |
|
if len(units) == 1: |
|
return units[0] |
|
p = pytree.WildcardPattern([units], min=1, max=1) |
|
return p.optimize() |
|
|
|
if node.type == self.syms.NegatedUnit: |
|
pattern = self.compile_basic(node.children[1:]) |
|
p = pytree.NegatedPattern(pattern) |
|
return p.optimize() |
|
|
|
assert node.type == self.syms.Unit |
|
|
|
name = None |
|
nodes = node.children |
|
if len(nodes) >= 3 and nodes[1].type == token.EQUAL: |
|
name = nodes[0].value |
|
nodes = nodes[2:] |
|
repeat = None |
|
if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: |
|
repeat = nodes[-1] |
|
nodes = nodes[:-1] |
|
|
|
|
|
pattern = self.compile_basic(nodes, repeat) |
|
|
|
if repeat is not None: |
|
assert repeat.type == self.syms.Repeater |
|
children = repeat.children |
|
child = children[0] |
|
if child.type == token.STAR: |
|
min = 0 |
|
max = pytree.HUGE |
|
elif child.type == token.PLUS: |
|
min = 1 |
|
max = pytree.HUGE |
|
elif child.type == token.LBRACE: |
|
assert children[-1].type == token.RBRACE |
|
assert len(children) in (3, 5) |
|
min = max = self.get_int(children[1]) |
|
if len(children) == 5: |
|
max = self.get_int(children[3]) |
|
else: |
|
assert False |
|
if min != 1 or max != 1: |
|
pattern = pattern.optimize() |
|
pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) |
|
|
|
if name is not None: |
|
pattern.name = name |
|
return pattern.optimize() |
|
|
|
def compile_basic(self, nodes, repeat=None): |
|
|
|
assert len(nodes) >= 1 |
|
node = nodes[0] |
|
if node.type == token.STRING: |
|
value = str(literals.evalString(node.value)) |
|
return pytree.LeafPattern(_type_of_literal(value), value) |
|
elif node.type == token.NAME: |
|
value = node.value |
|
if value.isupper(): |
|
if value not in TOKEN_MAP: |
|
raise PatternSyntaxError("Invalid token: %r" % value) |
|
if nodes[1:]: |
|
raise PatternSyntaxError("Can't have details for token") |
|
return pytree.LeafPattern(TOKEN_MAP[value]) |
|
else: |
|
if value == "any": |
|
type = None |
|
elif not value.startswith("_"): |
|
type = getattr(self.pysyms, value, None) |
|
if type is None: |
|
raise PatternSyntaxError("Invalid symbol: %r" % value) |
|
if nodes[1:]: |
|
content = [self.compile_node(nodes[1].children[1])] |
|
else: |
|
content = None |
|
return pytree.NodePattern(type, content) |
|
elif node.value == "(": |
|
return self.compile_node(nodes[1]) |
|
elif node.value == "[": |
|
assert repeat is None |
|
subpattern = self.compile_node(nodes[1]) |
|
return pytree.WildcardPattern([[subpattern]], min=0, max=1) |
|
assert False, node |
|
|
|
def get_int(self, node): |
|
assert node.type == token.NUMBER |
|
return int(node.value) |
|
|
|
|
|
|
|
TOKEN_MAP = {"NAME": token.NAME, |
|
"STRING": token.STRING, |
|
"NUMBER": token.NUMBER, |
|
"TOKEN": None} |
|
|
|
|
|
def _type_of_literal(value): |
|
if value[0].isalpha(): |
|
return token.NAME |
|
elif value in grammar.opmap: |
|
return grammar.opmap[value] |
|
else: |
|
return None |
|
|
|
|
|
def pattern_convert(grammar, raw_node_info): |
|
"""Converts raw node information to a Node or Leaf instance.""" |
|
type, value, context, children = raw_node_info |
|
if children or type in grammar.number2symbol: |
|
return pytree.Node(type, children, context=context) |
|
else: |
|
return pytree.Leaf(type, value, context=context) |
|
|
|
|
|
def compile_pattern(pattern): |
|
return PatternCompiler().compile_pattern(pattern) |
|
|