# helpers.py
import html.entities
import re
import sys
import typing

from . import __diag__
from .core import *
from .util import (
    _bslash,
    _flatten,
    _escape_regex_range_chars,
    replaced_by_pep8,
)


#
# global helpers
#
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens are returned as a list of the expr tokens; the
    leading count token is suppressed.

    If ``int_expr`` is specified, it should be a pyparsing expression
    that produces an integer value.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # if other fields must be parsed after the count but before the
        # list items, give the fields results names and they will
        # be preserved in the returned ParseResults:
        count_with_metadata = integer + Word(alphas)("type")
        typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
        result = typed_array.parse_string("3 bool True True False")
        print(result.dump())

        # prints
        # ['True', 'True', 'False']
        # - items: ['True', 'True', 'False']
        # - type: 'bool'
    """
    intExpr = intExpr or int_expr
    array_expr = Forward()

    def count_field_parse_action(s, l, t):
        nonlocal array_expr
        n = t[0]
        array_expr <<= (expr * n) if n else Empty()
        # clear list contents, but keep any named results
        del t[:]

    if intExpr is None:
        intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        intExpr = intExpr.copy()
    intExpr.set_name("arrayLen")
    intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
    return (intExpr + array_expr).set_name("(len) " + str(expr) + "...")


def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this
    matches a previous literal, it will also match the leading
    ``"1:1"`` in ``"1:10"``. If this is not desired, use
    :class:`match_previous_expr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def copy_token_to_repeater(s, l, t):
        if t:
            if len(t) == 1:
                rep << t[0]
            else:
                # flatten t tokens
                tflat = _flatten(t.as_list())
                rep << And(Literal(tt) for tt in tflat)
        else:
            rep << Empty()

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep


def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this
    matches by expressions, it will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep

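
# A quick contrast of the two "match previous" helpers (a minimal sketch using
# only the names defined above; separate base expressions keep the demos
# independent):
#
#   first = Word(nums)
#   second = Word(nums)
#   by_literal = first + ":" + match_previous_literal(first)
#   by_expr = second + ":" + match_previous_expr(second)
#   by_literal.parse_string("1:10")   # matches the leading "1:1"
#   by_expr.parse_string("1:10")      # raises ParseException - "1" != "10"

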
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Helper to quickly define a set of alternative :class:`Literal` s,
    and makes sure to do longest-first testing when there is a conflict,
    regardless of the input order, but returns
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    if caseless:
        isequal = lambda a, b: a.upper() == b.upper()
        masks = lambda a, b: b.upper().startswith(a.upper())
        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        isequal = lambda a, b: a == b
        masks = lambda a, b: b.startswith(a)
        parseElementClass = Keyword if asKeyword else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # reorder given symbols to take care to avoid masking longer choices with shorter ones
    # (but only if the given symbols are not just single characters)
    if any(len(sym) > 1 for sym in symbols):
        i = 0
        while i < len(symbols) - 1:
            cur = symbols[i]
            for j, other in enumerate(symbols[i + 1 :]):
                if isequal(other, cur):
                    del symbols[i + j + 1]
                    break
                elif masks(cur, other):
                    del symbols[i + j + 1]
                    symbols.insert(i, other)
                    break
            else:
                i += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # symbols are just single characters, create range regex pattern
                patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # add parse action to return symbols as specified, not in random
                # casing as found in input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
        " | ".join(symbols)
    )

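
# A small usage sketch of the caseless and as_keyword options (assumed typical
# usage, not an excerpt from the test suite):
#
#   bool_constant = one_of("true false", caseless=True, as_keyword=True)
#   bool_constant.parse_string("TRUE")    # -> ['true'] - returned as originally given
#   bool_constant.parse_string("truely")  # raises ParseException - keyword match fails

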
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Helper to easily and clearly define a dictionary by specifying
    the respective patterns for the key and value.  Takes care of
    defining the :class:`Dict`, :class:`ZeroOrMore`, and
    :class:`Group` tokens in the proper order.  The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        print(attr_expr[1, ...].parse_string(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    return Dict(OneOrMore(Group(key + value)))


def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Helper to return the original, untokenized text for a given
    expression.  Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``as_string`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string.  So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    asString = asString and as_string

    locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        extractText = lambda s, l, t: s[t._original_start : t._original_end]
    else:

        def extractText(s, l, t):
            t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    matchExpr.set_parse_action(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr

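
# Sketch of the as_string=False case described above (hypothetical input; the
# "startB" name is the results name that make_html_tags generates for tag "b"):
#
#   b_start, b_end = make_html_tags("b")
#   patt = original_text_for(b_start + ... + b_end, as_string=False)
#   result = patt.parse_string('<b class="x">text</b>')
#   result[0]          # -> '<b class="x">text</b>'
#   result.startB      # named results from the opening tag are preserved

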
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """
    return TokenConverter(expr).add_parse_action(lambda t: t[0])

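
# A minimal sketch of ungroup in action (using names from the star imports above):
#
#   grouped = Group(Word(alphas) + Word(nums))
#   grouped.parse_string("ab 12")           # -> [['ab', '12']]
#   ungroup(grouped).parse_string("ab 12")  # -> ['ab', '12']

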
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    locator = Empty().set_parse_action(lambda ss, ll, tt: ll)
    return Group(
        locator("locn_start")
        + expr("value")
        + locator.copy().leaveWhitespace()("locn_end")
    )

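
# The preferred modern form, per the deprecation note above - the Located class
# produces the same [start, value, end] groups without a helper function:
#
#   wd = Word(alphas)
#   for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
#       print(match)
#   # prints the same [[0, 'ljsdf', 5]] ... groups as the example above

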
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` are the default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression.  Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if isinstance(opener, str_type) and isinstance(closer, str_type):
            opener = typing.cast(str, opener)
            closer = typing.cast(str, closer)
            if len(opener) == 1 and len(closer) == 1:
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + CharsNotIn(
                                opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                                exact=1,
                            )
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = empty.copy() + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS
                    ).set_parse_action(lambda t: t[0].strip())
            else:
                if ignoreExpr is not None:
                    content = Combine(
                        OneOrMore(
                            ~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
                else:
                    content = Combine(
                        OneOrMore(
                            ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                        )
                    ).set_parse_action(lambda t: t[0].strip())
        else:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_name("nested %s%s expression" % (opener, closer))
    return ret

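
# A minimal sketch of the default behavior - with no arguments, nested_expr
# parses parenthesized, whitespace-delimited content into nested lists:
#
#   nested_expr().parse_string("(a b (c d) e)")  # -> [['a', 'b', ['c', 'd'], 'e']]

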
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    else:
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        openTag = (
            suppress_LT
            + tagStr("tag")
            + Dict(
                ZeroOrMore(
                    Group(
                        tagAttrName.set_parse_action(lambda t: t[0].lower())
                        + Opt(Suppress("=") + tagAttrValue)
                    )
                )
            )
            + Opt("/", default=[False])("empty").set_parse_action(
                lambda s, l, t: t[0] == "/"
            )
            + suppress_GT
        )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__(
            "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
        )
    )
    closeTag = closeTag(
        "end" + "".join(resname.replace(":", " ").title().split())
    ).set_name("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag


def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    return _makeTags(tag_str, False)


def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    return _makeTags(tag_str, True)

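
# A concrete sketch for make_xml_tags, analogous to the make_html_tags example
# (hypothetical input; in XML mode, attribute values must be double-quoted and
# tag matching is case-sensitive):
#
#   text = '<item id="123">payload</item>'
#   item, item_end = make_xml_tags("item")
#   expr = item + SkipTo(item_end)("body") + item_end
#   result = expr.parse_string(text)
#   print(result.body, result.id)   # -> payload 123

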
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)


def replace_html_entity(s, l, t):
    """Helper parser action to replace common HTML entities with their special characters"""
    return _htmlEntityMap.get(t.entity)

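
# Typical use pairs this action with common_html_entity and transform_string
# (a minimal sketch; copy() avoids mutating the shared module-level expression):
#
#   unescape = common_html_entity.copy().set_parse_action(replace_html_entity)
#   unescape.transform_string("Dr. Strangelove &amp; Mr. Hyde")
#   # -> 'Dr. Strangelove & Mr. Hyde'

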
class OpAssoc(Enum):
    """Enumeration of operator associativity
    - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`"""

    LEFT = 1
    RIGHT = 2


InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]


def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary
    or binary, left- or right-associative.  Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # if lpar and rpar are not suppressed, wrap in group
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    arity: int
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    for i, operDef in enumerate(op_list):
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} term"
        else:
            term_name = f"{opExpr} term"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

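
# Sketch of attaching a parse action to one precedence level, as described in
# the op_list notes above - here evaluating additive expressions as they are
# parsed (assumes integer-only input; not part of the library itself):
#
#   def eval_add_sub(t):
#       tokens = t[0]
#       total = tokens[0]
#       for op, rhs in zip(tokens[1::2], tokens[2::2]):
#           total = total + rhs if op == "+" else total - rhs
#       return total
#
#   integer = Word(nums).set_parse_action(lambda t: int(t[0]))
#   expr = infix_notation(integer, [(one_of("+ -"), 2, OpAssoc.LEFT, eval_add_sub)])
#   expr.parse_string("1 + 2 - 3")   # -> [0]

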
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    backup_stacks.append(indentStack[:])

    def reset_stack():
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")

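
# The preferred modern form, per the deprecation note above - the IndentedBlock
# class manages indentation without a caller-supplied stack, e.g.:
#
#   func_body = IndentedBlock(stmt)

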
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")

dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"

# build list of built-in expressions, for future reference if a global default value
# gets updated
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]


# compatibility function, superseded by DelimitedList class
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)"""
    return DelimitedList(
        expr, delim, combine, min, max, allow_trailing_delim=allow_trailing_delim
    )

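
# New code should use the DelimitedList class directly (a minimal sketch):
#
#   DelimitedList(Word(alphas), delim=";").parse_string("a; b; c")  # -> ['a', 'b', 'c']

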
# pre-PEP8 compatible names
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment
delimitedList = replaced_by_pep8("delimitedList", DelimitedList)
delimited_list = replaced_by_pep8("delimited_list", DelimitedList)
countedArray = replaced_by_pep8("countedArray", counted_array)
matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal)
matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr)
oneOf = replaced_by_pep8("oneOf", one_of)
dictOf = replaced_by_pep8("dictOf", dict_of)
originalTextFor = replaced_by_pep8("originalTextFor", original_text_for)
nestedExpr = replaced_by_pep8("nestedExpr", nested_expr)
makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags)
makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags)
replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity)
infixNotation = replaced_by_pep8("infixNotation", infix_notation)
# fmt: on