diff options
Diffstat (limited to 'env/lib/python3.10/site-packages/pyparsing/helpers.py')
-rw-r--r-- | env/lib/python3.10/site-packages/pyparsing/helpers.py | 1088 |
1 files changed, 0 insertions, 1088 deletions
diff --git a/env/lib/python3.10/site-packages/pyparsing/helpers.py b/env/lib/python3.10/site-packages/pyparsing/helpers.py deleted file mode 100644 index 9588b3b..0000000 --- a/env/lib/python3.10/site-packages/pyparsing/helpers.py +++ /dev/null @@ -1,1088 +0,0 @@ -# helpers.py -import html.entities -import re -import typing - -from . import __diag__ -from .core import * -from .util import _bslash, _flatten, _escape_regex_range_chars - - -# -# global helpers -# -def delimited_list( - expr: Union[str, ParserElement], - delim: Union[str, ParserElement] = ",", - combine: bool = False, - min: typing.Optional[int] = None, - max: typing.Optional[int] = None, - *, - allow_trailing_delim: bool = False, -) -> ParserElement: - """Helper to define a delimited list of expressions - the delimiter - defaults to ','. By default, the list elements and delimiters can - have intervening whitespace, and comments, but this can be - overridden by passing ``combine=True`` in the constructor. If - ``combine`` is set to ``True``, the matching tokens are - returned as a single token string, with the delimiters included; - otherwise, the matching tokens are returned as a list of tokens, - with the delimiters suppressed. - - If ``allow_trailing_delim`` is set to True, then the list may end with - a delimiter. - - Example:: - - delimited_list(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc'] - delimited_list(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] - """ - if isinstance(expr, str_type): - expr = ParserElement._literalStringClass(expr) - - dlName = "{expr} [{delim} {expr}]...{end}".format( - expr=str(expr.copy().streamline()), - delim=str(delim), - end=" [{}]".format(str(delim)) if allow_trailing_delim else "", - ) - - if not combine: - delim = Suppress(delim) - - if min is not None: - if min < 1: - raise ValueError("min must be greater than 0") - min -= 1 - if max is not None: - if min is not None and max <= min: - raise ValueError("max must be greater than, or equal to min") - max -= 1 - delimited_list_expr = expr + (delim + expr)[min, max] - - if allow_trailing_delim: - delimited_list_expr += Opt(delim) - - if combine: - return Combine(delimited_list_expr).set_name(dlName) - else: - return delimited_list_expr.set_name(dlName) - - -def counted_array( - expr: ParserElement, - int_expr: typing.Optional[ParserElement] = None, - *, - intExpr: typing.Optional[ParserElement] = None, -) -> ParserElement: - """Helper to define a counted list of expressions. - - This helper defines a pattern of the form:: - - integer expr expr expr... - - where the leading integer tells how many expr expressions follow. - The matched tokens returns the array of expr tokens as a list - the - leading count token is suppressed. - - If ``int_expr`` is specified, it should be a pyparsing expression - that produces an integer value. - - Example:: - - counted_array(Word(alphas)).parse_string('2 ab cd ef') # -> ['ab', 'cd'] - - # in this parser, the leading integer value is given in binary, - # '10' indicating that 2 values are in the array - binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2)) - counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef') # -> ['ab', 'cd'] - - # if other fields must be parsed after the count but before the - # list items, give the fields results names and they will - # be preserved in the returned ParseResults: - count_with_metadata = integer + Word(alphas)("type") - typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items") - result = typed_array.parse_string("3 bool True True False") - print(result.dump()) - - # prints - # ['True', 'True', 'False'] - # - items: ['True', 'True', 'False'] - # - type: 'bool' - """ - intExpr = intExpr or int_expr - array_expr = Forward() - - def count_field_parse_action(s, l, t): - nonlocal array_expr - n = t[0] - array_expr <<= (expr * n) if n else Empty() - # clear list contents, but keep any named results - del t[:] - - if intExpr is None: - intExpr = Word(nums).set_parse_action(lambda t: int(t[0])) - else: - intExpr = intExpr.copy() - intExpr.set_name("arrayLen") - intExpr.add_parse_action(count_field_parse_action, call_during_try=True) - return (intExpr + array_expr).set_name("(len) " + str(expr) + "...") - - -def match_previous_literal(expr: ParserElement) -> ParserElement: - """Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks for - a 'repeat' of a previous expression. For example:: - - first = Word(nums) - second = match_previous_literal(first) - match_expr = first + ":" + second - - will match ``"1:1"``, but not ``"1:2"``. Because this - matches a previous literal, will also match the leading - ``"1:1"`` in ``"1:10"``. If this is not desired, use - :class:`match_previous_expr`. Do *not* use with packrat parsing - enabled. - """ - rep = Forward() - - def copy_token_to_repeater(s, l, t): - if t: - if len(t) == 1: - rep << t[0] - else: - # flatten t tokens - tflat = _flatten(t.as_list()) - rep << And(Literal(tt) for tt in tflat) - else: - rep << Empty() - - expr.add_parse_action(copy_token_to_repeater, callDuringTry=True) - rep.set_name("(prev) " + str(expr)) - return rep - - -def match_previous_expr(expr: ParserElement) -> ParserElement: - """Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks for - a 'repeat' of a previous expression. For example:: - - first = Word(nums) - second = match_previous_expr(first) - match_expr = first + ":" + second - - will match ``"1:1"``, but not ``"1:2"``. Because this - matches by expressions, will *not* match the leading ``"1:1"`` - in ``"1:10"``; the expressions are evaluated first, and then - compared, so ``"1"`` is compared with ``"10"``. Do *not* use - with packrat parsing enabled. - """ - rep = Forward() - e2 = expr.copy() - rep <<= e2 - - def copy_token_to_repeater(s, l, t): - matchTokens = _flatten(t.as_list()) - - def must_match_these_tokens(s, l, t): - theseTokens = _flatten(t.as_list()) - if theseTokens != matchTokens: - raise ParseException( - s, l, "Expected {}, found{}".format(matchTokens, theseTokens) - ) - - rep.set_parse_action(must_match_these_tokens, callDuringTry=True) - - expr.add_parse_action(copy_token_to_repeater, callDuringTry=True) - rep.set_name("(prev) " + str(expr)) - return rep - - -def one_of( - strs: Union[typing.Iterable[str], str], - caseless: bool = False, - use_regex: bool = True, - as_keyword: bool = False, - *, - useRegex: bool = True, - asKeyword: bool = False, -) -> ParserElement: - """Helper to quickly define a set of alternative :class:`Literal` s, - and makes sure to do longest-first testing when there is a conflict, - regardless of the input order, but returns - a :class:`MatchFirst` for best performance. - - Parameters: - - - ``strs`` - a string of space-delimited literals, or a collection of - string literals - - ``caseless`` - treat all literals as caseless - (default= ``False``) - - ``use_regex`` - as an optimization, will - generate a :class:`Regex` object; otherwise, will generate - a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if - creating a :class:`Regex` raises an exception) - (default= ``True``) - - ``as_keyword`` - enforce :class:`Keyword`-style matching on the - generated expressions - (default= ``False``) - - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility, - but will be removed in a future release - - Example:: - - comp_oper = one_of("< = > <= >= !=") - var = Word(alphas) - number = Word(nums) - term = var | number - comparison_expr = term + comp_oper + term - print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12")) - - prints:: - - [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] - """ - asKeyword = asKeyword or as_keyword - useRegex = useRegex and use_regex - - if ( - isinstance(caseless, str_type) - and __diag__.warn_on_multiple_string_args_to_oneof - ): - warnings.warn( - "More than one string argument passed to one_of, pass" - " choices as a list or space-delimited string", - stacklevel=2, - ) - - if caseless: - isequal = lambda a, b: a.upper() == b.upper() - masks = lambda a, b: b.upper().startswith(a.upper()) - parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral - else: - isequal = lambda a, b: a == b - masks = lambda a, b: b.startswith(a) - parseElementClass = Keyword if asKeyword else Literal - - symbols: List[str] = [] - if isinstance(strs, str_type): - symbols = strs.split() - elif isinstance(strs, Iterable): - symbols = list(strs) - else: - raise TypeError("Invalid argument to one_of, expected string or iterable") - if not symbols: - return NoMatch() - - # reorder given symbols to take care to avoid masking longer choices with shorter ones - # (but only if the given symbols are not just single characters) - if any(len(sym) > 1 for sym in symbols): - i = 0 - while i < len(symbols) - 1: - cur = symbols[i] - for j, other in enumerate(symbols[i + 1 :]): - if isequal(other, cur): - del symbols[i + j + 1] - break - elif masks(cur, other): - del symbols[i + j + 1] - symbols.insert(i, other) - break - else: - i += 1 - - if useRegex: - re_flags: int = re.IGNORECASE if caseless else 0 - - try: - if all(len(sym) == 1 for sym in symbols): - # symbols are just single characters, create range regex pattern - patt = "[{}]".format( - "".join(_escape_regex_range_chars(sym) for sym in symbols) - ) - else: - patt = "|".join(re.escape(sym) for sym in symbols) - - # wrap with \b word break markers if defining as keywords - if asKeyword: - patt = r"\b(?:{})\b".format(patt) - - ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols)) - - if caseless: - # add parse action to return symbols as specified, not in random - # casing as found in input string - symbol_map = {sym.lower(): sym for sym in symbols} - ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()]) - - return ret - - except re.error: - warnings.warn( - "Exception creating Regex for one_of, building MatchFirst", stacklevel=2 - ) - - # last resort, just use MatchFirst - return MatchFirst(parseElementClass(sym) for sym in symbols).set_name( - " | ".join(symbols) - ) - - -def dict_of(key: ParserElement, value: ParserElement) -> ParserElement: - """Helper to easily and clearly define a dictionary by specifying - the respective patterns for the key and value. Takes care of - defining the :class:`Dict`, :class:`ZeroOrMore`, and - :class:`Group` tokens in the proper order. The key pattern - can include delimiting markers or punctuation, as long as they are - suppressed, thereby leaving the significant key text. The value - pattern can include named results, so that the :class:`Dict` results - can include named token fields. - - Example:: - - text = "shape: SQUARE posn: upper left color: light blue texture: burlap" - attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) - print(attr_expr[1, ...].parse_string(text).dump()) - - attr_label = label - attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join) - - # similar to Dict, but simpler call format - result = dict_of(attr_label, attr_value).parse_string(text) - print(result.dump()) - print(result['shape']) - print(result.shape) # object attribute access works too - print(result.as_dict()) - - prints:: - - [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] - - color: 'light blue' - - posn: 'upper left' - - shape: 'SQUARE' - - texture: 'burlap' - SQUARE - SQUARE - {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} - """ - return Dict(OneOrMore(Group(key + value))) - - -def original_text_for( - expr: ParserElement, as_string: bool = True, *, asString: bool = True -) -> ParserElement: - """Helper to return the original, untokenized text for a given - expression. Useful to restore the parsed fields of an HTML start - tag into the raw tag text itself, or to revert separate tokens with - intervening whitespace back to the original matching input text. By - default, returns astring containing the original parsed text. - - If the optional ``as_string`` argument is passed as - ``False``, then the return value is - a :class:`ParseResults` containing any results names that - were originally matched, and a single token containing the original - matched text from the input string. So if the expression passed to - :class:`original_text_for` contains expressions with defined - results names, you must set ``as_string`` to ``False`` if you - want to preserve those results name values. - - The ``asString`` pre-PEP8 argument is retained for compatibility, - but will be removed in a future release. - - Example:: - - src = "this is test <b> bold <i>text</i> </b> normal text " - for tag in ("b", "i"): - opener, closer = make_html_tags(tag) - patt = original_text_for(opener + SkipTo(closer) + closer) - print(patt.search_string(src)[0]) - - prints:: - - ['<b> bold <i>text</i> </b>'] - ['<i>text</i>'] - """ - asString = asString and as_string - - locMarker = Empty().set_parse_action(lambda s, loc, t: loc) - endlocMarker = locMarker.copy() - endlocMarker.callPreparse = False - matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") - if asString: - extractText = lambda s, l, t: s[t._original_start : t._original_end] - else: - - def extractText(s, l, t): - t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]] - - matchExpr.set_parse_action(extractText) - matchExpr.ignoreExprs = expr.ignoreExprs - matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection) - return matchExpr - - -def ungroup(expr: ParserElement) -> ParserElement: - """Helper to undo pyparsing's default grouping of And expressions, - even if all but one are non-empty. - """ - return TokenConverter(expr).add_parse_action(lambda t: t[0]) - - -def locatedExpr(expr: ParserElement) -> ParserElement: - """ - (DEPRECATED - future code should use the Located class) - Helper to decorate a returned token with its starting and ending - locations in the input string. - - This helper adds the following results names: - - - ``locn_start`` - location where matched expression begins - - ``locn_end`` - location where matched expression ends - - ``value`` - the actual parsed results - - Be careful if the input text contains ``<TAB>`` characters, you - may want to call :class:`ParserElement.parseWithTabs` - - Example:: - - wd = Word(alphas) - for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): - print(match) - - prints:: - - [[0, 'ljsdf', 5]] - [[8, 'lksdjjf', 15]] - [[18, 'lkkjj', 23]] - """ - locator = Empty().set_parse_action(lambda ss, ll, tt: ll) - return Group( - locator("locn_start") - + expr("value") - + locator.copy().leaveWhitespace()("locn_end") - ) - - -def nested_expr( - opener: Union[str, ParserElement] = "(", - closer: Union[str, ParserElement] = ")", - content: typing.Optional[ParserElement] = None, - ignore_expr: ParserElement = quoted_string(), - *, - ignoreExpr: ParserElement = quoted_string(), -) -> ParserElement: - """Helper method for defining nested lists enclosed in opening and - closing delimiters (``"("`` and ``")"`` are the default). - - Parameters: - - ``opener`` - opening character for a nested list - (default= ``"("``); can also be a pyparsing expression - - ``closer`` - closing character for a nested list - (default= ``")"``); can also be a pyparsing expression - - ``content`` - expression for items within the nested lists - (default= ``None``) - - ``ignore_expr`` - expression for ignoring opening and closing delimiters - (default= :class:`quoted_string`) - - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility - but will be removed in a future release - - If an expression is not provided for the content argument, the - nested expression will capture all whitespace-delimited content - between delimiters as a list of separate values. - - Use the ``ignore_expr`` argument to define expressions that may - contain opening or closing characters that should not be treated as - opening or closing characters for nesting, such as quoted_string or - a comment expression. Specify multiple expressions using an - :class:`Or` or :class:`MatchFirst`. The default is - :class:`quoted_string`, but if no expressions are to be ignored, then - pass ``None`` for this argument. - - Example:: - - data_type = one_of("void int short long char float double") - decl_data_type = Combine(data_type + Opt(Word('*'))) - ident = Word(alphas+'_', alphanums+'_') - number = pyparsing_common.number - arg = Group(decl_data_type + ident) - LPAR, RPAR = map(Suppress, "()") - - code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment)) - - c_function = (decl_data_type("type") - + ident("name") - + LPAR + Opt(delimited_list(arg), [])("args") + RPAR - + code_body("body")) - c_function.ignore(c_style_comment) - - source_code = ''' - int is_odd(int x) { - return (x%2); - } - - int dec_to_hex(char hchar) { - if (hchar >= '0' && hchar <= '9') { - return (ord(hchar)-ord('0')); - } else { - return (10+ord(hchar)-ord('A')); - } - } - ''' - for func in c_function.search_string(source_code): - print("%(name)s (%(type)s) args: %(args)s" % func) - - - prints:: - - is_odd (int) args: [['int', 'x']] - dec_to_hex (int) args: [['char', 'hchar']] - """ - if ignoreExpr != ignore_expr: - ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr - if opener == closer: - raise ValueError("opening and closing strings cannot be the same") - if content is None: - if isinstance(opener, str_type) and isinstance(closer, str_type): - if len(opener) == 1 and len(closer) == 1: - if ignoreExpr is not None: - content = Combine( - OneOrMore( - ~ignoreExpr - + CharsNotIn( - opener + closer + ParserElement.DEFAULT_WHITE_CHARS, - exact=1, - ) - ) - ).set_parse_action(lambda t: t[0].strip()) - else: - content = empty.copy() + CharsNotIn( - opener + closer + ParserElement.DEFAULT_WHITE_CHARS - ).set_parse_action(lambda t: t[0].strip()) - else: - if ignoreExpr is not None: - content = Combine( - OneOrMore( - ~ignoreExpr - + ~Literal(opener) - + ~Literal(closer) - + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) - ) - ).set_parse_action(lambda t: t[0].strip()) - else: - content = Combine( - OneOrMore( - ~Literal(opener) - + ~Literal(closer) - + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) - ) - ).set_parse_action(lambda t: t[0].strip()) - else: - raise ValueError( - "opening and closing arguments must be strings if no content expression is given" - ) - ret = Forward() - if ignoreExpr is not None: - ret <<= Group( - Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer) - ) - else: - ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) - ret.set_name("nested %s%s expression" % (opener, closer)) - return ret - - -def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")): - """Internal helper to construct opening and closing tag expressions, given a tag name""" - if isinstance(tagStr, str_type): - resname = tagStr - tagStr = Keyword(tagStr, caseless=not xml) - else: - resname = tagStr.name - - tagAttrName = Word(alphas, alphanums + "_-:") - if xml: - tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes) - openTag = ( - suppress_LT - + tagStr("tag") - + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) - + Opt("/", default=[False])("empty").set_parse_action( - lambda s, l, t: t[0] == "/" - ) - + suppress_GT - ) - else: - tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word( - printables, exclude_chars=">" - ) - openTag = ( - suppress_LT - + tagStr("tag") - + Dict( - ZeroOrMore( - Group( - tagAttrName.set_parse_action(lambda t: t[0].lower()) - + Opt(Suppress("=") + tagAttrValue) - ) - ) - ) - + Opt("/", default=[False])("empty").set_parse_action( - lambda s, l, t: t[0] == "/" - ) - + suppress_GT - ) - closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False) - - openTag.set_name("<%s>" % resname) - # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels - openTag.add_parse_action( - lambda t: t.__setitem__( - "start" + "".join(resname.replace(":", " ").title().split()), t.copy() - ) - ) - closeTag = closeTag( - "end" + "".join(resname.replace(":", " ").title().split()) - ).set_name("</%s>" % resname) - openTag.tag = resname - closeTag.tag = resname - openTag.tag_body = SkipTo(closeTag()) - return openTag, closeTag - - -def make_html_tags( - tag_str: Union[str, ParserElement] -) -> Tuple[ParserElement, ParserElement]: - """Helper to construct opening and closing tag expressions for HTML, - given a tag name. Matches tags in either upper or lower case, - attributes with namespaces and with quoted or unquoted values. - - Example:: - - text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>' - # make_html_tags returns pyparsing expressions for the opening and - # closing tags as a 2-tuple - a, a_end = make_html_tags("A") - link_expr = a + SkipTo(a_end)("link_text") + a_end - - for link in link_expr.search_string(text): - # attributes in the <A> tag (like "href" shown here) are - # also accessible as named results - print(link.link_text, '->', link.href) - - prints:: - - pyparsing -> https://github.com/pyparsing/pyparsing/wiki - """ - return _makeTags(tag_str, False) - - -def make_xml_tags( - tag_str: Union[str, ParserElement] -) -> Tuple[ParserElement, ParserElement]: - """Helper to construct opening and closing tag expressions for XML, - given a tag name. Matches tags only in the given upper/lower case. - - Example: similar to :class:`make_html_tags` - """ - return _makeTags(tag_str, True) - - -any_open_tag: ParserElement -any_close_tag: ParserElement -any_open_tag, any_close_tag = make_html_tags( - Word(alphas, alphanums + "_:").set_name("any tag") -) - -_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()} -common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name( - "common HTML entity" -) - - -def replace_html_entity(t): - """Helper parser action to replace common HTML entities with their special characters""" - return _htmlEntityMap.get(t.entity) - - -class OpAssoc(Enum): - LEFT = 1 - RIGHT = 2 - - -InfixNotationOperatorArgType = Union[ - ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]] -] -InfixNotationOperatorSpec = Union[ - Tuple[ - InfixNotationOperatorArgType, - int, - OpAssoc, - typing.Optional[ParseAction], - ], - Tuple[ - InfixNotationOperatorArgType, - int, - OpAssoc, - ], -] - - -def infix_notation( - base_expr: ParserElement, - op_list: List[InfixNotationOperatorSpec], - lpar: Union[str, ParserElement] = Suppress("("), - rpar: Union[str, ParserElement] = Suppress(")"), -) -> ParserElement: - """Helper method for constructing grammars of expressions made up of - operators working in a precedence hierarchy. Operators may be unary - or binary, left- or right-associative. Parse actions can also be - attached to operator expressions. The generated parser will also - recognize the use of parentheses to override operator precedences - (see example below). - - Note: if you define a deep operator list, you may see performance - issues when using infix_notation. See - :class:`ParserElement.enable_packrat` for a mechanism to potentially - improve your parser performance. - - Parameters: - - ``base_expr`` - expression representing the most basic operand to - be used in the expression - - ``op_list`` - list of tuples, one for each operator precedence level - in the expression grammar; each tuple is of the form ``(op_expr, - num_operands, right_left_assoc, (optional)parse_action)``, where: - - - ``op_expr`` is the pyparsing expression for the operator; may also - be a string, which will be converted to a Literal; if ``num_operands`` - is 3, ``op_expr`` is a tuple of two expressions, for the two - operators separating the 3 terms - - ``num_operands`` is the number of terms for this operator (must be 1, - 2, or 3) - - ``right_left_assoc`` is the indicator whether the operator is right - or left associative, using the pyparsing-defined constants - ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``. - - ``parse_action`` is the parse action to be associated with - expressions matching this operator expression (the parse action - tuple member may be omitted); if the parse action is passed - a tuple or list of functions, this is equivalent to calling - ``set_parse_action(*fn)`` - (:class:`ParserElement.set_parse_action`) - - ``lpar`` - expression for matching left-parentheses; if passed as a - str, then will be parsed as Suppress(lpar). If lpar is passed as - an expression (such as ``Literal('(')``), then it will be kept in - the parsed results, and grouped with them. (default= ``Suppress('(')``) - - ``rpar`` - expression for matching right-parentheses; if passed as a - str, then will be parsed as Suppress(rpar). If rpar is passed as - an expression (such as ``Literal(')')``), then it will be kept in - the parsed results, and grouped with them. (default= ``Suppress(')')``) - - Example:: - - # simple example of four-function arithmetic with ints and - # variable names - integer = pyparsing_common.signed_integer - varname = pyparsing_common.identifier - - arith_expr = infix_notation(integer | varname, - [ - ('-', 1, OpAssoc.RIGHT), - (one_of('* /'), 2, OpAssoc.LEFT), - (one_of('+ -'), 2, OpAssoc.LEFT), - ]) - - arith_expr.run_tests(''' - 5+3*6 - (5+3)*6 - -2--11 - ''', full_dump=False) - - prints:: - - 5+3*6 - [[5, '+', [3, '*', 6]]] - - (5+3)*6 - [[[5, '+', 3], '*', 6]] - - -2--11 - [[['-', 2], '-', ['-', 11]]] - """ - # captive version of FollowedBy that does not do parse actions or capture results names - class _FB(FollowedBy): - def parseImpl(self, instring, loc, doActions=True): - self.expr.try_parse(instring, loc) - return loc, [] - - _FB.__name__ = "FollowedBy>" - - ret = Forward() - if isinstance(lpar, str): - lpar = Suppress(lpar) - if isinstance(rpar, str): - rpar = Suppress(rpar) - - # if lpar and rpar are not suppressed, wrap in group - if not (isinstance(rpar, Suppress) and isinstance(rpar, Suppress)): - lastExpr = base_expr | Group(lpar + ret + rpar) - else: - lastExpr = base_expr | (lpar + ret + rpar) - - for i, operDef in enumerate(op_list): - opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4] - if isinstance(opExpr, str_type): - opExpr = ParserElement._literalStringClass(opExpr) - if arity == 3: - if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2: - raise ValueError( - "if numterms=3, opExpr must be a tuple or list of two expressions" - ) - opExpr1, opExpr2 = opExpr - term_name = "{}{} term".format(opExpr1, opExpr2) - else: - term_name = "{} term".format(opExpr) - - if not 1 <= arity <= 3: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - - if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT): - raise ValueError("operator must indicate right or left associativity") - - thisExpr: Forward = Forward().set_name(term_name) - if rightLeftAssoc is OpAssoc.LEFT: - if arity == 1: - matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...]) - elif arity == 2: - if opExpr is not None: - matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( - lastExpr + (opExpr + lastExpr)[1, ...] - ) - else: - matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...]) - elif arity == 3: - matchExpr = _FB( - lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr - ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)) - elif rightLeftAssoc is OpAssoc.RIGHT: - if arity == 1: - # try to avoid LR with this extra test - if not isinstance(opExpr, Opt): - opExpr = Opt(opExpr) - matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr) - elif arity == 2: - if opExpr is not None: - matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( - lastExpr + (opExpr + thisExpr)[1, ...] - ) - else: - matchExpr = _FB(lastExpr + thisExpr) + Group( - lastExpr + thisExpr[1, ...] - ) - elif arity == 3: - matchExpr = _FB( - lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr - ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) - if pa: - if isinstance(pa, (tuple, list)): - matchExpr.set_parse_action(*pa) - else: - matchExpr.set_parse_action(pa) - thisExpr <<= (matchExpr | lastExpr).setName(term_name) - lastExpr = thisExpr - ret <<= lastExpr - return ret - - -def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]): - """ - (DEPRECATED - use IndentedBlock class instead) - Helper method for defining space-delimited indentation blocks, - such as those used to define block statements in Python source code. - - Parameters: - - - ``blockStatementExpr`` - expression defining syntax of statement that - is repeated within the indented block - - ``indentStack`` - list created by caller to manage indentation stack - (multiple ``statementWithIndentedBlock`` expressions within a single - grammar should share a common ``indentStack``) - - ``indent`` - boolean indicating whether block must be indented beyond - the current level; set to ``False`` for block of left-most statements - (default= ``True``) - - A valid block must contain at least one ``blockStatement``. - - (Note that indentedBlock uses internal parse actions which make it - incompatible with packrat parsing.) - - Example:: - - data = ''' - def A(z): - A1 - B = 100 - G = A2 - A2 - A3 - B - def BB(a,b,c): - BB1 - def BBA(): - bba1 - bba2 - bba3 - C - D - def spam(x,y): - def eggs(z): - pass - ''' - - - indentStack = [1] - stmt = Forward() - - identifier = Word(alphas, alphanums) - funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":") - func_body = indentedBlock(stmt, indentStack) - funcDef = Group(funcDecl + func_body) - - rvalue = Forward() - funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")") - rvalue << (funcCall | identifier | Word(nums)) - assignment = Group(identifier + "=" + rvalue) - stmt << (funcDef | assignment | identifier) - - module_body = stmt[1, ...] - - parseTree = module_body.parseString(data) - parseTree.pprint() - - prints:: - - [['def', - 'A', - ['(', 'z', ')'], - ':', - [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], - 'B', - ['def', - 'BB', - ['(', 'a', 'b', 'c', ')'], - ':', - [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], - 'C', - 'D', - ['def', - 'spam', - ['(', 'x', 'y', ')'], - ':', - [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] - """ - backup_stacks.append(indentStack[:]) - - def reset_stack(): - indentStack[:] = backup_stacks[-1] - - def checkPeerIndent(s, l, t): - if l >= len(s): - return - curCol = col(l, s) - if curCol != indentStack[-1]: - if curCol > indentStack[-1]: - raise ParseException(s, l, "illegal nesting") - raise ParseException(s, l, "not a peer entry") - - def checkSubIndent(s, l, t): - curCol = col(l, s) - if curCol > indentStack[-1]: - indentStack.append(curCol) - else: - raise ParseException(s, l, "not a subentry") - - def checkUnindent(s, l, t): - if l >= len(s): - return - curCol = col(l, s) - if not (indentStack and curCol in indentStack): - raise ParseException(s, l, "not an unindent") - if curCol < indentStack[-1]: - indentStack.pop() - - NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress()) - INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT") - PEER = Empty().set_parse_action(checkPeerIndent).set_name("") - UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT") - if indent: - smExpr = Group( - Opt(NL) - + INDENT - + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL)) - + UNDENT - ) - else: - smExpr = Group( - Opt(NL) - + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL)) - + Opt(UNDENT) - ) - - # add a parse action to remove backup_stack from list of backups - smExpr.add_parse_action( - lambda: backup_stacks.pop(-1) and None if backup_stacks else None - ) - smExpr.set_fail_action(lambda a, b, c, d: reset_stack()) - blockStatementExpr.ignore(_bslash + LineEnd()) - return smExpr.set_name("indented block") - - -# it's easy to get these comment structures wrong - they're very common, so may as well make them available -c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name( - "C style comment" -) -"Comment of the form ``/* ... */``" - -html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment") -"Comment of the form ``<!-- ... -->``" - -rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line") -dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment") -"Comment of the form ``// ... (to end of line)``" - -cpp_style_comment = Combine( - Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment -).set_name("C++ style comment") -"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`" - -java_style_comment = cpp_style_comment -"Same as :class:`cpp_style_comment`" - -python_style_comment = Regex(r"#.*").set_name("Python style comment") -"Comment of the form ``# ... (to end of line)``" - - -# build list of built-in expressions, for future reference if a global default value -# gets updated -_builtin_exprs: List[ParserElement] = [ - v for v in vars().values() if isinstance(v, ParserElement) -] - - -# pre-PEP8 compatible names -delimitedList = delimited_list -countedArray = counted_array -matchPreviousLiteral = match_previous_literal -matchPreviousExpr = match_previous_expr -oneOf = one_of -dictOf = dict_of -originalTextFor = original_text_for -nestedExpr = nested_expr -makeHTMLTags = make_html_tags -makeXMLTags = make_xml_tags -anyOpenTag, anyCloseTag = any_open_tag, any_close_tag -commonHTMLEntity = common_html_entity -replaceHTMLEntity = replace_html_entity -opAssoc = OpAssoc -infixNotation = infix_notation -cStyleComment = c_style_comment -htmlComment = html_comment -restOfLine = rest_of_line -dblSlashComment = dbl_slash_comment -cppStyleComment = cpp_style_comment -javaStyleComment = java_style_comment -pythonStyleComment = python_style_comment |