aboutsummaryrefslogtreecommitdiffstats
path: root/env/lib/python3.10/site-packages/pyparsing/helpers.py
diff options
context:
space:
mode:
Diffstat (limited to 'env/lib/python3.10/site-packages/pyparsing/helpers.py')
-rw-r--r--env/lib/python3.10/site-packages/pyparsing/helpers.py1088
1 files changed, 1088 insertions, 0 deletions
diff --git a/env/lib/python3.10/site-packages/pyparsing/helpers.py b/env/lib/python3.10/site-packages/pyparsing/helpers.py
new file mode 100644
index 0000000..9588b3b
--- /dev/null
+++ b/env/lib/python3.10/site-packages/pyparsing/helpers.py
@@ -0,0 +1,1088 @@
+# helpers.py
+import html.entities
+import re
+import typing
+
+from . import __diag__
+from .core import *
+from .util import _bslash, _flatten, _escape_regex_range_chars
+
+
+#
+# global helpers
+#
+def delimited_list(
+ expr: Union[str, ParserElement],
+ delim: Union[str, ParserElement] = ",",
+ combine: bool = False,
+ min: typing.Optional[int] = None,
+ max: typing.Optional[int] = None,
+ *,
+ allow_trailing_delim: bool = False,
+) -> ParserElement:
+ """Helper to define a delimited list of expressions - the delimiter
+ defaults to ','. By default, the list elements and delimiters can
+ have intervening whitespace, and comments, but this can be
+ overridden by passing ``combine=True`` in the constructor. If
+ ``combine`` is set to ``True``, the matching tokens are
+ returned as a single token string, with the delimiters included;
+ otherwise, the matching tokens are returned as a list of tokens,
+ with the delimiters suppressed.
+
+ If ``allow_trailing_delim`` is set to True, then the list may end with
+ a delimiter.
+
+ Example::
+
+ delimited_list(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc']
+ delimited_list(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
+ """
+ if isinstance(expr, str_type):
+ expr = ParserElement._literalStringClass(expr)
+
+ dlName = "{expr} [{delim} {expr}]...{end}".format(
+ expr=str(expr.copy().streamline()),
+ delim=str(delim),
+ end=" [{}]".format(str(delim)) if allow_trailing_delim else "",
+ )
+
+ if not combine:
+ delim = Suppress(delim)
+
+ if min is not None:
+ if min < 1:
+ raise ValueError("min must be greater than 0")
+ min -= 1
+ if max is not None:
+ if min is not None and max <= min:
+ raise ValueError("max must be greater than, or equal to min")
+ max -= 1
+ delimited_list_expr = expr + (delim + expr)[min, max]
+
+ if allow_trailing_delim:
+ delimited_list_expr += Opt(delim)
+
+ if combine:
+ return Combine(delimited_list_expr).set_name(dlName)
+ else:
+ return delimited_list_expr.set_name(dlName)
+
+
+def counted_array(
+ expr: ParserElement,
+ int_expr: typing.Optional[ParserElement] = None,
+ *,
+ intExpr: typing.Optional[ParserElement] = None,
+) -> ParserElement:
+ """Helper to define a counted list of expressions.
+
+ This helper defines a pattern of the form::
+
+ integer expr expr expr...
+
+ where the leading integer tells how many expr expressions follow.
+ The matched tokens returns the array of expr tokens as a list - the
+ leading count token is suppressed.
+
+ If ``int_expr`` is specified, it should be a pyparsing expression
+ that produces an integer value.
+
+ Example::
+
+ counted_array(Word(alphas)).parse_string('2 ab cd ef') # -> ['ab', 'cd']
+
+ # in this parser, the leading integer value is given in binary,
+ # '10' indicating that 2 values are in the array
+ binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
+ counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef') # -> ['ab', 'cd']
+
+ # if other fields must be parsed after the count but before the
+ # list items, give the fields results names and they will
+ # be preserved in the returned ParseResults:
+ count_with_metadata = integer + Word(alphas)("type")
+ typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
+ result = typed_array.parse_string("3 bool True True False")
+ print(result.dump())
+
+ # prints
+ # ['True', 'True', 'False']
+ # - items: ['True', 'True', 'False']
+ # - type: 'bool'
+ """
+ intExpr = intExpr or int_expr
+ array_expr = Forward()
+
+ def count_field_parse_action(s, l, t):
+ nonlocal array_expr
+ n = t[0]
+ array_expr <<= (expr * n) if n else Empty()
+ # clear list contents, but keep any named results
+ del t[:]
+
+ if intExpr is None:
+ intExpr = Word(nums).set_parse_action(lambda t: int(t[0]))
+ else:
+ intExpr = intExpr.copy()
+ intExpr.set_name("arrayLen")
+ intExpr.add_parse_action(count_field_parse_action, call_during_try=True)
+ return (intExpr + array_expr).set_name("(len) " + str(expr) + "...")
+
+
+def match_previous_literal(expr: ParserElement) -> ParserElement:
+ """Helper to define an expression that is indirectly defined from
+ the tokens matched in a previous expression, that is, it looks for
+ a 'repeat' of a previous expression. For example::
+
+ first = Word(nums)
+ second = match_previous_literal(first)
+ match_expr = first + ":" + second
+
+ will match ``"1:1"``, but not ``"1:2"``. Because this
+ matches a previous literal, will also match the leading
+ ``"1:1"`` in ``"1:10"``. If this is not desired, use
+ :class:`match_previous_expr`. Do *not* use with packrat parsing
+ enabled.
+ """
+ rep = Forward()
+
+ def copy_token_to_repeater(s, l, t):
+ if t:
+ if len(t) == 1:
+ rep << t[0]
+ else:
+ # flatten t tokens
+ tflat = _flatten(t.as_list())
+ rep << And(Literal(tt) for tt in tflat)
+ else:
+ rep << Empty()
+
+ expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
+ rep.set_name("(prev) " + str(expr))
+ return rep
+
+
+def match_previous_expr(expr: ParserElement) -> ParserElement:
+ """Helper to define an expression that is indirectly defined from
+ the tokens matched in a previous expression, that is, it looks for
+ a 'repeat' of a previous expression. For example::
+
+ first = Word(nums)
+ second = match_previous_expr(first)
+ match_expr = first + ":" + second
+
+ will match ``"1:1"``, but not ``"1:2"``. Because this
+ matches by expressions, will *not* match the leading ``"1:1"``
+ in ``"1:10"``; the expressions are evaluated first, and then
+ compared, so ``"1"`` is compared with ``"10"``. Do *not* use
+ with packrat parsing enabled.
+ """
+ rep = Forward()
+ e2 = expr.copy()
+ rep <<= e2
+
+ def copy_token_to_repeater(s, l, t):
+ matchTokens = _flatten(t.as_list())
+
+ def must_match_these_tokens(s, l, t):
+ theseTokens = _flatten(t.as_list())
+ if theseTokens != matchTokens:
+ raise ParseException(
+ s, l, "Expected {}, found{}".format(matchTokens, theseTokens)
+ )
+
+ rep.set_parse_action(must_match_these_tokens, callDuringTry=True)
+
+ expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
+ rep.set_name("(prev) " + str(expr))
+ return rep
+
+
+def one_of(
+ strs: Union[typing.Iterable[str], str],
+ caseless: bool = False,
+ use_regex: bool = True,
+ as_keyword: bool = False,
+ *,
+ useRegex: bool = True,
+ asKeyword: bool = False,
+) -> ParserElement:
+ """Helper to quickly define a set of alternative :class:`Literal` s,
+ and makes sure to do longest-first testing when there is a conflict,
+ regardless of the input order, but returns
+ a :class:`MatchFirst` for best performance.
+
+ Parameters:
+
+ - ``strs`` - a string of space-delimited literals, or a collection of
+ string literals
+ - ``caseless`` - treat all literals as caseless - (default= ``False``)
+ - ``use_regex`` - as an optimization, will
+ generate a :class:`Regex` object; otherwise, will generate
+ a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
+ creating a :class:`Regex` raises an exception) - (default= ``True``)
+ - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
+ generated expressions - (default= ``False``)
+ - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
+ but will be removed in a future release
+
+ Example::
+
+ comp_oper = one_of("< = > <= >= !=")
+ var = Word(alphas)
+ number = Word(nums)
+ term = var | number
+ comparison_expr = term + comp_oper + term
+ print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))
+
+ prints::
+
+ [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
+ """
+ asKeyword = asKeyword or as_keyword
+ useRegex = useRegex and use_regex
+
+ if (
+ isinstance(caseless, str_type)
+ and __diag__.warn_on_multiple_string_args_to_oneof
+ ):
+ warnings.warn(
+ "More than one string argument passed to one_of, pass"
+ " choices as a list or space-delimited string",
+ stacklevel=2,
+ )
+
+ if caseless:
+ isequal = lambda a, b: a.upper() == b.upper()
+ masks = lambda a, b: b.upper().startswith(a.upper())
+ parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
+ else:
+ isequal = lambda a, b: a == b
+ masks = lambda a, b: b.startswith(a)
+ parseElementClass = Keyword if asKeyword else Literal
+
+ symbols: List[str] = []
+ if isinstance(strs, str_type):
+ symbols = strs.split()
+ elif isinstance(strs, Iterable):
+ symbols = list(strs)
+ else:
+ raise TypeError("Invalid argument to one_of, expected string or iterable")
+ if not symbols:
+ return NoMatch()
+
+ # reorder given symbols to take care to avoid masking longer choices with shorter ones
+ # (but only if the given symbols are not just single characters)
+ if any(len(sym) > 1 for sym in symbols):
+ i = 0
+ while i < len(symbols) - 1:
+ cur = symbols[i]
+ for j, other in enumerate(symbols[i + 1 :]):
+ if isequal(other, cur):
+ del symbols[i + j + 1]
+ break
+ elif masks(cur, other):
+ del symbols[i + j + 1]
+ symbols.insert(i, other)
+ break
+ else:
+ i += 1
+
+ if useRegex:
+ re_flags: int = re.IGNORECASE if caseless else 0
+
+ try:
+ if all(len(sym) == 1 for sym in symbols):
+ # symbols are just single characters, create range regex pattern
+ patt = "[{}]".format(
+ "".join(_escape_regex_range_chars(sym) for sym in symbols)
+ )
+ else:
+ patt = "|".join(re.escape(sym) for sym in symbols)
+
+ # wrap with \b word break markers if defining as keywords
+ if asKeyword:
+ patt = r"\b(?:{})\b".format(patt)
+
+ ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))
+
+ if caseless:
+ # add parse action to return symbols as specified, not in random
+ # casing as found in input string
+ symbol_map = {sym.lower(): sym for sym in symbols}
+ ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])
+
+ return ret
+
+ except re.error:
+ warnings.warn(
+ "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
+ )
+
+ # last resort, just use MatchFirst
+ return MatchFirst(parseElementClass(sym) for sym in symbols).set_name(
+ " | ".join(symbols)
+ )
+
+
+def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
+ """Helper to easily and clearly define a dictionary by specifying
+ the respective patterns for the key and value. Takes care of
+ defining the :class:`Dict`, :class:`ZeroOrMore`, and
+ :class:`Group` tokens in the proper order. The key pattern
+ can include delimiting markers or punctuation, as long as they are
+ suppressed, thereby leaving the significant key text. The value
+ pattern can include named results, so that the :class:`Dict` results
+ can include named token fields.
+
+ Example::
+
+ text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
+ attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
+ print(attr_expr[1, ...].parse_string(text).dump())
+
+ attr_label = label
+ attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)
+
+ # similar to Dict, but simpler call format
+ result = dict_of(attr_label, attr_value).parse_string(text)
+ print(result.dump())
+ print(result['shape'])
+ print(result.shape) # object attribute access works too
+ print(result.as_dict())
+
+ prints::
+
+ [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
+ - color: 'light blue'
+ - posn: 'upper left'
+ - shape: 'SQUARE'
+ - texture: 'burlap'
+ SQUARE
+ SQUARE
+ {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
+ """
+ return Dict(OneOrMore(Group(key + value)))
+
+
+def original_text_for(
+ expr: ParserElement, as_string: bool = True, *, asString: bool = True
+) -> ParserElement:
+ """Helper to return the original, untokenized text for a given
+ expression. Useful to restore the parsed fields of an HTML start
+ tag into the raw tag text itself, or to revert separate tokens with
+ intervening whitespace back to the original matching input text. By
+ default, returns astring containing the original parsed text.
+
+ If the optional ``as_string`` argument is passed as
+ ``False``, then the return value is
+ a :class:`ParseResults` containing any results names that
+ were originally matched, and a single token containing the original
+ matched text from the input string. So if the expression passed to
+ :class:`original_text_for` contains expressions with defined
+ results names, you must set ``as_string`` to ``False`` if you
+ want to preserve those results name values.
+
+ The ``asString`` pre-PEP8 argument is retained for compatibility,
+ but will be removed in a future release.
+
+ Example::
+
+ src = "this is test <b> bold <i>text</i> </b> normal text "
+ for tag in ("b", "i"):
+ opener, closer = make_html_tags(tag)
+ patt = original_text_for(opener + SkipTo(closer) + closer)
+ print(patt.search_string(src)[0])
+
+ prints::
+
+ ['<b> bold <i>text</i> </b>']
+ ['<i>text</i>']
+ """
+ asString = asString and as_string
+
+ locMarker = Empty().set_parse_action(lambda s, loc, t: loc)
+ endlocMarker = locMarker.copy()
+ endlocMarker.callPreparse = False
+ matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
+ if asString:
+ extractText = lambda s, l, t: s[t._original_start : t._original_end]
+ else:
+
+ def extractText(s, l, t):
+ t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]
+
+ matchExpr.set_parse_action(extractText)
+ matchExpr.ignoreExprs = expr.ignoreExprs
+ matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
+ return matchExpr
+
+
+def ungroup(expr: ParserElement) -> ParserElement:
+ """Helper to undo pyparsing's default grouping of And expressions,
+ even if all but one are non-empty.
+ """
+ return TokenConverter(expr).add_parse_action(lambda t: t[0])
+
+
+def locatedExpr(expr: ParserElement) -> ParserElement:
+ """
+ (DEPRECATED - future code should use the Located class)
+ Helper to decorate a returned token with its starting and ending
+ locations in the input string.
+
+ This helper adds the following results names:
+
+ - ``locn_start`` - location where matched expression begins
+ - ``locn_end`` - location where matched expression ends
+ - ``value`` - the actual parsed results
+
+ Be careful if the input text contains ``<TAB>`` characters, you
+ may want to call :class:`ParserElement.parseWithTabs`
+
+ Example::
+
+ wd = Word(alphas)
+ for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
+ print(match)
+
+ prints::
+
+ [[0, 'ljsdf', 5]]
+ [[8, 'lksdjjf', 15]]
+ [[18, 'lkkjj', 23]]
+ """
+ locator = Empty().set_parse_action(lambda ss, ll, tt: ll)
+ return Group(
+ locator("locn_start")
+ + expr("value")
+ + locator.copy().leaveWhitespace()("locn_end")
+ )
+
+
+def nested_expr(
+ opener: Union[str, ParserElement] = "(",
+ closer: Union[str, ParserElement] = ")",
+ content: typing.Optional[ParserElement] = None,
+ ignore_expr: ParserElement = quoted_string(),
+ *,
+ ignoreExpr: ParserElement = quoted_string(),
+) -> ParserElement:
+ """Helper method for defining nested lists enclosed in opening and
+ closing delimiters (``"("`` and ``")"`` are the default).
+
+ Parameters:
+ - ``opener`` - opening character for a nested list
+ (default= ``"("``); can also be a pyparsing expression
+ - ``closer`` - closing character for a nested list
+ (default= ``")"``); can also be a pyparsing expression
+ - ``content`` - expression for items within the nested lists
+ (default= ``None``)
+ - ``ignore_expr`` - expression for ignoring opening and closing delimiters
+ (default= :class:`quoted_string`)
+ - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
+ but will be removed in a future release
+
+ If an expression is not provided for the content argument, the
+ nested expression will capture all whitespace-delimited content
+ between delimiters as a list of separate values.
+
+ Use the ``ignore_expr`` argument to define expressions that may
+ contain opening or closing characters that should not be treated as
+ opening or closing characters for nesting, such as quoted_string or
+ a comment expression. Specify multiple expressions using an
+ :class:`Or` or :class:`MatchFirst`. The default is
+ :class:`quoted_string`, but if no expressions are to be ignored, then
+ pass ``None`` for this argument.
+
+ Example::
+
+ data_type = one_of("void int short long char float double")
+ decl_data_type = Combine(data_type + Opt(Word('*')))
+ ident = Word(alphas+'_', alphanums+'_')
+ number = pyparsing_common.number
+ arg = Group(decl_data_type + ident)
+ LPAR, RPAR = map(Suppress, "()")
+
+ code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))
+
+ c_function = (decl_data_type("type")
+ + ident("name")
+ + LPAR + Opt(delimited_list(arg), [])("args") + RPAR
+ + code_body("body"))
+ c_function.ignore(c_style_comment)
+
+ source_code = '''
+ int is_odd(int x) {
+ return (x%2);
+ }
+
+ int dec_to_hex(char hchar) {
+ if (hchar >= '0' && hchar <= '9') {
+ return (ord(hchar)-ord('0'));
+ } else {
+ return (10+ord(hchar)-ord('A'));
+ }
+ }
+ '''
+ for func in c_function.search_string(source_code):
+ print("%(name)s (%(type)s) args: %(args)s" % func)
+
+
+ prints::
+
+ is_odd (int) args: [['int', 'x']]
+ dec_to_hex (int) args: [['char', 'hchar']]
+ """
+ if ignoreExpr != ignore_expr:
+ ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr
+ if opener == closer:
+ raise ValueError("opening and closing strings cannot be the same")
+ if content is None:
+ if isinstance(opener, str_type) and isinstance(closer, str_type):
+ if len(opener) == 1 and len(closer) == 1:
+ if ignoreExpr is not None:
+ content = Combine(
+ OneOrMore(
+ ~ignoreExpr
+ + CharsNotIn(
+ opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
+ exact=1,
+ )
+ )
+ ).set_parse_action(lambda t: t[0].strip())
+ else:
+ content = empty.copy() + CharsNotIn(
+ opener + closer + ParserElement.DEFAULT_WHITE_CHARS
+ ).set_parse_action(lambda t: t[0].strip())
+ else:
+ if ignoreExpr is not None:
+ content = Combine(
+ OneOrMore(
+ ~ignoreExpr
+ + ~Literal(opener)
+ + ~Literal(closer)
+ + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
+ )
+ ).set_parse_action(lambda t: t[0].strip())
+ else:
+ content = Combine(
+ OneOrMore(
+ ~Literal(opener)
+ + ~Literal(closer)
+ + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
+ )
+ ).set_parse_action(lambda t: t[0].strip())
+ else:
+ raise ValueError(
+ "opening and closing arguments must be strings if no content expression is given"
+ )
+ ret = Forward()
+ if ignoreExpr is not None:
+ ret <<= Group(
+ Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
+ )
+ else:
+ ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
+ ret.set_name("nested %s%s expression" % (opener, closer))
+ return ret
+
+
+def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
+ """Internal helper to construct opening and closing tag expressions, given a tag name"""
+ if isinstance(tagStr, str_type):
+ resname = tagStr
+ tagStr = Keyword(tagStr, caseless=not xml)
+ else:
+ resname = tagStr.name
+
+ tagAttrName = Word(alphas, alphanums + "_-:")
+ if xml:
+ tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
+ openTag = (
+ suppress_LT
+ + tagStr("tag")
+ + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
+ + Opt("/", default=[False])("empty").set_parse_action(
+ lambda s, l, t: t[0] == "/"
+ )
+ + suppress_GT
+ )
+ else:
+ tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
+ printables, exclude_chars=">"
+ )
+ openTag = (
+ suppress_LT
+ + tagStr("tag")
+ + Dict(
+ ZeroOrMore(
+ Group(
+ tagAttrName.set_parse_action(lambda t: t[0].lower())
+ + Opt(Suppress("=") + tagAttrValue)
+ )
+ )
+ )
+ + Opt("/", default=[False])("empty").set_parse_action(
+ lambda s, l, t: t[0] == "/"
+ )
+ + suppress_GT
+ )
+ closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)
+
+ openTag.set_name("<%s>" % resname)
+ # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
+ openTag.add_parse_action(
+ lambda t: t.__setitem__(
+ "start" + "".join(resname.replace(":", " ").title().split()), t.copy()
+ )
+ )
+ closeTag = closeTag(
+ "end" + "".join(resname.replace(":", " ").title().split())
+ ).set_name("</%s>" % resname)
+ openTag.tag = resname
+ closeTag.tag = resname
+ openTag.tag_body = SkipTo(closeTag())
+ return openTag, closeTag
+
+
+def make_html_tags(
+ tag_str: Union[str, ParserElement]
+) -> Tuple[ParserElement, ParserElement]:
+ """Helper to construct opening and closing tag expressions for HTML,
+ given a tag name. Matches tags in either upper or lower case,
+ attributes with namespaces and with quoted or unquoted values.
+
+ Example::
+
+ text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
+ # make_html_tags returns pyparsing expressions for the opening and
+ # closing tags as a 2-tuple
+ a, a_end = make_html_tags("A")
+ link_expr = a + SkipTo(a_end)("link_text") + a_end
+
+ for link in link_expr.search_string(text):
+ # attributes in the <A> tag (like "href" shown here) are
+ # also accessible as named results
+ print(link.link_text, '->', link.href)
+
+ prints::
+
+ pyparsing -> https://github.com/pyparsing/pyparsing/wiki
+ """
+ return _makeTags(tag_str, False)
+
+
+def make_xml_tags(
+ tag_str: Union[str, ParserElement]
+) -> Tuple[ParserElement, ParserElement]:
+ """Helper to construct opening and closing tag expressions for XML,
+ given a tag name. Matches tags only in the given upper/lower case.
+
+ Example: similar to :class:`make_html_tags`
+ """
+ return _makeTags(tag_str, True)
+
+
+any_open_tag: ParserElement
+any_close_tag: ParserElement
+any_open_tag, any_close_tag = make_html_tags(
+ Word(alphas, alphanums + "_:").set_name("any tag")
+)
+
+_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
+common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
+ "common HTML entity"
+)
+
+
+def replace_html_entity(t):
+ """Helper parser action to replace common HTML entities with their special characters"""
+ return _htmlEntityMap.get(t.entity)
+
+
+class OpAssoc(Enum):
+ LEFT = 1
+ RIGHT = 2
+
+
+InfixNotationOperatorArgType = Union[
+ ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
+]
+InfixNotationOperatorSpec = Union[
+ Tuple[
+ InfixNotationOperatorArgType,
+ int,
+ OpAssoc,
+ typing.Optional[ParseAction],
+ ],
+ Tuple[
+ InfixNotationOperatorArgType,
+ int,
+ OpAssoc,
+ ],
+]
+
+
+def infix_notation(
+ base_expr: ParserElement,
+ op_list: List[InfixNotationOperatorSpec],
+ lpar: Union[str, ParserElement] = Suppress("("),
+ rpar: Union[str, ParserElement] = Suppress(")"),
+) -> ParserElement:
+ """Helper method for constructing grammars of expressions made up of
+ operators working in a precedence hierarchy. Operators may be unary
+ or binary, left- or right-associative. Parse actions can also be
+ attached to operator expressions. The generated parser will also
+ recognize the use of parentheses to override operator precedences
+ (see example below).
+
+ Note: if you define a deep operator list, you may see performance
+ issues when using infix_notation. See
+ :class:`ParserElement.enable_packrat` for a mechanism to potentially
+ improve your parser performance.
+
+ Parameters:
+ - ``base_expr`` - expression representing the most basic operand to
+ be used in the expression
+ - ``op_list`` - list of tuples, one for each operator precedence level
+ in the expression grammar; each tuple is of the form ``(op_expr,
+ num_operands, right_left_assoc, (optional)parse_action)``, where:
+
+ - ``op_expr`` is the pyparsing expression for the operator; may also
+ be a string, which will be converted to a Literal; if ``num_operands``
+ is 3, ``op_expr`` is a tuple of two expressions, for the two
+ operators separating the 3 terms
+ - ``num_operands`` is the number of terms for this operator (must be 1,
+ 2, or 3)
+ - ``right_left_assoc`` is the indicator whether the operator is right
+ or left associative, using the pyparsing-defined constants
+ ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
+ - ``parse_action`` is the parse action to be associated with
+ expressions matching this operator expression (the parse action
+ tuple member may be omitted); if the parse action is passed
+ a tuple or list of functions, this is equivalent to calling
+ ``set_parse_action(*fn)``
+ (:class:`ParserElement.set_parse_action`)
+ - ``lpar`` - expression for matching left-parentheses; if passed as a
+ str, then will be parsed as Suppress(lpar). If lpar is passed as
+ an expression (such as ``Literal('(')``), then it will be kept in
+ the parsed results, and grouped with them. (default= ``Suppress('(')``)
+ - ``rpar`` - expression for matching right-parentheses; if passed as a
+ str, then will be parsed as Suppress(rpar). If rpar is passed as
+ an expression (such as ``Literal(')')``), then it will be kept in
+ the parsed results, and grouped with them. (default= ``Suppress(')')``)
+
+ Example::
+
+ # simple example of four-function arithmetic with ints and
+ # variable names
+ integer = pyparsing_common.signed_integer
+ varname = pyparsing_common.identifier
+
+ arith_expr = infix_notation(integer | varname,
+ [
+ ('-', 1, OpAssoc.RIGHT),
+ (one_of('* /'), 2, OpAssoc.LEFT),
+ (one_of('+ -'), 2, OpAssoc.LEFT),
+ ])
+
+ arith_expr.run_tests('''
+ 5+3*6
+ (5+3)*6
+ -2--11
+ ''', full_dump=False)
+
+ prints::
+
+ 5+3*6
+ [[5, '+', [3, '*', 6]]]
+
+ (5+3)*6
+ [[[5, '+', 3], '*', 6]]
+
+ -2--11
+ [[['-', 2], '-', ['-', 11]]]
+ """
+ # captive version of FollowedBy that does not do parse actions or capture results names
+ class _FB(FollowedBy):
+ def parseImpl(self, instring, loc, doActions=True):
+ self.expr.try_parse(instring, loc)
+ return loc, []
+
+ _FB.__name__ = "FollowedBy>"
+
+ ret = Forward()
+ if isinstance(lpar, str):
+ lpar = Suppress(lpar)
+ if isinstance(rpar, str):
+ rpar = Suppress(rpar)
+
+ # if lpar and rpar are not suppressed, wrap in group
+ if not (isinstance(rpar, Suppress) and isinstance(rpar, Suppress)):
+ lastExpr = base_expr | Group(lpar + ret + rpar)
+ else:
+ lastExpr = base_expr | (lpar + ret + rpar)
+
+ for i, operDef in enumerate(op_list):
+ opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]
+ if isinstance(opExpr, str_type):
+ opExpr = ParserElement._literalStringClass(opExpr)
+ if arity == 3:
+ if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
+ raise ValueError(
+ "if numterms=3, opExpr must be a tuple or list of two expressions"
+ )
+ opExpr1, opExpr2 = opExpr
+ term_name = "{}{} term".format(opExpr1, opExpr2)
+ else:
+ term_name = "{} term".format(opExpr)
+
+ if not 1 <= arity <= 3:
+ raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
+
+ if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
+ raise ValueError("operator must indicate right or left associativity")
+
+ thisExpr: Forward = Forward().set_name(term_name)
+ if rightLeftAssoc is OpAssoc.LEFT:
+ if arity == 1:
+ matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
+ elif arity == 2:
+ if opExpr is not None:
+ matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
+ lastExpr + (opExpr + lastExpr)[1, ...]
+ )
+ else:
+ matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
+ elif arity == 3:
+ matchExpr = _FB(
+ lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
+ ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
+ elif rightLeftAssoc is OpAssoc.RIGHT:
+ if arity == 1:
+ # try to avoid LR with this extra test
+ if not isinstance(opExpr, Opt):
+ opExpr = Opt(opExpr)
+ matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
+ elif arity == 2:
+ if opExpr is not None:
+ matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
+ lastExpr + (opExpr + thisExpr)[1, ...]
+ )
+ else:
+ matchExpr = _FB(lastExpr + thisExpr) + Group(
+ lastExpr + thisExpr[1, ...]
+ )
+ elif arity == 3:
+ matchExpr = _FB(
+ lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
+ ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
+ if pa:
+ if isinstance(pa, (tuple, list)):
+ matchExpr.set_parse_action(*pa)
+ else:
+ matchExpr.set_parse_action(pa)
+ thisExpr <<= (matchExpr | lastExpr).setName(term_name)
+ lastExpr = thisExpr
+ ret <<= lastExpr
+ return ret
+
+
+def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
+ """
+ (DEPRECATED - use IndentedBlock class instead)
+ Helper method for defining space-delimited indentation blocks,
+ such as those used to define block statements in Python source code.
+
+ Parameters:
+
+ - ``blockStatementExpr`` - expression defining syntax of statement that
+ is repeated within the indented block
+ - ``indentStack`` - list created by caller to manage indentation stack
+ (multiple ``statementWithIndentedBlock`` expressions within a single
+ grammar should share a common ``indentStack``)
+ - ``indent`` - boolean indicating whether block must be indented beyond
+ the current level; set to ``False`` for block of left-most statements
+ (default= ``True``)
+
+ A valid block must contain at least one ``blockStatement``.
+
+ (Note that indentedBlock uses internal parse actions which make it
+ incompatible with packrat parsing.)
+
+ Example::
+
+ data = '''
+ def A(z):
+ A1
+ B = 100
+ G = A2
+ A2
+ A3
+ B
+ def BB(a,b,c):
+ BB1
+ def BBA():
+ bba1
+ bba2
+ bba3
+ C
+ D
+ def spam(x,y):
+ def eggs(z):
+ pass
+ '''
+
+
+ indentStack = [1]
+ stmt = Forward()
+
+ identifier = Word(alphas, alphanums)
+ funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
+ func_body = indentedBlock(stmt, indentStack)
+ funcDef = Group(funcDecl + func_body)
+
+ rvalue = Forward()
+ funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
+ rvalue << (funcCall | identifier | Word(nums))
+ assignment = Group(identifier + "=" + rvalue)
+ stmt << (funcDef | assignment | identifier)
+
+ module_body = stmt[1, ...]
+
+ parseTree = module_body.parseString(data)
+ parseTree.pprint()
+
+ prints::
+
+ [['def',
+ 'A',
+ ['(', 'z', ')'],
+ ':',
+ [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
+ 'B',
+ ['def',
+ 'BB',
+ ['(', 'a', 'b', 'c', ')'],
+ ':',
+ [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
+ 'C',
+ 'D',
+ ['def',
+ 'spam',
+ ['(', 'x', 'y', ')'],
+ ':',
+ [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
+ """
+ backup_stacks.append(indentStack[:])
+
+ def reset_stack():
+ indentStack[:] = backup_stacks[-1]
+
+ def checkPeerIndent(s, l, t):
+ if l >= len(s):
+ return
+ curCol = col(l, s)
+ if curCol != indentStack[-1]:
+ if curCol > indentStack[-1]:
+ raise ParseException(s, l, "illegal nesting")
+ raise ParseException(s, l, "not a peer entry")
+
+ def checkSubIndent(s, l, t):
+ curCol = col(l, s)
+ if curCol > indentStack[-1]:
+ indentStack.append(curCol)
+ else:
+ raise ParseException(s, l, "not a subentry")
+
+ def checkUnindent(s, l, t):
+ if l >= len(s):
+ return
+ curCol = col(l, s)
+ if not (indentStack and curCol in indentStack):
+ raise ParseException(s, l, "not an unindent")
+ if curCol < indentStack[-1]:
+ indentStack.pop()
+
+ NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
+ INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
+ PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
+ UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
+ if indent:
+ smExpr = Group(
+ Opt(NL)
+ + INDENT
+ + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
+ + UNDENT
+ )
+ else:
+ smExpr = Group(
+ Opt(NL)
+ + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
+ + Opt(UNDENT)
+ )
+
+ # add a parse action to remove backup_stack from list of backups
+ smExpr.add_parse_action(
+ lambda: backup_stacks.pop(-1) and None if backup_stacks else None
+ )
+ smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
+ blockStatementExpr.ignore(_bslash + LineEnd())
+ return smExpr.set_name("indented block")
+
+
+# it's easy to get these comment structures wrong - they're very common, so may as well make them available
+c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
+ "C style comment"
+)
+"Comment of the form ``/* ... */``"
+
+html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
+"Comment of the form ``<!-- ... -->``"
+
+rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
+dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
+"Comment of the form ``// ... (to end of line)``"
+
+cpp_style_comment = Combine(
+ Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
+).set_name("C++ style comment")
+"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"
+
+java_style_comment = cpp_style_comment
+"Same as :class:`cpp_style_comment`"
+
+python_style_comment = Regex(r"#.*").set_name("Python style comment")
+"Comment of the form ``# ... (to end of line)``"
+
+
+# build list of built-in expressions, for future reference if a global default value
+# gets updated
+_builtin_exprs: List[ParserElement] = [
+ v for v in vars().values() if isinstance(v, ParserElement)
+]
+
+
+# pre-PEP8 compatible names
+delimitedList = delimited_list
+countedArray = counted_array
+matchPreviousLiteral = match_previous_literal
+matchPreviousExpr = match_previous_expr
+oneOf = one_of
+dictOf = dict_of
+originalTextFor = original_text_for
+nestedExpr = nested_expr
+makeHTMLTags = make_html_tags
+makeXMLTags = make_xml_tags
+anyOpenTag, anyCloseTag = any_open_tag, any_close_tag
+commonHTMLEntity = common_html_entity
+replaceHTMLEntity = replace_html_entity
+opAssoc = OpAssoc
+infixNotation = infix_notation
+cStyleComment = c_style_comment
+htmlComment = html_comment
+restOfLine = rest_of_line
+dblSlashComment = dbl_slash_comment
+cppStyleComment = cpp_style_comment
+javaStyleComment = java_style_comment
+pythonStyleComment = python_style_comment