diff options
Diffstat (limited to 'env/lib/python3.10/site-packages/pyparsing/common.py')
-rw-r--r-- | env/lib/python3.10/site-packages/pyparsing/common.py | 424 |
1 files changed, 0 insertions, 424 deletions
diff --git a/env/lib/python3.10/site-packages/pyparsing/common.py b/env/lib/python3.10/site-packages/pyparsing/common.py deleted file mode 100644 index 1859fb7..0000000 --- a/env/lib/python3.10/site-packages/pyparsing/common.py +++ /dev/null @@ -1,424 +0,0 @@ -# common.py -from .core import * -from .helpers import delimited_list, any_open_tag, any_close_tag -from datetime import datetime - - -# some other useful expressions - using lower-case class name since we are really using this as a namespace -class pyparsing_common: - """Here are some common low-level expressions that may be useful in - jump-starting parser development: - - - numeric forms (:class:`integers<integer>`, :class:`reals<real>`, - :class:`scientific notation<sci_real>`) - - common :class:`programming identifiers<identifier>` - - network addresses (:class:`MAC<mac_address>`, - :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`) - - ISO8601 :class:`dates<iso8601_date>` and - :class:`datetime<iso8601_datetime>` - - :class:`UUID<uuid>` - - :class:`comma-separated list<comma_separated_list>` - - :class:`url` - - Parse actions: - - - :class:`convertToInteger` - - :class:`convertToFloat` - - :class:`convertToDate` - - :class:`convertToDatetime` - - :class:`stripHTMLTags` - - :class:`upcaseTokens` - - :class:`downcaseTokens` - - Example:: - - pyparsing_common.number.runTests(''' - # any int or real number, returned as the appropriate type - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.fnumber.runTests(''' - # any int or real number, returned as float - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.hex_integer.runTests(''' - # hex numbers - 100 - FF - ''') - - pyparsing_common.fraction.runTests(''' - # fractions - 1/2 - -3/4 - ''') - - pyparsing_common.mixed_integer.runTests(''' - # mixed fractions - 1 - 1/2 - -3/4 - 1-3/4 - ''') - - import uuid - pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests(''' - # uuid - 12345678-1234-5678-1234-567812345678 - ''') - - prints:: - - # any int or real number, returned as the appropriate type - 100 - [100] - - -100 - [-100] - - +100 - [100] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # any int or real number, returned as float - 100 - [100.0] - - -100 - [-100.0] - - +100 - [100.0] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # hex numbers - 100 - [256] - - FF - [255] - - # fractions - 1/2 - [0.5] - - -3/4 - [-0.75] - - # mixed fractions - 1 - [1] - - 1/2 - [0.5] - - -3/4 - [-0.75] - - 1-3/4 - [1.75] - - # uuid - 12345678-1234-5678-1234-567812345678 - [UUID('12345678-1234-5678-1234-567812345678')] - """ - - convert_to_integer = token_map(int) - """ - Parse action for converting parsed integers to Python int - """ - - convert_to_float = token_map(float) - """ - Parse action for converting parsed numbers to Python float - """ - - integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer) - """expression that parses an unsigned integer, returns an int""" - - hex_integer = ( - Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16)) - ) - """expression that parses a hexadecimal integer, returns an int""" - - signed_integer = ( - Regex(r"[+-]?\d+") - .set_name("signed integer") - .set_parse_action(convert_to_integer) - ) - """expression that parses an integer with optional leading sign, returns an int""" - - fraction = ( - signed_integer().set_parse_action(convert_to_float) - + "/" - + signed_integer().set_parse_action(convert_to_float) - ).set_name("fraction") - """fractional expression of an integer divided by an integer, returns a float""" - fraction.add_parse_action(lambda tt: tt[0] / tt[-1]) - - mixed_integer = ( - fraction | signed_integer + Opt(Opt("-").suppress() + fraction) - ).set_name("fraction or mixed integer-fraction") - """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" - mixed_integer.add_parse_action(sum) - - real = ( - Regex(r"[+-]?(?:\d+\.\d*|\.\d+)") - .set_name("real number") - .set_parse_action(convert_to_float) - ) - """expression that parses a floating point number and returns a float""" - - sci_real = ( - Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)") - .set_name("real number with scientific notation") - .set_parse_action(convert_to_float) - ) - """expression that parses a floating point number with optional - scientific notation and returns a float""" - - # streamlining this expression makes the docs nicer-looking - number = (sci_real | real | signed_integer).setName("number").streamline() - """any numeric expression, returns the corresponding Python type""" - - fnumber = ( - Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?") - .set_name("fnumber") - .set_parse_action(convert_to_float) - ) - """any int or real number, returned as float""" - - identifier = Word(identchars, identbodychars).set_name("identifier") - """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" - - ipv4_address = Regex( - r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}" - ).set_name("IPv4 address") - "IPv4 address (``0.0.0.0 - 255.255.255.255``)" - - _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer") - _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name( - "full IPv6 address" - ) - _short_ipv6_address = ( - Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6)) - + "::" - + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6)) - ).set_name("short IPv6 address") - _short_ipv6_address.add_condition( - lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8 - ) - _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address") - ipv6_address = Combine( - (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name( - "IPv6 address" - ) - ).set_name("IPv6 address") - "IPv6 address (long, short, or mixed form)" - - mac_address = Regex( - r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}" - ).set_name("MAC address") - "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" - - @staticmethod - def convert_to_date(fmt: str = "%Y-%m-%d"): - """ - Helper to create a parse action for converting parsed date string to Python datetime.date - - Params - - - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``) - - Example:: - - date_expr = pyparsing_common.iso8601_date.copy() - date_expr.setParseAction(pyparsing_common.convertToDate()) - print(date_expr.parseString("1999-12-31")) - - prints:: - - [datetime.date(1999, 12, 31)] - """ - - def cvt_fn(ss, ll, tt): - try: - return datetime.strptime(tt[0], fmt).date() - except ValueError as ve: - raise ParseException(ss, ll, str(ve)) - - return cvt_fn - - @staticmethod - def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"): - """Helper to create a parse action for converting parsed - datetime string to Python datetime.datetime - - Params - - - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``) - - Example:: - - dt_expr = pyparsing_common.iso8601_datetime.copy() - dt_expr.setParseAction(pyparsing_common.convertToDatetime()) - print(dt_expr.parseString("1999-12-31T23:59:59.999")) - - prints:: - - [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] - """ - - def cvt_fn(s, l, t): - try: - return datetime.strptime(t[0], fmt) - except ValueError as ve: - raise ParseException(s, l, str(ve)) - - return cvt_fn - - iso8601_date = Regex( - r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?" - ).set_name("ISO8601 date") - "ISO8601 date (``yyyy-mm-dd``)" - - iso8601_datetime = Regex( - r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?" - ).set_name("ISO8601 datetime") - "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``" - - uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID") - "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)" - - _html_stripper = any_open_tag.suppress() | any_close_tag.suppress() - - @staticmethod - def strip_html_tags(s: str, l: int, tokens: ParseResults): - """Parse action to remove HTML tags from web page HTML source - - Example:: - - # strip HTML links from normal text - text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>' - td, td_end = makeHTMLTags("TD") - table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end - print(table_text.parseString(text).body) - - Prints:: - - More info at the pyparsing wiki page - """ - return pyparsing_common._html_stripper.transform_string(tokens[0]) - - _commasepitem = ( - Combine( - OneOrMore( - ~Literal(",") - + ~LineEnd() - + Word(printables, exclude_chars=",") - + Opt(White(" \t") + ~FollowedBy(LineEnd() | ",")) - ) - ) - .streamline() - .set_name("commaItem") - ) - comma_separated_list = delimited_list( - Opt(quoted_string.copy() | _commasepitem, default="") - ).set_name("comma separated list") - """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" - - upcase_tokens = staticmethod(token_map(lambda t: t.upper())) - """Parse action to convert tokens to upper case.""" - - downcase_tokens = staticmethod(token_map(lambda t: t.lower())) - """Parse action to convert tokens to lower case.""" - - # fmt: off - url = Regex( - # https://mathiasbynens.be/demo/url-regex - # https://gist.github.com/dperini/729294 - r"^" + - # protocol identifier (optional) - # short syntax // still required - r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" + - # user:pass BasicAuth (optional) - r"(?:(?P<auth>\S+(?::\S*)?)@)?" + - r"(?P<host>" + - # IP address exclusion - # private & local networks - r"(?!(?:10|127)(?:\.\d{1,3}){3})" + - r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" + - r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" + - # IP address dotted notation octets - # excludes loopback network 0.0.0.0 - # excludes reserved space >= 224.0.0.0 - # excludes network & broadcast addresses - # (first & last IP address of each class) - r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" + - r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" + - r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" + - r"|" + - # host & domain names, may end with dot - # can be replaced by a shortest alternative - # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+ - r"(?:" + - r"(?:" + - r"[a-z0-9\u00a1-\uffff]" + - r"[a-z0-9\u00a1-\uffff_-]{0,62}" + - r")?" + - r"[a-z0-9\u00a1-\uffff]\." + - r")+" + - # TLD identifier name, may end with dot - r"(?:[a-z\u00a1-\uffff]{2,}\.?)" + - r")" + - # port number (optional) - r"(:(?P<port>\d{2,5}))?" + - # resource path (optional) - r"(?P<path>\/[^?# ]*)?" + - # query string (optional) - r"(\?(?P<query>[^#]*))?" + - # fragment (optional) - r"(#(?P<fragment>\S*))?" + - r"$" - ).set_name("url") - # fmt: on - - # pre-PEP8 compatibility names - convertToInteger = convert_to_integer - convertToFloat = convert_to_float - convertToDate = convert_to_date - convertToDatetime = convert_to_datetime - stripHTMLTags = strip_html_tags - upcaseTokens = upcase_tokens - downcaseTokens = downcase_tokens - - -_builtin_exprs = [ - v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement) -] |