diff options
Diffstat (limited to 'env/lib/python3.10/site-packages/pyparsing/unicode.py')
-rw-r--r-- | env/lib/python3.10/site-packages/pyparsing/unicode.py | 352 |
1 files changed, 0 insertions, 352 deletions
diff --git a/env/lib/python3.10/site-packages/pyparsing/unicode.py b/env/lib/python3.10/site-packages/pyparsing/unicode.py deleted file mode 100644 index 0652620..0000000 --- a/env/lib/python3.10/site-packages/pyparsing/unicode.py +++ /dev/null @@ -1,352 +0,0 @@ -# unicode.py - -import sys -from itertools import filterfalse -from typing import List, Tuple, Union - - -class _lazyclassproperty: - def __init__(self, fn): - self.fn = fn - self.__doc__ = fn.__doc__ - self.__name__ = fn.__name__ - - def __get__(self, obj, cls): - if cls is None: - cls = type(obj) - if not hasattr(cls, "_intern") or any( - cls._intern is getattr(superclass, "_intern", []) - for superclass in cls.__mro__[1:] - ): - cls._intern = {} - attrname = self.fn.__name__ - if attrname not in cls._intern: - cls._intern[attrname] = self.fn(cls) - return cls._intern[attrname] - - -UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]] - - -class unicode_set: - """ - A set of Unicode characters, for language-specific strings for - ``alphas``, ``nums``, ``alphanums``, and ``printables``. - A unicode_set is defined by a list of ranges in the Unicode character - set, in a class attribute ``_ranges``. Ranges can be specified using - 2-tuples or a 1-tuple, such as:: - - _ranges = [ - (0x0020, 0x007e), - (0x00a0, 0x00ff), - (0x0100,), - ] - - Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x). - - A unicode set can also be defined using multiple inheritance of other unicode sets:: - - class CJK(Chinese, Japanese, Korean): - pass - """ - - _ranges: UnicodeRangeList = [] - - @_lazyclassproperty - def _chars_for_ranges(cls): - ret = [] - for cc in cls.__mro__: - if cc is unicode_set: - break - for rr in getattr(cc, "_ranges", ()): - ret.extend(range(rr[0], rr[-1] + 1)) - return [chr(c) for c in sorted(set(ret))] - - @_lazyclassproperty - def printables(cls): - "all non-whitespace characters in this range" - return "".join(filterfalse(str.isspace, cls._chars_for_ranges)) - - @_lazyclassproperty - def alphas(cls): - "all alphabetic characters in this range" - return "".join(filter(str.isalpha, cls._chars_for_ranges)) - - @_lazyclassproperty - def nums(cls): - "all numeric digit characters in this range" - return "".join(filter(str.isdigit, cls._chars_for_ranges)) - - @_lazyclassproperty - def alphanums(cls): - "all alphanumeric characters in this range" - return cls.alphas + cls.nums - - @_lazyclassproperty - def identchars(cls): - "all characters in this range that are valid identifier characters, plus underscore '_'" - return "".join( - sorted( - set( - "".join(filter(str.isidentifier, cls._chars_for_ranges)) - + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº" - + "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ" - + "_" - ) - ) - ) - - @_lazyclassproperty - def identbodychars(cls): - """ - all characters in this range that are valid identifier body characters, - plus the digits 0-9 - """ - return "".join( - sorted( - set( - cls.identchars - + "0123456789" - + "".join( - [c for c in cls._chars_for_ranges if ("_" + c).isidentifier()] - ) - ) - ) - ) - - -class pyparsing_unicode(unicode_set): - """ - A namespace class for defining common language unicode_sets. - """ - - # fmt: off - - # define ranges in language character sets - _ranges: UnicodeRangeList = [ - (0x0020, sys.maxunicode), - ] - - class BasicMultilingualPlane(unicode_set): - "Unicode set for the Basic Multilingual Plane" - _ranges: UnicodeRangeList = [ - (0x0020, 0xFFFF), - ] - - class Latin1(unicode_set): - "Unicode set for Latin-1 Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x0020, 0x007E), - (0x00A0, 0x00FF), - ] - - class LatinA(unicode_set): - "Unicode set for Latin-A Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x0100, 0x017F), - ] - - class LatinB(unicode_set): - "Unicode set for Latin-B Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x0180, 0x024F), - ] - - class Greek(unicode_set): - "Unicode set for Greek Unicode Character Ranges" - _ranges: UnicodeRangeList = [ - (0x0342, 0x0345), - (0x0370, 0x0377), - (0x037A, 0x037F), - (0x0384, 0x038A), - (0x038C,), - (0x038E, 0x03A1), - (0x03A3, 0x03E1), - (0x03F0, 0x03FF), - (0x1D26, 0x1D2A), - (0x1D5E,), - (0x1D60,), - (0x1D66, 0x1D6A), - (0x1F00, 0x1F15), - (0x1F18, 0x1F1D), - (0x1F20, 0x1F45), - (0x1F48, 0x1F4D), - (0x1F50, 0x1F57), - (0x1F59,), - (0x1F5B,), - (0x1F5D,), - (0x1F5F, 0x1F7D), - (0x1F80, 0x1FB4), - (0x1FB6, 0x1FC4), - (0x1FC6, 0x1FD3), - (0x1FD6, 0x1FDB), - (0x1FDD, 0x1FEF), - (0x1FF2, 0x1FF4), - (0x1FF6, 0x1FFE), - (0x2129,), - (0x2719, 0x271A), - (0xAB65,), - (0x10140, 0x1018D), - (0x101A0,), - (0x1D200, 0x1D245), - (0x1F7A1, 0x1F7A7), - ] - - class Cyrillic(unicode_set): - "Unicode set for Cyrillic Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x0400, 0x052F), - (0x1C80, 0x1C88), - (0x1D2B,), - (0x1D78,), - (0x2DE0, 0x2DFF), - (0xA640, 0xA672), - (0xA674, 0xA69F), - (0xFE2E, 0xFE2F), - ] - - class Chinese(unicode_set): - "Unicode set for Chinese Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x2E80, 0x2E99), - (0x2E9B, 0x2EF3), - (0x31C0, 0x31E3), - (0x3400, 0x4DB5), - (0x4E00, 0x9FEF), - (0xA700, 0xA707), - (0xF900, 0xFA6D), - (0xFA70, 0xFAD9), - (0x16FE2, 0x16FE3), - (0x1F210, 0x1F212), - (0x1F214, 0x1F23B), - (0x1F240, 0x1F248), - (0x20000, 0x2A6D6), - (0x2A700, 0x2B734), - (0x2B740, 0x2B81D), - (0x2B820, 0x2CEA1), - (0x2CEB0, 0x2EBE0), - (0x2F800, 0x2FA1D), - ] - - class Japanese(unicode_set): - "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges" - _ranges: UnicodeRangeList = [] - - class Kanji(unicode_set): - "Unicode set for Kanji Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x4E00, 0x9FBF), - (0x3000, 0x303F), - ] - - class Hiragana(unicode_set): - "Unicode set for Hiragana Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x3041, 0x3096), - (0x3099, 0x30A0), - (0x30FC,), - (0xFF70,), - (0x1B001,), - (0x1B150, 0x1B152), - (0x1F200,), - ] - - class Katakana(unicode_set): - "Unicode set for Katakana Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x3099, 0x309C), - (0x30A0, 0x30FF), - (0x31F0, 0x31FF), - (0x32D0, 0x32FE), - (0xFF65, 0xFF9F), - (0x1B000,), - (0x1B164, 0x1B167), - (0x1F201, 0x1F202), - (0x1F213,), - ] - - class Hangul(unicode_set): - "Unicode set for Hangul (Korean) Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x1100, 0x11FF), - (0x302E, 0x302F), - (0x3131, 0x318E), - (0x3200, 0x321C), - (0x3260, 0x327B), - (0x327E,), - (0xA960, 0xA97C), - (0xAC00, 0xD7A3), - (0xD7B0, 0xD7C6), - (0xD7CB, 0xD7FB), - (0xFFA0, 0xFFBE), - (0xFFC2, 0xFFC7), - (0xFFCA, 0xFFCF), - (0xFFD2, 0xFFD7), - (0xFFDA, 0xFFDC), - ] - - Korean = Hangul - - class CJK(Chinese, Japanese, Hangul): - "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range" - - class Thai(unicode_set): - "Unicode set for Thai Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x0E01, 0x0E3A), - (0x0E3F, 0x0E5B) - ] - - class Arabic(unicode_set): - "Unicode set for Arabic Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x0600, 0x061B), - (0x061E, 0x06FF), - (0x0700, 0x077F), - ] - - class Hebrew(unicode_set): - "Unicode set for Hebrew Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x0591, 0x05C7), - (0x05D0, 0x05EA), - (0x05EF, 0x05F4), - (0xFB1D, 0xFB36), - (0xFB38, 0xFB3C), - (0xFB3E,), - (0xFB40, 0xFB41), - (0xFB43, 0xFB44), - (0xFB46, 0xFB4F), - ] - - class Devanagari(unicode_set): - "Unicode set for Devanagari Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x0900, 0x097F), - (0xA8E0, 0xA8FF) - ] - - # fmt: on - - -pyparsing_unicode.Japanese._ranges = ( - pyparsing_unicode.Japanese.Kanji._ranges - + pyparsing_unicode.Japanese.Hiragana._ranges - + pyparsing_unicode.Japanese.Katakana._ranges -) - -pyparsing_unicode.BMP = pyparsing_unicode.BasicMultilingualPlane - -# add language identifiers using language Unicode -pyparsing_unicode.العربية = pyparsing_unicode.Arabic -pyparsing_unicode.中文 = pyparsing_unicode.Chinese -pyparsing_unicode.кириллица = pyparsing_unicode.Cyrillic -pyparsing_unicode.Ελληνικά = pyparsing_unicode.Greek -pyparsing_unicode.עִברִית = pyparsing_unicode.Hebrew -pyparsing_unicode.日本語 = pyparsing_unicode.Japanese -pyparsing_unicode.Japanese.漢字 = pyparsing_unicode.Japanese.Kanji -pyparsing_unicode.Japanese.カタカナ = pyparsing_unicode.Japanese.Katakana -pyparsing_unicode.Japanese.ひらがな = pyparsing_unicode.Japanese.Hiragana -pyparsing_unicode.한국어 = pyparsing_unicode.Korean -pyparsing_unicode.ไทย = pyparsing_unicode.Thai -pyparsing_unicode.देवनागरी = pyparsing_unicode.Devanagari |