class unicode_set:
    """
    A collection of Unicode characters, used to build the language-specific
    strings ``alphas``, ``nums``, ``alphanums``, and ``printables``.

    The characters are described by the class attribute ``_ranges``, a list
    of code point ranges. Each entry is either a 2-tuple ``(first, last)``,
    inclusive at both ends, or a 1-tuple ``(x,)``, which is handled the same
    as ``(x, x)``::

        _ranges = [
            (0x0020, 0x007E),
            (0x00A0,),
        ]

    Sets can also be combined through multiple inheritance of other
    unicode sets::

        class CJK(Chinese, Japanese, Korean):
            pass
    """

    _ranges: UnicodeRangeList = []

    @_lazyclassproperty
    def _chars_for_ranges(cls):
        """sorted list of the distinct characters covered by this class's ranges"""
        codepoints = set()
        # Walk the MRO and stop at unicode_set itself, so only the classes
        # listed ahead of it contribute their ``_ranges``.
        for klass in cls.__mro__:
            if klass is unicode_set:
                break
            for bounds in getattr(klass, "_ranges", ()):
                # bounds[-1] is the upper end for a 2-tuple and the sole
                # element for a 1-tuple; the +1 makes the range inclusive.
                codepoints.update(range(bounds[0], bounds[-1] + 1))
        return [chr(cp) for cp in sorted(codepoints)]

    @_lazyclassproperty
    def printables(cls):
        """all non-whitespace characters in this range"""
        return "".join(ch for ch in cls._chars_for_ranges if not ch.isspace())

    @_lazyclassproperty
    def alphas(cls):
        """all alphabetic characters in this range"""
        return "".join(ch for ch in cls._chars_for_ranges if ch.isalpha())

    @_lazyclassproperty
    def nums(cls):
        """all numeric digit characters in this range"""
        return "".join(ch for ch in cls._chars_for_ranges if ch.isdigit())

    @_lazyclassproperty
    def alphanums(cls):
        """all alphanumeric characters in this range (``alphas`` followed by ``nums``)"""
        letters = cls.alphas
        digits = cls.nums
        return letters + digits

    @_lazyclassproperty
    def identchars(cls):
        """
        all characters in this range that are valid identifier characters;
        the ASCII letters, a fixed set of Latin-1 letters, and underscore '_'
        are always included
        """
        always_included = (
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº"
            "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
            "_"
        )
        leading = {ch for ch in cls._chars_for_ranges if ch.isidentifier()}
        leading.update(always_included)
        return "".join(sorted(leading))

    @_lazyclassproperty
    def identbodychars(cls):
        """
        all characters in this range that are valid identifier body characters,
        plus the digits 0-9, and · (Unicode MIDDLE DOT)
        """
        body = set(cls.identchars)
        body.update("0123456789·")
        # A character is a valid body character if it can follow a leading "_".
        body.update(
            ch for ch in cls._chars_for_ranges if ("_" + ch).isidentifier()
        )
        return "".join(sorted(body))

    @_lazyclassproperty
    def identifier(cls):
        """
        a pyparsing Word expression for an identifier using this range's definitions for
        identchars and identbodychars
        """
        from pip._vendor.pyparsing import Word

        first_chars = cls.identchars
        following_chars = cls.identbodychars
        return Word(first_chars, following_chars)
class pyparsing_unicode(unicode_set): """
A namespace classfor defining common language unicode_sets. """
# fmt: off
# define ranges in language character sets
_ranges: UnicodeRangeList = [
(0x0020, sys.maxunicode),
]
class BasicMultilingualPlane(unicode_set): """Unicode set for the Basic Multilingual Plane"""
_ranges: UnicodeRangeList = [
(0x0020, 0xFFFF),
]
class Latin1(unicode_set): """Unicode set for Latin-1 Unicode Character Range"""
_ranges: UnicodeRangeList = [
(0x0020, 0x007E),
(0x00A0, 0x00FF),
]
class LatinA(unicode_set): """Unicode set for Latin-A Unicode Character Range"""
_ranges: UnicodeRangeList = [
(0x0100, 0x017F),
]
class LatinB(unicode_set): """Unicode set for Latin-B Unicode Character Range"""
_ranges: UnicodeRangeList = [
(0x0180, 0x024F),
]
class CJK(Chinese, Japanese, Hangul): """Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"""
class Thai(unicode_set): """Unicode set for Thai Unicode Character Range"""
_ranges: UnicodeRangeList = [
(0x0E01, 0x0E3A),
(0x0E3F, 0x0E5B)
]
class Arabic(unicode_set): """Unicode set for Arabic Unicode Character Range"""
_ranges: UnicodeRangeList = [
(0x0600, 0x061B),
(0x061E, 0x06FF),
(0x0700, 0x077F),
]
class Hebrew(unicode_set): """Unicode set for Hebrew Unicode Character Range"""
_ranges: UnicodeRangeList = [
(0x0591, 0x05C7),
(0x05D0, 0x05EA),
(0x05EF, 0x05F4),
(0xFB1D, 0xFB36),
(0xFB38, 0xFB3C),
(0xFB3E,),
(0xFB40, 0xFB41),
(0xFB43, 0xFB44),
(0xFB46, 0xFB4F),
]
class Devanagari(unicode_set): """Unicode set for Devanagari Unicode Character Range"""
_ranges: UnicodeRangeList = [
(0x0900, 0x097F),
(0xA8E0, 0xA8FF)
]
BMP = BasicMultilingualPlane
# add language identifiers using language Unicode
العربية = Arabic
中文 = Chinese
кириллица = Cyrillic
Ελληνικά = Greek
עִברִית = Hebrew
日本語 = Japanese
한국어 = Korean
ไทย = Thai
देवनागरी = Devanagari
# fmt: on
Messung V0.5
¤ Dauer der Verarbeitung: 0.12 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.