class IDNAError(UnicodeError):
    """Base exception for all IDNA-encoding related problems."""
class IDNABidiError(IDNAError):
    """Exception when bidirectional requirements are not satisfied."""
class InvalidCodepoint(IDNAError):
    """Exception when a disallowed or unallocated codepoint is used."""
class InvalidCodepointContext(IDNAError):
    """Exception when the codepoint is not valid in the context it is used."""
def _combining_class(cp: int) -> int:
v = unicodedata.combining(chr(cp)) if v == 0: ifnot unicodedata.name(chr(cp)): raise ValueError('Unknown character in unicodedata') return v
def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool:
    """Report whether *label* is within the permitted overall length.

    Args:
        label: The string or byte string to measure with ``len``.
        trailing_dot: Whether the value includes a trailing dot, which
            raises the limit from 253 to 254.

    Returns:
        ``True`` if ``len(label)`` does not exceed the limit, else ``False``.
    """
    max_length = 254 if trailing_dot else 253
    return len(label) <= max_length
def check_bidi(label: str, check_ltr: bool = False) -> bool:
    """Validate *label* against the numbered Bidi rules checked below.

    The rule numbers in the comments appear to follow RFC 5893 section 2
    (confirm against the specification).

    Args:
        label: The label to check, as a Unicode string.
        check_ltr: When ``True`` the rules are applied even if the label
            contains no right-to-left characters.

    Returns:
        ``True`` when the label passes; failures are reported by raising.

    Raises:
        IDNABidiError: If a character has no known directionality or any of
            the rules is violated.
    """
    rtl_triggers = ('R', 'AL', 'AN')
    rtl_allowed = ('R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM')
    rtl_endings = ('R', 'AL', 'EN', 'AN')
    ltr_allowed = ('L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM')
    ltr_endings = ('L', 'EN')

    # Bidi rules should only be applied if string contains RTL characters
    bidi_label = False
    for (idx, cp) in enumerate(label, 1):
        direction = unicodedata.bidirectional(cp)
        if direction == '':
            # String likely comes from a newer version of Unicode
            raise IDNABidiError('Unknown directionality in label {} at position {}'.format(repr(label), idx))
        if direction in rtl_triggers:
            bidi_label = True
    if not bidi_label and not check_ltr:
        return True

    # Bidi rule 1
    direction = unicodedata.bidirectional(label[0])
    if direction in ('R', 'AL'):
        rtl = True
    elif direction == 'L':
        rtl = False
    else:
        raise IDNABidiError('First codepoint in label {} must be directionality L, R or AL'.format(repr(label)))

    valid_ending = False
    number_type = None  # type: Optional[str]
    for (idx, cp) in enumerate(label, 1):
        direction = unicodedata.bidirectional(cp)

        if rtl:
            # Bidi rule 2
            if direction not in rtl_allowed:
                raise IDNABidiError('Invalid direction for codepoint at position {} in a right-to-left label'.format(idx))
            # Bidi rule 3
            # NSM characters leave the current ending state untouched.
            if direction in rtl_endings:
                valid_ending = True
            elif direction != 'NSM':
                valid_ending = False
            # Bidi rule 4
            if direction in ('AN', 'EN'):
                if not number_type:
                    number_type = direction
                elif number_type != direction:
                    raise IDNABidiError('Can not mix numeral types in a right-to-left label')
        else:
            # Bidi rule 5
            if direction not in ltr_allowed:
                raise IDNABidiError('Invalid direction for codepoint at position {} in a left-to-right label'.format(idx))
            # Bidi rule 6
            if direction in ltr_endings:
                valid_ending = True
            elif direction != 'NSM':
                valid_ending = False

    if not valid_ending:
        raise IDNABidiError('Label ends with illegal codepoint directionality')

    return True
def check_initial_combiner(label: str) -> bool:
    """Reject labels whose first character is a combining mark.

    Args:
        label: The label to check; its first character is inspected.

    Returns:
        ``True`` when the first character's Unicode general category does
        not start with ``'M'``.

    Raises:
        IDNAError: If the first character's general category starts with
            ``'M'``.
    """
    if unicodedata.category(label[0])[0] == 'M':
        raise IDNAError('Label begins with an illegal combining character')
    return True
def check_hyphen_ok(label: str) -> bool:
    """Check the hyphen placement restrictions on *label*.

    Args:
        label: The label to check.

    Returns:
        ``True`` when the label passes both hyphen checks.

    Raises:
        IDNAError: If the 3rd and 4th characters are both hyphens, or if the
            label starts or ends with a hyphen.
    """
    if label[2:4] == '--':
        raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
    if label[0] == '-' or label[-1] == '-':
        raise IDNAError('Label must not start or end with a hyphen')
    return True
def check_nfc(label: str) -> None:
    """Ensure *label* is already in Unicode Normalization Form C.

    Args:
        label: The label to check.

    Raises:
        IDNAError: If NFC-normalizing the label would change it.
    """
    if unicodedata.normalize('NFC', label) != label:
        raise IDNAError('Label must be in Normalization Form C')
if pos > 0: if _combining_class(ord(label[pos - 1])) == _virama_combining_class: returnTrue
ok = False for i in range(pos-1, -1, -1):
joining_type = idnadata.joining_types.get(ord(label[i])) if joining_type == ord('T'): continue if joining_type in [ord('L'), ord('D')]:
ok = True break
ifnot ok: returnFalse
ok = False for i in range(pos+1, len(label)):
joining_type = idnadata.joining_types.get(ord(label[i])) if joining_type == ord('T'): continue if joining_type in [ord('R'), ord('D')]:
ok = True break return ok
if cp_value == 0x200d:
if pos > 0: if _combining_class(ord(label[pos - 1])) == _virama_combining_class: returnTrue returnFalse
if cp_value == 0x00b7: if 0 < pos < len(label)-1: if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c: returnTrue returnFalse
elif cp_value == 0x0375: if pos < len(label)-1 and len(label) > 1: return _is_script(label[pos + 1], 'Greek') returnFalse
elif cp_value == 0x05f3 or cp_value == 0x05f4: if pos > 0: return _is_script(label[pos - 1], 'Hebrew') returnFalse
elif cp_value == 0x30fb: for cp in label: if cp == '\u30fb': continue if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'): returnTrue returnFalse
elif 0x660 <= cp_value <= 0x669: for cp in label: if 0x6f0 <= ord(cp) <= 0x06f9: returnFalse returnTrue
elif 0x6f0 <= cp_value <= 0x6f9: for cp in label: if 0x660 <= ord(cp) <= 0x0669: returnFalse returnTrue
for (pos, cp) in enumerate(label):
cp_value = ord(cp) if intranges_contain(cp_value, idnadata.codepoint_classes['PVALID']): continue elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']): try: ifnot valid_contextj(label, pos): raise InvalidCodepointContext('Joiner {} not allowed at position {} in {}'.format(
_unot(cp_value), pos+1, repr(label))) except ValueError: raise IDNAError('Unknown codepoint adjacent to joiner {} at position {} in {}'.format(
_unot(cp_value), pos+1, repr(label))) elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']): ifnot valid_contexto(label, pos): raise InvalidCodepointContext('Codepoint {} not allowed at position {} in {}'.format(_unot(cp_value), pos+1, repr(label))) else: raise InvalidCodepoint('Codepoint {} at position {} of {} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
label_bytes = label_bytes.lower() if label_bytes.startswith(_alabel_prefix):
label_bytes = label_bytes[len(_alabel_prefix):] ifnot label_bytes: raise IDNAError('Malformed A-label, no Punycode eligible content found') if label_bytes.decode('ascii')[-1] == '-': raise IDNAError('A-label must not end with a hyphen') else:
check_label(label_bytes) return label_bytes.decode('ascii')
def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str:
    """Re-map the characters in the string according to UTS46 processing.

    Each character is looked up in the ``uts46data`` table and, depending on
    the row's status code and the flags, is kept, replaced by the row's
    replacement string, dropped (status ``'I'``), or rejected.

    Args:
        domain: The string to re-map.
        std3_rules: When ``False``, rows with status ``'3'`` are accepted
            (kept as-is, or replaced if the row carries a replacement).
        transitional: Selects how rows with status ``'D'`` are handled:
            replaced when ``True``, kept unchanged when ``False``.

    Returns:
        The re-mapped string, normalized to NFC.

    Raises:
        InvalidCodepoint: If a character's row does not permit it under the
            given flags, or the table lookup raises ``IndexError``.
    """
    from .uts46data import uts46data

    pieces = []
    for pos, char in enumerate(domain):
        code_point = ord(char)
        try:
            # The first 256 rows are indexed directly by code point; above
            # that, rows are located by bisection on (code_point, 'Z').
            if code_point < 256:
                row_index = code_point
            else:
                row_index = bisect.bisect_left(uts46data, (code_point, 'Z')) - 1
            uts46row = uts46data[row_index]
            status = uts46row[1]
            replacement = None  # type: Optional[str]
            if len(uts46row) == 3:
                replacement = uts46row[2]  # type: ignore

            keep_char = (
                status == 'V'
                or (status == 'D' and not transitional)
                or (status == '3' and not std3_rules and replacement is None)
            )
            use_replacement = replacement is not None and (
                status == 'M'
                or (status == '3' and not std3_rules)
                or (status == 'D' and transitional)
            )

            if keep_char:
                pieces.append(char)
            elif use_replacement:
                pieces.append(replacement)
            elif status != 'I':
                # Route rejected rows through the same handler as a failed
                # table lookup.
                raise IndexError()
        except IndexError:
            raise InvalidCodepoint(
                'Codepoint {} not allowed at position {} in {}'.format(
                    _unot(code_point), pos + 1, repr(domain)))

    return unicodedata.normalize('NFC', ''.join(pieces))
def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False, transitional: bool = False) -> bytes:
    """Encode a domain name into its dot-joined byte form, label by label.

    Args:
        s: The domain name. ``bytes``/``bytearray`` input is first decoded
            as ASCII.
        strict: When ``True`` labels are split on ``'.'`` only; otherwise
            ``_unicode_dots_re`` is used to split.
        uts46: When ``True`` the input is passed through ``uts46_remap``
            before splitting.
        std3_rules: Forwarded to ``uts46_remap``.
        transitional: Forwarded to ``uts46_remap``.

    Returns:
        The labels converted by ``alabel`` and joined with ``b'.'``; a
        trailing dot in the input is preserved.

    Raises:
        IDNAError: If byte input is not ASCII, the domain is empty, a label
            is empty, or the result exceeds the permitted length.
    """
    if isinstance(s, (bytes, bytearray)):
        try:
            s = s.decode('ascii')
        except UnicodeDecodeError as err:
            raise IDNAError('should pass a unicode string to the function rather than a byte string.') from err
    if uts46:
        s = uts46_remap(s, std3_rules, transitional)

    if strict:
        labels = s.split('.')
    else:
        labels = _unicode_dots_re.split(s)
    if not labels or labels == ['']:
        raise IDNAError('Empty domain')

    # A final empty label means the input ended with a dot; remember it so
    # the dot can be re-appended after encoding.
    trailing_dot = False
    if labels[-1] == '':
        del labels[-1]
        trailing_dot = True

    result = []
    for label in labels:
        encoded_label = alabel(label)
        if not encoded_label:
            raise IDNAError('Empty label')
        result.append(encoded_label)
    if trailing_dot:
        result.append(b'')

    encoded = b'.'.join(result)
    if not valid_string_length(encoded, trailing_dot):
        raise IDNAError('Domain too long')
    return encoded
def decode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False) -> str:
    """Decode a domain name into its dot-joined Unicode form, label by label.

    Args:
        s: The domain name. ``bytes``/``bytearray`` input is first decoded
            as ASCII.
        strict: When ``True`` labels are split on ``'.'`` only; otherwise
            ``_unicode_dots_re`` is used to split.
        uts46: When ``True`` the input is passed through ``uts46_remap``
            (always non-transitional) before splitting.
        std3_rules: Forwarded to ``uts46_remap``.

    Returns:
        The labels converted by ``ulabel`` and joined with ``'.'``; a
        trailing dot in the input is preserved.

    Raises:
        IDNAError: If byte input is not ASCII, the domain is empty, or a
            label is empty.
    """
    if isinstance(s, (bytes, bytearray)):
        try:
            s = s.decode('ascii')
        except UnicodeDecodeError as err:
            raise IDNAError('Invalid ASCII in A-label') from err
    if uts46:
        s = uts46_remap(s, std3_rules, False)

    if strict:
        labels = s.split('.')
    else:
        labels = _unicode_dots_re.split(s)
    if not labels or labels == ['']:
        raise IDNAError('Empty domain')

    # A final empty label means the input ended with a dot; remember it so
    # the dot can be re-appended after decoding.
    trailing_dot = False
    if not labels[-1]:
        del labels[-1]
        trailing_dot = True

    result = []
    for label in labels:
        decoded_label = ulabel(label)
        if not decoded_label:
            raise IDNAError('Empty label')
        result.append(decoded_label)
    if trailing_dot:
        result.append('')
    return '.'.join(result)
Messung V0.5
¤ Dauer der Verarbeitung: 0.13 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.