# This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re from collections import namedtuple from unicodedata import category, combining, normalize
# Keep this function in sync with IsCombiningDiacritic in nsUnicodeProperties.h.
def is_combining_diacritic(char):
    """Return whether *char* is treated as a combining diacritic.

    The decision is based solely on the canonical combining class reported
    by ``unicodedata.combining``: the character qualifies unless its class
    is one of the excluded values listed below.
    """
    non_diacritic_classes = (
        UNICODE_COMBINING_CLASS_NOT_REORDERED,
        UNICODE_COMBINING_CLASS_KANA_VOICING,
        UNICODE_COMBINING_CLASS_VIRAMA,
        # Additional numeric combining classes that are excluded as well.
        91,
        129,
        130,
        132,
    )
    combining_class = combining(char)
    for excluded in non_diacritic_classes:
        if combining_class == excluded:
            return False
    return True
# Keep this function in sync with IsMathOrMusicSymbol in nsUnicodeProperties.h.
def is_math_or_music_symbol(char):
    """Return True when the Unicode general category of *char* is Sm or So.

    "Sm" is the math-symbol category and "So" the other-symbol category.
    """
    general_category = category(char)
    return general_category == "Sm" or general_category == "So"
def changes_plane(char, base_char):
    """Return True if *char* and *base_char* differ above their low 16 bits."""
    # Mappings that would change the first 16 bits of a character are not
    # currently supported. This is because the mapping table only records the
    # last 16 bits of the base character and also because moving into or out of
    # the basic multilingual plane would change the length of a UTF-16 string.
    char_plane = ord(char) >> 16
    base_plane = ord(base_char) >> 16
    return char_plane != base_plane
def main(header, fallback_table):
    """Build the base-character mapping table and its block/index structures.

    Mappings are collected from two sources: canonical (NFD) decompositions
    whose second character is a combining diacritic, and an ASCII fallback
    table read from *fallback_table*. They are then sorted and grouped into
    contiguous blocks, one per 256-code-point page, plus a page index.

    Args:
        header: Output target for the generated data.
            NOTE(review): not referenced in the portion of the function
            visible here; presumably written to further down -- confirm.
        fallback_table: Path of a UTF-8 text file. Only lines of the form
            ``<char> → <replacement> ;`` are used.
    """
    mappings = {}

    # Glean mappings from decompositions
    for codepoint in range(UNICODE_LIMIT):
        char = chr(codepoint)
        if is_combining_diacritic(char) or is_math_or_music_symbol(char):
            continue
        decomposition = normalize("NFD", char)
        if len(decomposition) < 2:
            continue
        base_char = decomposition[0]
        if changes_plane(char, base_char):
            continue
        next_char = decomposition[1]
        if not is_combining_diacritic(next_char):
            # Hangul syllables decompose but do not actually have diacritics.
            # This also excludes decompositions with the Japanese marks U+3099
            # and U+309A (COMBINING KATAKANA-HIRAGANA [SEMI-]VOICED SOUND
            # MARK), which we should not ignore for searching (bug 1624244).
            continue
        mappings[char] = base_char

    # Add mappings from the ASCII fallback table
    fallback_line = re.compile("^(.) → (.+?) ;")
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(fallback_table, encoding="UTF-8") as fallback_file:
        for line in fallback_file:
            m = fallback_line.match(line)
            if not m:
                continue
            char = m.group(1)
            decomposition = m.group(2)
            # Strip surrounding single quotes, then a leading backslash escape.
            if len(decomposition) >= 3:
                if decomposition.startswith("'") and decomposition.endswith("'"):
                    decomposition = decomposition[1:-1]
            if len(decomposition) >= 2:
                if decomposition.startswith("\\"):
                    decomposition = decomposition[1:]
            # Only single-character replacements can be stored in the table.
            if len(decomposition) > 1:
                continue
            if changes_plane(char, decomposition):
                continue
            mappings[char] = decomposition

    # Organize mappings into contiguous blocks
    mappings = sorted(BaseCharMapping(ord(k), ord(v)) for k, v in mappings.items())
    blocks = []
    i = 0
    # Iterate up to and including the final entry: stopping one short would
    # silently drop a last mapping that sits alone in its 256-code-point page
    # (or a table with a single entry). The inner loops keep their own
    # ``i < len(mappings) - 1`` guards, so ``mappings[i + 1]`` stays in range.
    while i < len(mappings):
        offset = i
        first = mappings[i].char & 0xFF
        # Extend the block while the next mapping lies in the same page
        # (same value of ``char >> 8``).
        while (
            i < len(mappings) - 1
            and mappings[i].char >> 8 == mappings[i + 1].char >> 8
        ):
            # Fill any gap before the next mapping with identity mappings so
            # that the block covers a contiguous range of code points.
            while (
                i < len(mappings) - 1
                and mappings[i].char >> 8 == mappings[i + 1].char >> 8
                and mappings[i + 1].char - mappings[i].char > 1
            ):
                char = mappings[i].char + 1
                mappings.insert(i + 1, BaseCharMapping(char, char))
                i += 1
            i += 1
        last = mappings[i].char & 0xFF
        blocks.append(BaseCharMappingBlock(first, last, offset))
        i += 1

    # Build the page index: entry N holds the index of the block covering
    # page N; pages skipped before a block are padded with 255.
    indexes = []
    for i, block in enumerate(blocks):
        while len(indexes) < mappings[block.offset].char >> 8:
            indexes.append(255)
        indexes.append(i)
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.