/**
 * Range test for the two IDS unary operator code points.
 * Takes the same argument list that u_hasBinaryProperty() passes to
 * BinaryProperty::contains(); the property descriptor and the property
 * selector are ignored.
 *
 * @param c code point to test
 * @return true if c is U+2FFE or U+2FFF, otherwise false
 */
static UBool isIDSUnaryOperator(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    // New in Unicode 15.1 for just two characters.
    return 0x2FFE<=c && c<=0x2FFF;
}
/**
 * Tests whether code point c has the binary property `which`.
 *
 * @param c     code point to test
 * @param which property selector; values outside
 *              [UCHAR_BINARY_START, UCHAR_BINARY_LIMIT) yield false
 * @return result of the property's contains() callback from binProps[],
 *         or false for an unknown property
 */
U_CAPI UBool U_EXPORT2
u_hasBinaryProperty(UChar32 c, UProperty which) {
    /* c is range-checked in the functions that are called from here */
    if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) {
        /* not a known binary property */
        return false;
    }
    // Dispatch through the per-property table; the descriptor is handed
    // back to its own callback together with the original selector.
    const BinaryProperty &prop=binProps[which];
    return prop.contains(prop, c, which);
}
/*
 * Checks if the Unicode character can start a Unicode identifier.
 * Thin wrapper: queries the UCHAR_ID_START binary property.
 */
U_CAPI UBool U_EXPORT2
u_isIDStart(UChar32 c) {
    return u_hasBinaryProperty(c, UCHAR_ID_START);
}
/*
 * Checks if the Unicode character can be a Unicode identifier part
 * other than starting the identifier.
 * Thin wrapper: queries the UCHAR_ID_CONTINUE binary property.
 */
U_CAPI UBool U_EXPORT2
u_isIDPart(UChar32 c) {
    return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE);
}
/**
 * Tests whether the string s has the binary property `which`.
 *
 * A string that consists of exactly one code point is answered via
 * u_hasBinaryProperty(). Anything else (empty string, or more than one
 * code point) is forwarded to EmojiProps, but only for properties in the
 * range UCHAR_BASIC_EMOJI..UCHAR_RGI_EMOJI.
 *
 * @param s      UTF-16 string; may be nullptr only if length is 0
 * @param length number of char16_t units in s, or negative if s is
 *               NUL-terminated
 * @param which  property selector
 * @return false for a nullptr string with non-zero length; otherwise the
 *         property value as described above
 */
U_CAPI UBool U_EXPORT2
u_stringHasBinaryProperty(const char16_t *s, int32_t length, UProperty which) {
    if (s == nullptr && length != 0) { return false; }
    if (length == 1) {
        return u_hasBinaryProperty(s[0], which);  // single code point
    } else if (length == 2 || (length < 0 && *s != 0)) {  // not empty string
        // first code point
        int32_t i = 0;
        UChar32 c;
        U16_NEXT(s, i, length, c);
        // Did that one code point consume the whole string?
        if (length > 0 ? i == length : s[i] == 0) {
            return u_hasBinaryProperty(c, which);  // single code point
        }
    }
    // Only call into EmojiProps for a relevant property,
    // so that we do not unnecessarily try to load its data file.
    return UCHAR_BASIC_EMOJI <= which && which <= UCHAR_RGI_EMOJI &&
        EmojiProps::hasBinaryProperty(s, length, which);
}
/* * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. * Hangul_Syllable_Type used to be fully redundant with a subset of Grapheme_Cluster_Break. * * Starting with Unicode 16, this is no longer true for HST=V vs. GCB=V in some cases: * Some Kirat Rai vowels are given GCB=V for proper grapheme clustering, but * they are of course not related to Hangul syllables.
*/ staticconst UHangulSyllableType gcbToHst[]={
U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */
U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */
U_HST_NOT_APPLICABLE, /* U_GCB_CR */
U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */
U_HST_LEADING_JAMO, /* U_GCB_L */
U_HST_NOT_APPLICABLE, /* U_GCB_LF */
U_HST_LV_SYLLABLE, /* U_GCB_LV */
U_HST_LVT_SYLLABLE, /* U_GCB_LVT */
U_HST_TRAILING_JAMO, /* U_GCB_T */
U_HST_VOWEL_JAMO /* U_GCB_V */ /* * Omit GCB values beyond what we need for hst. * The code below checks for the array length.
*/
};
/**
 * Derives the Hangul_Syllable_Type of c from its Grapheme_Cluster_Break
 * value via the gcbToHst[] table.
 * The property descriptor and the property selector are ignored.
 *
 * @param c code point to look up
 * @return the mapped UHangulSyllableType value, or U_HST_NOT_APPLICABLE
 *         for supplementary code points and for GCB values that are not
 *         covered by gcbToHst[]
 */
static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    // Ignore supplementary code points: They all have HST=NA.
    // This is a simple way to handle the GCB!=hst cases since Unicode 16 (Kirat Rai vowels).
    if(c>0xffff) {
        return U_HST_NOT_APPLICABLE;
    }
    /* see comments on gcbToHst[] above */
    // Extract the GCB field from properties vector word 2.
    int32_t gcb = static_cast<int32_t>(u_getUnicodeProperties(c, 2) & UPROPS_GCB_MASK) >> UPROPS_GCB_SHIFT;
    if(gcb<UPRV_LENGTHOF(gcbToHst)) {
        return gcbToHst[gcb];
    }
    // GCB values past the end of the table have no Hangul syllable type.
    return U_HST_NOT_APPLICABLE;
}
case UCHAR_BIDI_MIRRORING_GLYPH: return UPROPS_SRC_BIDI;
case UCHAR_CASE_FOLDING: case UCHAR_LOWERCASE_MAPPING: case UCHAR_SIMPLE_CASE_FOLDING: case UCHAR_SIMPLE_LOWERCASE_MAPPING: case UCHAR_SIMPLE_TITLECASE_MAPPING: case UCHAR_SIMPLE_UPPERCASE_MAPPING: case UCHAR_TITLECASE_MAPPING: case UCHAR_UPPERCASE_MAPPING: return UPROPS_SRC_CASE;
case UCHAR_ISO_COMMENT: case UCHAR_NAME: case UCHAR_UNICODE_1_NAME: return UPROPS_SRC_NAMES;
/**
 * Adds to the set behind `sa` the code points at which the values of the
 * properties from data source `src` may change.
 *
 * - UPROPS_SRC_ID_COMPAT_MATH: adds every entry of ID_COMPAT_MATH_CONTINUE
 *   (range limits) and, for every entry c of ID_COMPAT_MATH_START
 *   (single characters), both c and c + 1.
 * - UPROPS_SRC_MCM: adds every entry of MODIFIER_COMBINING_MARK (range limits).
 * - UPROPS_SRC_INPC / UPROPS_SRC_INSC / UPROPS_SRC_VO: adds the start of each
 *   same-value range of the corresponding layout trie.
 *
 * @param src        property data source
 * @param sa         set adder; sa->add(sa->set, c) is called for each start
 * @param pErrorCode in/out error code. Nothing is done if it already
 *                   indicates a failure. Set to U_ILLEGAL_ARGUMENT_ERROR for
 *                   any other src and to U_MISSING_RESOURCE_ERROR if the
 *                   selected trie is nullptr.
 */
U_CFUNC void U_EXPORT2
uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) { return; }
    if (src == UPROPS_SRC_ID_COMPAT_MATH) {
        // range limits
        for (UChar32 c : ID_COMPAT_MATH_CONTINUE) {
            sa->add(sa->set, c);
        }
        // single characters
        for (UChar32 c : ID_COMPAT_MATH_START) {
            sa->add(sa->set, c);
            sa->add(sa->set, c + 1);
        }
        return;
    }
    if (src == UPROPS_SRC_MCM) {
        // range limits
        for (UChar32 c : MODIFIER_COMBINING_MARK) {
            sa->add(sa->set, c);
        }
        return;
    }
    // The remaining sources are backed by the layout tries.
    if (!ulayout_ensureData(*pErrorCode)) { return; }
    const UCPTrie *trie;
    switch (src) {
    case UPROPS_SRC_INPC:
        trie = gInpcTrie;
        break;
    case UPROPS_SRC_INSC:
        trie = gInscTrie;
        break;
    case UPROPS_SRC_VO:
        trie = gVoTrie;
        break;
    default:
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if (trie == nullptr) {
        *pErrorCode = U_MISSING_RESOURCE_ERROR;
        return;
    }

    // Add the start code point of each same-value range of the trie.
    UChar32 start = 0, end;
    while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
                                   nullptr, nullptr, nullptr)) >= 0) {
        sa->add(sa->set, start);
        start = end + 1;
    }
}
/**
 * Tests whether code point c has the given Identifier_Type value.
 *
 * @param c    code point to test
 * @param type identifier type; values that do not index into
 *             uprops_idTypeToEncoded[] yield false
 * @return true if the encoded Identifier_Type data of c includes `type`
 */
U_CAPI bool U_EXPORT2
u_hasIDType(UChar32 c, UIdentifierType type) {
    uint32_t typeIndex = type;  // also guards against negative type integers
    if (typeIndex >= UPRV_LENGTHOF(uprops_idTypeToEncoded)) {
        return false;
    }
    uint32_t encodedType = uprops_idTypeToEncoded[typeIndex];
    uint32_t value = u_getUnicodeProperties(c, 2) >> UPROPS_2_ID_TYPE_SHIFT;
    if ((encodedType & UPROPS_ID_TYPE_BIT) != 0) {
        // Bit-encoded type: the stored value must be below
        // UPROPS_ID_TYPE_FORBIDDEN and must have one of the requested bits set.
        return value < UPROPS_ID_TYPE_FORBIDDEN && (value & encodedType) != 0;
    }
    // Otherwise the type is encoded as one exact value.
    return value == encodedType;
}
/**
 * Writes the Identifier_Type values of code point c into types[].
 *
 * @param c          code point to look up
 * @param types      output array; may be nullptr only if capacity is 0
 * @param capacity   number of elements available in types[]; must not be negative
 * @param pErrorCode in/out error code. Nothing is done if it already indicates
 *                   a failure. Set to U_ILLEGAL_ARGUMENT_ERROR for a negative
 *                   capacity or a nullptr array with positive capacity,
 *                   to U_BUFFER_OVERFLOW_ERROR if the values do not fit into
 *                   types[], and to U_INVALID_FORMAT_ERROR if the stored
 *                   single-value encoding is not recognized.
 * @return number of Identifier_Type values of c (also on overflow),
 *         or 0 on any other error
 */
U_CAPI int32_t U_EXPORT2
u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) { return 0; }
    if (capacity < 0 || (capacity > 0 && types == nullptr)) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    uint32_t value = u_getUnicodeProperties(c, 2) >> UPROPS_2_ID_TYPE_SHIFT;
    if ((value & UPROPS_ID_TYPE_FORBIDDEN) == UPROPS_ID_TYPE_FORBIDDEN ||
            value == UPROPS_ID_TYPE_NOT_CHARACTER) {
        // single value
        if (capacity > 0) {
            UIdentifierType t;
            switch (value) {
            case UPROPS_ID_TYPE_NOT_CHARACTER: t = U_ID_TYPE_NOT_CHARACTER; break;
            case UPROPS_ID_TYPE_DEPRECATED: t = U_ID_TYPE_DEPRECATED; break;
            case UPROPS_ID_TYPE_DEFAULT_IGNORABLE: t = U_ID_TYPE_DEFAULT_IGNORABLE; break;
            case UPROPS_ID_TYPE_NOT_NFKC: t = U_ID_TYPE_NOT_NFKC; break;
            case UPROPS_ID_TYPE_INCLUSION: t = U_ID_TYPE_INCLUSION; break;
            case UPROPS_ID_TYPE_RECOMMENDED: t = U_ID_TYPE_RECOMMENDED; break;
            default:
                *pErrorCode = U_INVALID_FORMAT_ERROR;
                return 0;
            }
            types[0] = t;
        } else {
            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
        }
        return 1;
    } else {
        // one or more combinable bits
        int32_t length = 0;
        maybeAppendType(value, UPROPS_ID_TYPE_NOT_XID, U_ID_TYPE_NOT_XID,
                        types, length, capacity);
        maybeAppendType(value, UPROPS_ID_TYPE_EXCLUSION, U_ID_TYPE_EXCLUSION,
                        types, length, capacity);
        maybeAppendType(value, UPROPS_ID_TYPE_OBSOLETE, U_ID_TYPE_OBSOLETE,
                        types, length, capacity);
        maybeAppendType(value, UPROPS_ID_TYPE_TECHNICAL, U_ID_TYPE_TECHNICAL,
                        types, length, capacity);
        maybeAppendType(value, UPROPS_ID_TYPE_UNCOMMON_USE, U_ID_TYPE_UNCOMMON_USE,
                        types, length, capacity);
        maybeAppendType(value, UPROPS_ID_TYPE_LIMITED_USE, U_ID_TYPE_LIMITED_USE,
                        types, length, capacity);
        // Overflow only when there are more values than slots. An exact fit
        // (length == capacity) is a success, consistent with the single-value
        // branch above which accepts capacity == 1 for one value.
        // NOTE(review): assumes maybeAppendType() stores a value only while
        // length < capacity and always counts it in length -- confirm.
        if (length > capacity) {
            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
        }
        return length;
    }
}
#if !UCONFIG_NO_NORMALIZATION

/**
 * Computes the FC_NFKC_Closure mapping of code point c on the fly.
 *
 * @param c            code point to map
 * @param dest         output buffer; may be nullptr only if destCapacity is 0
 * @param destCapacity capacity of dest in char16_t units; must not be negative
 * @param pErrorCode   in/out error code. Nothing is done if it is nullptr or
 *                     already indicates a failure. Set to
 *                     U_ILLEGAL_ARGUMENT_ERROR for invalid dest/destCapacity.
 * @return length of the mapping written to dest; an empty (length 0) result
 *         is produced when c has no closure mapping; 0 on argument or
 *         normalizer errors
 */
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, char16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==nullptr && destCapacity>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    // Compute the FC_NFKC_Closure on the fly:
    // We have the API for complete coverage of Unicode properties, although
    // this value by itself is not useful via API.
    // (What could be useful is a custom normalization table that combines
    // case folding and NFKC.)
    // For the derivation, see Unicode's DerivedNormalizationProps.txt.
    const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    // first: b = NFKC(Fold(a))
    UnicodeString folded1String;
    const char16_t *folded1;
    int32_t folded1Length=ucase_toFullFolding(c, &folded1, U_FOLD_CASE_DEFAULT);
    if(folded1Length<0) {
        // A negative length means there is no case folding mapping for c.
        const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc);
        if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) {
            return u_terminateUChars(dest, destCapacity, 0, pErrorCode);  // c does not change at all under CaseFolding+NFKC
        }
        folded1String.setTo(c);
    } else {
        if(folded1Length>UCASE_MAX_STRING_LENGTH) {
            // The return value is passed to setTo() as a single value here
            // (presumably the folded code point itself -- confirm against
            // ucase_toFullFolding()).
            folded1String.setTo(folded1Length);
        } else {
            // Otherwise folded1 points to a string of folded1Length units.
            folded1String.setTo(false, folded1, folded1Length);
        }
    }
    UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode);
    // second: c = NFKC(Fold(b))
    UnicodeString folded2String(kc1);
    UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode);
    // if (c != b) add the mapping from a to c
    if(U_FAILURE(*pErrorCode) || kc1==kc2) {
        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    } else {
        return kc2.extract(dest, destCapacity, *pErrorCode);
    }
}

#endif
Messung V0.5
¤ Dauer der Verarbeitung: 0.16 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.