umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); if (U_FAILURE(*status))
{ return nullptr;
}
SpoofData *sd = new SpoofData(data, length, *status); if (sd == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR; return nullptr;
}
if (U_FAILURE(*status)) { delete sd; return nullptr;
}
SpoofImpl *si = new SpoofImpl(sd, *status); if (si == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR; delete sd; // explicit delete as the destructor for si won't be called. return nullptr;
}
if (U_FAILURE(*status)) { delete si; // no delete for sd, as the si destructor will delete it. return nullptr;
}
// Verify that the requested checks are all ones (bits) that // are acceptable, known values. if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) {
*status = U_ILLEGAL_ARGUMENT_ERROR; return;
}
U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &id1, const icu::UnicodeString &id2,
UErrorCode *status) { const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); if (U_FAILURE(*status)) { return 0;
} // // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable, // and for definitions of the types (single, whole, mixed-script) of confusables.
// We only care about a few of the check flags. Ignore the others. // If no tests relevant to this function have been specified, return an error. // TODO: is this really the right thing to do? It's probably an error on the caller's part, // but logically we would just return 0 (no error). if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
*status = U_INVALID_STATE_ERROR; return 0;
}
// Compute the skeletons and check for confusability.
UnicodeString id1Skeleton;
uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id1, id1Skeleton, status);
UnicodeString id2Skeleton;
uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id2, id2Skeleton, status); if (U_FAILURE(*status)) { return 0; } if (id1Skeleton != id2Skeleton) { return 0;
}
// If we get here, the strings are confusable. Now we just need to set the flags for the appropriate classes // of confusables according to UTS 39 section 4. // Start by computing the resolved script sets of id1 and id2.
ScriptSet id1RSS;
This->getResolvedScriptSet(id1, id1RSS, *status);
ScriptSet id2RSS;
This->getResolvedScriptSet(id2, id2RSS, *status);
// Turn on all applicable flags
int32_t result = 0; if (id1RSS.intersects(id2RSS)) {
result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
} else {
result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
}
}
// Turn off flags that the user doesn't want if ((This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) == 0) {
result &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE;
} if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) == 0) {
result &= ~USPOOF_MIXED_SCRIPT_CONFUSABLE;
} if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) == 0) {
result &= ~USPOOF_WHOLE_SCRIPT_CONFUSABLE;
}
/**
 * Bidi-aware confusability test for two identifiers.
 *
 * Both identifiers are reduced to their bidi skeletons (for the given
 * direction) via uspoof_getBidiSkeletonUnicodeString(); the strings are
 * confusable only when those skeletons compare equal.  For a confusable pair
 * the result is classified as single-, mixed- and/or whole-script confusable
 * from the resolved script sets of the two inputs (see UTS 39 section 4 for
 * the algorithm and the definitions of these classes), and then masked with
 * the checks enabled on the checker.
 *
 * @param sc        The spoof checker; validated through SpoofImpl::validateThis().
 * @param direction Direction forwarded to the bidi skeleton computation.
 * @param id1       First identifier.
 * @param id2       Second identifier.
 * @param status    In/out error code.  Set to U_INVALID_STATE_ERROR when the
 *                  checker does not have USPOOF_CONFUSABLE enabled.
 * @return          0 when validation fails, when USPOOF_CONFUSABLE is not
 *                  enabled, when a skeleton could not be computed, or when the
 *                  skeletons differ; otherwise the applicable
 *                  USPOOF_*_SCRIPT_CONFUSABLE bits that are also present in the
 *                  checker's fChecks.
 */
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
                                                                UBiDiDirection direction,
                                                                const icu::UnicodeString &id1,
                                                                const icu::UnicodeString &id2,
                                                                UErrorCode *status) {
    const SpoofImpl *impl = SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    // Only the confusability-related check flags matter here; all others are
    // ignored.  If none of them is enabled, report an error.
    // TODO: is this really the right thing to do?  It is probably a mistake on
    //       the caller's part, but logically we could just return 0 (no error).
    if ((impl->fChecks & USPOOF_CONFUSABLE) == 0) {
        *status = U_INVALID_STATE_ERROR;
        return 0;
    }

    // Build both bidi skeletons, then test the status once for the pair.
    UnicodeString skeleton1;
    UnicodeString skeleton2;
    uspoof_getBidiSkeletonUnicodeString(sc, direction, id1, skeleton1, status);
    uspoof_getBidiSkeletonUnicodeString(sc, direction, id2, skeleton2, status);
    if (U_FAILURE(*status) || skeleton1 != skeleton2) {
        // Either an error occurred or the strings are not confusable.
        return 0;
    }

    // The strings are confusable.  Classify the kind of confusability from the
    // resolved script sets of the two identifiers (UTS 39 section 4).
    ScriptSet scripts1;
    impl->getResolvedScriptSet(id1, scripts1, *status);
    ScriptSet scripts2;
    impl->getResolvedScriptSet(id2, scripts2, *status);

    uint32_t flags;
    if (scripts1.intersects(scripts2)) {
        flags = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
    } else if (scripts1.isEmpty() || scripts2.isEmpty()) {
        // Disjoint script sets with at least one of them empty: mixed-script only.
        flags = USPOOF_MIXED_SCRIPT_CONFUSABLE;
    } else {
        // Disjoint, both non-empty: mixed-script and whole-script.
        flags = USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE;
    }

    // Drop every flag the user did not ask for.
    return flags & impl->fChecks;
}
if (0 != (This->fChecks & USPOOF_RESTRICTION_LEVEL)) {
URestrictionLevel idRestrictionLevel = This->getRestrictionLevel(id, *status); if (idRestrictionLevel > This->fRestrictionLevel) {
result |= USPOOF_RESTRICTION_LEVEL;
}
checkResult->fRestrictionLevel = idRestrictionLevel;
}
if (0 != (This->fChecks & USPOOF_MIXED_NUMBERS)) {
UnicodeSet numerics;
This->getNumerics(id, numerics, *status); if (numerics.size() > 1) {
result |= USPOOF_MIXED_NUMBERS;
}
checkResult->fNumerics = numerics; // UnicodeSet::operator=
}
if (0 != (This->fChecks & USPOOF_HIDDEN_OVERLAY)) {
int32_t index = This->findHiddenOverlay(id, *status); if (index != -1) {
result |= USPOOF_HIDDEN_OVERLAY;
}
}
if (0 != (This->fChecks & USPOOF_CHAR_LIMIT)) {
int32_t i;
UChar32 c;
int32_t length = id.length(); for (i=0; i<length ;) {
c = id.char32At(i);
i += U16_LENGTH(c); if (!This->fAllowedCharsSet->contains(c)) {
result |= USPOOF_CHAR_LIMIT; break;
}
}
}
if (0 != (This->fChecks & USPOOF_INVISIBLE)) { // This check needs to be done on NFD input
UnicodeString nfdText;
gNfdNormalizer->normalize(id, nfdText, *status);
int32_t nfdLength = nfdText.length();
// scan for more than one occurrence of the same non-spacing mark // in a sequence of non-spacing marks.
int32_t i;
UChar32 c;
UChar32 firstNonspacingMark = 0;
UBool haveMultipleMarks = false;
UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence.
for (i=0; i<nfdLength ;) {
c = nfdText.char32At(i);
i += U16_LENGTH(c); if (u_charType(c) != U_NON_SPACING_MARK) {
firstNonspacingMark = 0; if (haveMultipleMarks) {
marksSeenSoFar.clear();
haveMultipleMarks = false;
} continue;
} if (firstNonspacingMark == 0) {
firstNonspacingMark = c; continue;
} if (!haveMultipleMarks) {
marksSeenSoFar.add(firstNonspacingMark);
haveMultipleMarks = true;
} if (marksSeenSoFar.contains(c)) { // report the error, and stop scanning. // No need to find more than the first failure.
result |= USPOOF_INVISIBLE; break;
}
marksSeenSoFar.add(c);
}
}
// The type parameter is deprecated since ICU 58; any number may be passed.
constexpr uint32_t deprecatedType = 58; return uspoof_getSkeletonUnicodeString(sc, deprecatedType, reordered, dest, status);
}
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
0.2
Bemerkung: (vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.