/*
 * Layout of a 32-bit collation element (CE), as implied by the masks below:
 * bits 31..16 hold the primary weight, bits 15..8 the secondary weight and
 * bits 7..0 the tertiary weight.
 *
 * Each directive sits on its own line: a #define consumes the rest of its
 * physical line, so several directives on one line would not all be defined.
 */

/* mask off anything but primary order */
#define UCOL_PRIMARYORDERMASK 0xffff0000
/* mask off anything but secondary order */
#define UCOL_SECONDARYORDERMASK 0x0000ff00
/* mask off anything but tertiary order */
#define UCOL_TERTIARYORDERMASK 0x000000ff
/* primary order shift */
#define UCOL_PRIMARYORDERSHIFT 16
/* secondary order shift */
#define UCOL_SECONDARYORDERSHIFT 8

#define UCOL_IGNORABLE 0

/*
 * Get weights from a CE.
 *
 * UCOL_PRIMARYORDER shifts first and masks afterwards, so the result is
 * limited to 16 bits even when `order` is a negative signed value whose
 * right shift fills the upper bits with ones.
 */
#define UCOL_PRIMARYORDER(order) \
    (((order) >> UCOL_PRIMARYORDERSHIFT) & 0xffff)
#define UCOL_SECONDARYORDER(order) \
    (((order) & UCOL_SECONDARYORDERMASK) >> UCOL_SECONDARYORDERSHIFT)
#define UCOL_TERTIARYORDER(order) \
    ((order) & UCOL_TERTIARYORDERMASK)
/**
 * Sentinel value signalling that an error occurred during processing, or
 * that there are no more CEs to be returned.
 */
#define UCOL_PROCESSED_NULLORDER \
    ((int64_t)U_INT64_MAX)
/**
 * Fetch the processed ordering priority of the next collation element in
 * the text. A single character may contain more than one collation element.
 *
 * @param ixLow  pointer to an int32_t that receives the iterator index
 *               before the CE is fetched.
 * @param ixHigh pointer to an int32_t that receives the iterator index
 *               after the CE is fetched.
 * @param status pointer to a UErrorCode that receives any errors.
 * @return the ordering of the next collation element, or
 *         UCOL_PROCESSED_NULLORDER if an error has occurred or the end of
 *         the string has been reached.
 */
int64_t nextProcessed(int32_t *ixLow,
                      int32_t *ixHigh,
                      UErrorCode *status);

/**
 * Fetch the processed ordering priority of the previous collation element
 * in the text. A single character may contain more than one collation
 * element.
 *
 * @param ixLow  pointer to an int32_t that receives the iterator index
 *               after the CE is fetched.
 * @param ixHigh pointer to an int32_t that receives the iterator index
 *               before the CE is fetched.
 * @param status pointer to a UErrorCode that receives any errors. Notably,
 *               U_BUFFER_OVERFLOW_ERROR is reported if the internal stack
 *               buffer has been exhausted.
 * @return the ordering of the previous collation element, or
 *         UCOL_PROCESSED_NULLORDER if an error has occurred or the start of
 *         the string has been reached.
 */
int64_t previousProcessed(int32_t *ixLow,
                          int32_t *ixHigh,
                          UErrorCode *status);
struct USearch { // required since collation element iterator does not have a getText API const UChar *text;
int32_t textLength; // exact length
UBool isOverlap;
UBool isCanonicalMatch;
int16_t elementComparisonType;
UBreakIterator *internalBreakIter; // internal character breakiterator, lazily created.
UBreakIterator *breakIter; // caller provided character breakiterator // value USEARCH_DONE is the default value // if we are not at the start of the text or the end of the text, // depending on the iteration direction and matchedIndex is USEARCH_DONE // it means that we can't find any more matches in that particular direction
int32_t matchedIndex;
int32_t matchedLength;
UBool isForwardSearching;
UBool reset;
};
/**
 * String search object: the search state, the pattern, the collator and
 * the collation element iterators used while matching.
 */
struct UStringSearch {
    struct USearch *search;
    struct UPattern pattern;
    const UCollator *collator;
    const icu::Normalizer2 *nfd;

    // Positions within this collation element iterator are used to
    // determine whether we are at the start of the text.
    UCollationElements *textIter;
    icu::UCollationPCE *textProcessedIter;

    // Utility collation element iterator, used throughout the program for
    // temporary iteration.
    UCollationElements *utilIter;

    UBool ownCollator;
    UCollationStrength strength;
    uint32_t ceMask;
    uint32_t variableTop;
    UBool toShift;
};
/**
 * Exact matches, without checking the ends for extra accents.
 * The match after the current position within the collation element
 * iterator is the one to be found. After a match is found, the offset in
 * the collation element iterator is shifted to the start of the match.
 *
 * Implementation notes:
 * - Tertiary strength: collator->tertiaryMask cannot be used, because it is
 *   a preprocessed mask that takes case options into account. Since only
 *   exact matches are of concern here, it is not needed.
 * - Alternate handling: since only the 16 most significant bits are used,
 *   a compare without masking is safe. If the CE is a variable, it is
 *   masked to get only the primary value; no shifting to quaternary is
 *   required, since all primary values less than variableTop need to be
 *   masked off anyway.
 * - If the end character is composite and the pattern CE does not match
 *   the text CE, it is skipped until a match is found in the end composite
 *   character or the character has been passed. This is so that the
 *   pattern "a" can match the text "\u00e6".
 *
 * @param strsrch string search data
 * @param status  error status, if any
 * @return true if an exact match is found, false otherwise
 */
U_CFUNC UBool
usearch_handleNextExact(UStringSearch *strsrch,
                        UErrorCode *status);
/**
 * Canonical matches, searching forward.
 * According to the definition, matches found here include the whole span of
 * beginning and ending accents if it overlaps that region.
 *
 * @param strsrch string search data
 * @param status  error status, if any
 * @return true if a canonical match is found, false otherwise
 */
U_CFUNC UBool
usearch_handleNextCanonical(UStringSearch *strsrch,
                            UErrorCode *status);
/**
 * Gets the previous exact match.
 * The comments on usearch_handleNextExact apply here as well.
 *
 * @param strsrch string search data
 * @param status  error status, if any
 * @return true if an exact match is found, false otherwise
 */
U_CFUNC UBool
usearch_handlePreviousExact(UStringSearch *strsrch,
                            UErrorCode *status);
/**
 * Canonical matches, searching backward.
 * According to the definition, matches found here include the whole span of
 * beginning and ending accents if it overlaps that region.
 *
 * @param strsrch string search data
 * @param status  error status, if any
 * @return true if a canonical match is found, false otherwise
 */
U_CFUNC UBool
usearch_handlePreviousCanonical(UStringSearch *strsrch,
                                UErrorCode *status);
#endif/* #if !UCONFIG_NO_COLLATION */
#endif
/*
 * Measurement V0.5
 * Processing time: 0.0 seconds (preprocessed)
 *
 * The information on this web page was compiled carefully and to the best
 * of our knowledge. However, neither the completeness, nor the correctness,
 * nor the quality of the information provided is guaranteed.
 *
 * Remark: The colored syntax highlighting and the measurement are still
 * experimental.
 */