/** * An object that matches a fixed input string, implementing the * UnicodeMatcher API. This object also implements the * UnicodeReplacer API, allowing it to emit the matched text as * output. Since the match text may contain flexible match elements, * such as UnicodeSets, the emitted text is not the match pattern, but * instead a substring of the actual matched text. Following * convention, the output text is the leftmost match seen up to this * point. * * A StringMatcher may represent a segment, in which case it has a * positive segment number. This affects how the matcher converts * itself to a pattern but does not otherwise affect its function. * * A StringMatcher that is not a segment should not be used as a * UnicodeReplacer.
*/ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public UnicodeReplacer {
public:
/** * Construct a matcher that matches the given pattern string. * @param string the pattern to be matched, possibly containing * stand-ins that represent nested UnicodeMatcher objects. * @param start inclusive start index of text to be replaced * @param limit exclusive end index of text to be replaced; * must be greater than or equal to start * @param segmentNum the segment number from 1..n, or 0 if this is * not a segment. * @param data context object mapping stand-ins to * UnicodeMatcher objects.
*/
StringMatcher(const UnicodeString& string,
int32_t start,
int32_t limit,
int32_t segmentNum, const TransliterationRuleData& data);
/** * Copy constructor * @param o the object to be copied.
*/
StringMatcher(const StringMatcher& o);
/** * Destructor
*/ virtual ~StringMatcher();
/** * Implement UnicodeFunctor * @return a copy of the object.
*/ virtual StringMatcher* clone() const override;
/** * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer * and return the pointer. * @return the UnicodeMatcher point.
*/ virtual UnicodeMatcher* toMatcher() const override;
/** * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer * and return the pointer. * @return the UnicodeReplacer pointer.
*/ virtual UnicodeReplacer* toReplacer() const override;
/** * Implement UnicodeMatcher * @param text the text to be matched * @param offset on input, the index into text at which to begin * matching. On output, the limit of the matched text. The * number of matched characters is the output value of offset * minus the input value. Offset should always point to the * HIGH SURROGATE (leading code unit) of a pair of surrogates, * both on entry and upon return. * @param limit the limit index of text to be matched. Greater * than offset for a forward direction match, less than offset for * a backward direction match. The last character to be * considered for matching will be text.charAt(limit-1) in the * forward direction or text.charAt(limit+1) in the backward * direction. * @param incremental if true, then assume further characters may * be inserted at limit and check for partial matching. Otherwise * assume the text as given is complete. * @return a match degree value indicating a full match, a partial * match, or a mismatch. If incremental is false then * U_PARTIAL_MATCH should never be returned.
*/ virtual UMatchDegree matches(const Replaceable& text,
int32_t& offset,
int32_t limit,
UBool incremental) override;
/** * Implement UnicodeMatcher * @param result Output param to receive the pattern. * @param escapeUnprintable if True then escape the unprintable characters. * @return A reference to 'result'.
*/ virtual UnicodeString& toPattern(UnicodeString& result,
UBool escapeUnprintable = false) const override;
/** * Implement UnicodeMatcher * Returns true if this matcher will match a character c, where c * & 0xFF == v, at offset, in the forward direction (with limit > * offset). This is used by <tt>RuleBasedTransliterator</tt> for * indexing. * @param v the given value * @return true if this matcher will match a character c, * where c & 0xFF == v
*/ virtual UBool matchesIndexValue(uint8_t v) const override;
/** * Replace characters in 'text' from 'start' to 'limit' with the * output text of this object. Update the 'cursor' parameter to * give the cursor position and return the length of the * replacement text. * * @param text the text to be matched * @param start inclusive start index of text to be replaced * @param limit exclusive end index of text to be replaced; * must be greater than or equal to start * @param cursor output parameter for the cursor position. * Not all replacer objects will update this, but in a complete * tree of replacer objects, representing the entire output side * of a transliteration rule, at least one must update it. * @return the number of 16-bit code units in the text replacing * the characters at offsets start..(limit-1) in text
*/ virtual int32_t replace(Replaceable& text,
int32_t start,
int32_t limit,
int32_t& cursor) override;
/** * Returns a string representation of this replacer. If the * result of calling this function is passed to the appropriate * parser, typically TransliteratorParser, it will produce another * replacer that is equal to this one. * @param result the string to receive the pattern. Previous * contents will be deleted. * @param escapeUnprintable if true then convert unprintable * character to their hex escape representations, \\uxxxx or * \\Uxxxxxxxx. Unprintable characters are defined by * Utility.isUnprintable(). * @return a reference to 'result'.
*/ virtual UnicodeString& toReplacerPattern(UnicodeString& result,
UBool escapeUnprintable) const override;
/** * Remove any match data. This must be called before performing a * set of matches with this segment.
*/ void resetMatch();
/** * ICU "poor man's RTTI", returns a UClassID for the actual class.
*/ virtual UClassID getDynamicClassID() const override;
/** * ICU "poor man's RTTI", returns a UClassID for this class.
*/ static UClassID U_EXPORT2 getStaticClassID();
/** * Union the set of all characters that may output by this object * into the given set. * @param toUnionTo the set into which to union the output characters
*/ virtualvoid addReplacementSetTo(UnicodeSet& toUnionTo) const override;
private:
/** * The text to be matched.
*/
UnicodeString pattern;
/** * Context object that maps stand-ins to matcher and replacer * objects.
*/ const TransliterationRuleData* data;
/** * The segment number, 1-based, or 0 if not a segment.
*/
int32_t segmentNumber;
/** * Start offset, in the match text, of the <em>rightmost</em> * match.
*/
int32_t matchStart;
/** * Limit offset, in the match text, of the <em>rightmost</em> * match.
*/
int32_t matchLimit;
};
U_NAMESPACE_END
#endif/* #if !UCONFIG_NO_TRANSLITERATION */
#endif
Messung V0.5
¤ Dauer der Verarbeitung: 0.10 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.