#include <utility>   // std::pair, used by IntPair

#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "unicode/uniset.h"
#include "unicode/parseerr.h"
#include "uhash.h"
#include "uvector.h"
#include "unicode/symtable.h"   // For UnicodeSet parsing, is the interface that
                                //    looks up references to $variables within a set.
U_NAMESPACE_BEGIN
// Forward declarations of rule-builder types that this header only refers to
// through pointers; their full definitions are not needed here.
class RBBINode;
class RBBIRuleScanner;
class RBBISetBuilder;
class RBBITableBuilder;
struct RBBIRuleTableEl;
//-------------------------------------------------------------------------------- // // RBBISymbolTable. Implements SymbolTable interface that is used by the // UnicodeSet parser to resolve references to $variables. // //-------------------------------------------------------------------------------- class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one public: // of these structs for each entry.
RBBISymbolTableEntry();
UnicodeString key;
RBBINode *val;
~RBBISymbolTableEntry();
private:
RBBISymbolTableEntry(const RBBISymbolTableEntry &other) = delete; // forbid copying of this class
RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other) = delete; // forbid copying of this class
};
// RBBISymbolTable
//   Derives from SymbolTable so that it can be handed to the UnicodeSet parser
//   for resolving $variable references.
//
//   NOTE(review): only private members are visible in this declaration. Since the
//   class derives from SymbolTable, constructors and interface overrides would be
//   expected - confirm that no public section was lost from this copy of the file.
class RBBISymbolTable : public UMemory, public SymbolTable {
private:
    const UnicodeString      &fRules;        // Held by reference, not copied.
    UHashtable               *fHashTable;
    RBBIRuleScanner          *fRuleScanner;

    // These next two fields are part of the mechanism for passing references to
    //   already-constructed UnicodeSets back to the UnicodeSet constructor
    //   when the pattern includes $variable references.
    const UnicodeString      ffffString;      // = "/uffff"
    UnicodeSet              *fCachedSetLookup;

#ifdef RBBI_DEBUG
    virtual void      rbbiSymtablePrint() const;
#else
    // A do-nothing inline function for non-debug builds.  Member funcs can't be empty
    //  or the call sites won't compile.
    // The macro below turns each call site into an assignment to fFakeField;
    //  note that the trailing semicolon is part of the macro expansion.
    int32_t fFakeField;
    #define rbbiSymtablePrint() fFakeField=0;
#endif

private:
    // Deleted rather than merely declared, matching the other classes in this header.
    RBBISymbolTable(const RBBISymbolTable &other) = delete; // forbid copying of this class
    RBBISymbolTable &operator=(const RBBISymbolTable &other) = delete; // forbid copying of this class
};
//-------------------------------------------------------------------------------- // // class RBBIRuleBuilder The top-level class handling RBBI rule compiling. // //-------------------------------------------------------------------------------- class RBBIRuleBuilder : public UMemory { public:
// Create a rule based break iterator from a set of rules. // This function is the main entry point into the rule builder. The // public ICU API for creating RBBIs uses this function to do the actual work. // static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
UParseError *parseError,
UErrorCode &status);
public: // The "public" functions and data members that appear below are accessed // (and shared) by the various parts that make up the rule builder. They // are NOT intended to be accessed by anything outside of the // rule builder implementation.
RBBIRuleBuilder(const UnicodeString &rules,
UParseError *parseErr,
UErrorCode &status
);
virtual ~RBBIRuleBuilder();
/** * Build the state tables and char class Trie from the source rules.
*/
RBBIDataHeader *build(UErrorCode &status);
/** * Fold together redundant character classes (table columns) and * redundant states (table rows). Done after initial table generation, * before serializing the result.
*/ void optimizeTables();
char *fDebugEnv; // controls debug trace output
UErrorCode *fStatus; // Error reporting. Keeping status
UParseError *fParseError; // here avoids passing it everywhere. const UnicodeString &fRules; // The rule string that we are compiling
UnicodeString fStrippedRules; // The rule string, with comments stripped.
RBBIRuleScanner *fScanner; // The scanner.
RBBINode *fForwardTree; // The parse trees, generated by the scanner,
RBBINode *fReverseTree; // then manipulated by subsequent steps.
RBBINode *fSafeFwdTree;
RBBINode *fSafeRevTree;
RBBINode **fDefaultTree; // For rules not qualified with a ! // the tree to which they belong to.
UBool fChainRules; // True for chained Unicode TR style rules. // False for traditional regexp rules.
UBool fLookAheadHardBreak; // True: Look ahead matches cause an // immediate break, no continuing for the // longest match.
RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
UVector *fUSetNodes; // Vector of all uset nodes.
RBBITableBuilder *fForwardTable; // State transition table, build time form.
UVector *fRuleStatusVals; // The values that can be returned // from getRuleStatus().
RBBIDataHeader *flattenData(); // Create the flattened (runtime format) // data tables.. private:
RBBIRuleBuilder(const RBBIRuleBuilder &other) = delete; // forbid copying of this class
RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other) = delete; // forbid copying of this class
};
//----------------------------------------------------------------------------
//
//   RBBISetTableEl   is an entry in the hash table of UnicodeSets that have
//                    been encountered.  The val Node will be of nodetype uset
//                    and contain pointers to the actual UnicodeSets.
//                    The Key is the source string for initializing the set.
//
//                    The hash table is used to avoid creating duplicate
//                    unnamed (not $var references) UnicodeSets.
//
//                    Memory Management:
//                       The Hash Table owns these RBBISetTableEl structs and
//                            the key strings.  It does NOT own the val nodes.
//
//----------------------------------------------------------------------------
struct RBBISetTableEl {
    UnicodeString *key;   // Source string for initializing the set; owned by the hash table.
    RBBINode      *val;   // Node of nodetype uset; NOT owned by the hash table.
};
/**
 *   A pair of ints, used to bundle pairs of states or pairs of character classes.
 *   Plain alias for std::pair<int32_t, int32_t>; members are .first and .second.
 */
using IntPair = std::pair<int32_t, int32_t>;
//----------------------------------------------------------------------------
//
//   RBBIDebugPrintf    Printf equivalent, for debugging output.
//                      Conditional compilation of the implementation lets us
//                      get rid of the stdio dependency in environments where it
//                      is unavailable.
//
//   With RBBI_DEBUG defined:   RBBIDebugPrintf -> printf, RBBIDebugPuts -> puts.
//   Without RBBI_DEBUG:        RBBIDebugPuts(arg) expands to nothing (arg is not
//                              evaluated) and RBBIDebugPrintf is left undefined,
//                              so its call sites must themselves be conditional.
//
//   NOTE(review): in this header the #include <stdio.h> below sits between
//   U_NAMESPACE_BEGIN and U_NAMESPACE_END - confirm that is intended.
//
//----------------------------------------------------------------------------
#ifdef RBBI_DEBUG
#include <stdio.h>
#define RBBIDebugPrintf printf
#define RBBIDebugPuts puts
#else
#undef RBBIDebugPrintf
#define RBBIDebugPuts(arg)
#endif
U_NAMESPACE_END
#endif/* #if !UCONFIG_NO_BREAK_ITERATION */
#endif
// Messung V0.5
// ¤ Dauer der Verarbeitung: 0.10 Sekunden
// (vorverarbeitet)
// ¤
// Die Informationen auf dieser Webseite wurden
// nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
// noch Qualität der bereitgestellten Informationen zugesichert.
// Bemerkung:
// Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.