// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File UCHAR.H
*
* Modification History:
*
* Date Name Description
* 04/02/97 aliu Creation.
* 03/29/99 helena Updated for C APIs.
* 4/15/99 Madhu Updated for C Implementation and Javadoc
* 5/20/99 Madhu Added the function u_getVersion()
* 8/19/1999 srl Upgraded scripts to Unicode 3.0
* 8/27/1999 schererm UCharDirection constants: U_...
* 11/11/1999 weiv added u_isalnum(), cleaned comments
* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
******************************************************************************
*/
#ifndef UCHAR_H
#define UCHAR_H
#include <stdbool.h>
#include "unicode/utypes.h"
#include "unicode/stringoptions.h"
#include "unicode/ucpmap.h"
/*
 * Forward-declare USet only once: USET_DEFINED records that the typedef has
 * already been emitted, and the declaration is hidden while U_IN_DOXYGEN is
 * defined. The whole condition must sit on a single logical line; a
 * preprocessor directive ends at the first newline.
 */
#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
#define USET_DEFINED
/**
 * USet is the C API type corresponding to C++ class UnicodeSet.
 * It is forward-declared here to avoid including unicode/uset.h file if related
 * APIs are not used.
 *
 * @see ucnv_getUnicodeSet
 * @stable ICU 2.4
 */
typedef struct USet USet;
#endif
U_CDECL_BEGIN
/*==========================================================================*/
/* Unicode version number */
/*==========================================================================*/
/**
 * Unicode version number, default for the current ICU version.
 * The actual Unicode Character Database (UCD) data is stored in uprops.icu
 * and may be generated from UCD files from a different Unicode version.
 * Call u_getUnicodeVersion to get the actual Unicode version of the data.
 *
 * The macro expands to a string literal.
 *
 * @see u_getUnicodeVersion
 * @stable ICU 2.0
 */
#define U_UNICODE_VERSION "16.0"
/**
* \file
* \brief C API: Unicode Properties
*
* This C API provides low-level access to the Unicode Character Database.
* In addition to raw property values, some convenience functions calculate
* derived properties, for example for Java-style programming.
*
* Unicode assigns each code point (not just assigned character) values for
* many properties.
* Most of them are simple boolean flags, or constants from a small enumerated list.
* For some properties, values are strings or other relatively more complex types.
*
* For more information see
* "About the Unicode Character Database" (http://www.unicode.org/ucd/)
* and the ICU User Guide chapter on Properties (https://unicode-org.github.io/icu/userguide/strings/properties).
*
* Many properties are accessible via generic functions that take a UProperty selector.
* - u_hasBinaryProperty() returns a binary value (true/false) per property and code point.
* - u_getIntPropertyValue() returns an integer value per property and code point.
* For each supported enumerated or catalog property, there is
* an enum type for all of the property's values, and
* u_getIntPropertyValue() returns the numeric values of those constants.
* - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with
* all code points for which the property is true.
* - u_getIntPropertyMap() returns a map for each
* ICU-supported enumerated/catalog/int-valued property which
* maps all Unicode code points to their values for that property.
*
* Many functions are designed to match java.lang.Character functions.
* See the individual function documentation,
* and see the JDK 1.4 java.lang.Character documentation
* at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
*
* There are also functions that provide easy migration from C/POSIX functions
* like isblank(). Their use is generally discouraged because the C/POSIX
* standards do not define their semantics beyond the ASCII range, which means
* that different implementations exhibit very different behavior.
* Instead, Unicode properties should be used directly.
*
* There are also only a few, broad C/POSIX character classes, and they tend
* to be used for conflicting purposes. For example, the "isalpha()" class
* is sometimes used to determine word boundaries, while a more sophisticated
* approach would at least distinguish initial letters from continuation
* characters (the latter including combining marks).
* (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
* Another example: There is no "istitle()" class for titlecase characters.
*
* ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
* ICU implements them according to the Standard Recommendations in
* Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
* (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
*
* API access for C/POSIX character classes is as follows:
* - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
* - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
* - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
* - punct: u_ispunct(c)
* - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
* - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
* - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
* - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
* - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
* - cntrl: u_charType(c)==U_CONTROL_CHAR
* - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
* - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
*
* Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
* the Standard Recommendations in UTS #18. Instead, they match Java
* functions according to their API documentation.
*
* \htmlonly
* The C/POSIX character classes are also available in UnicodeSet patterns,
* using patterns like [:graph:] or \p{graph}.
* \endhtmlonly
*
* Note: There are several ICU whitespace functions.
* Comparison:
* - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
* most of general categories "Z" (separators) + most whitespace ISO controls
* (including no-break spaces, but excluding IS1..IS4)
* - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
* - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
* - u_isspace: Z + whitespace ISO controls (including no-break spaces)
* - u_isblank: "horizontal spaces" = TAB + Zs
*/
/**
* Constants.
*/
/**
 * The lowest Unicode code point value. Code points are non-negative.
 *
 * @see UCHAR_MAX_VALUE
 * @stable ICU 2.0
 */
#define UCHAR_MIN_VALUE 0
/**
 * The highest Unicode code point value (scalar value) according to
 * The Unicode Standard, i.e. U+10FFFF.
 * This is a 21-bit value (20.1 bits, rounded up).
 * For a single character, UChar32 is a simple type that can hold any code point value.
 *
 * @see UChar32
 * @see UCHAR_MIN_VALUE
 * @stable ICU 2.0
 */
#define UCHAR_MAX_VALUE 0x10ffff
/**
 * Build a single-bit bit set (a flag) for a bit number.
 * The bit number x is expected to be in the range 0..31;
 * the result is an unsigned 32-bit value with only bit x set.
 * @stable ICU 2.1
 */
#define U_MASK(x) (((uint32_t)1) << (x))
/**
* Selection constants for Unicode properties.
* These constants are used in functions like u_hasBinaryProperty to select
* one of the Unicode properties.
*
* The properties APIs are intended to reflect Unicode properties as defined
* in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
*
* For details about the properties see
* UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
*
* Important: If ICU is built with UCD files from a Unicode version below, e.g., 3.2,
* then properties marked with "new in Unicode 3.2" are not or not fully available.
* Check u_getUnicodeVersion to be sure.
*
* The constants are grouped by the type of the property value.
* Each group has its own base value, named by a ..._START constant:
* - binary properties: UCHAR_BINARY_START (0)
* - enumerated/integer properties: UCHAR_INT_START (0x1000)
* - bit-mask properties: UCHAR_MASK_START (0x2000)
* - double properties: UCHAR_DOUBLE_START (0x3000)
* - string properties: UCHAR_STRING_START (0x4000)
* - properties with unusual value types: UCHAR_OTHER_PROPERTY_START (0x7000)
*
* UCHAR_INVALID_CODE (-1) lies outside of all groups.
* The ..._LIMIT constants are deprecated and are hidden when
* U_HIDE_DEPRECATED_API is defined; constants marked as draft are hidden when
* U_HIDE_DRAFT_API is defined.
*
* @see u_hasBinaryProperty
* @see u_getIntPropertyValue
* @see u_getUnicodeVersion
* @stable ICU 2.1
*/
typedef enum UProperty {
/*
* Note: UProperty constants are parsed by preparseucd.py.
* It matches lines like
* UCHAR_<Unicode property name>=<integer>,
*/
/* ---- Binary properties: selectors start at UCHAR_BINARY_START (0). ---- */
/* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
rather than UCHAR_BINARY_START. Likewise for other *_START
identifiers. */
/** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
UCHAR_ALPHABETIC=0,
/** First constant for binary Unicode properties. @stable ICU 2.1 */
UCHAR_BINARY_START=UCHAR_ALPHABETIC,
/** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
UCHAR_ASCII_HEX_DIGIT=1,
/** Binary property Bidi_Control.
Format controls which have specific functions
in the Bidi Algorithm. @stable ICU 2.1 */
UCHAR_BIDI_CONTROL=2,
/** Binary property Bidi_Mirrored.
Characters that may change display in RTL text.
Same as u_isMirrored.
See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
UCHAR_BIDI_MIRRORED=3,
/** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
UCHAR_DASH=4,
/** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
Ignorable in most processing.
<2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,
/** Binary property Deprecated (new in Unicode 3.2).
The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
UCHAR_DEPRECATED=6,
/** Binary property Diacritic. Characters that linguistically modify
the meaning of another character to which they apply. @stable ICU 2.1 */
UCHAR_DIACRITIC=7,
/** Binary property Extender.
Extend the value or shape of a preceding alphabetic character,
e.g., length and iteration marks. @stable ICU 2.1 */
UCHAR_EXTENDER=8,
/** Binary property Full_Composition_Exclusion.
CompositionExclusions.txt+Singleton Decompositions+
Non-Starter Decompositions. @stable ICU 2.1 */
UCHAR_FULL_COMPOSITION_EXCLUSION=9,
/** Binary property Grapheme_Base (new in Unicode 3.2).
For programmatic determination of grapheme cluster boundaries.
[0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
UCHAR_GRAPHEME_BASE=10,
/** Binary property Grapheme_Extend (new in Unicode 3.2).
For programmatic determination of grapheme cluster boundaries.
Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
UCHAR_GRAPHEME_EXTEND=11,
/** Binary property Grapheme_Link (new in Unicode 3.2).
For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
UCHAR_GRAPHEME_LINK=12,
/** Binary property Hex_Digit.
Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
UCHAR_HEX_DIGIT=13,
/** Binary property Hyphen. Dashes used to mark connections
between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
UCHAR_HYPHEN=14,
/** Binary property ID_Continue.
Characters that can continue an identifier.
DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
UCHAR_ID_CONTINUE=15,
/** Binary property ID_Start.
Characters that can start an identifier.
Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
UCHAR_ID_START=16,
/** Binary property Ideographic.
CJKV ideographs. @stable ICU 2.1 */
UCHAR_IDEOGRAPHIC=17,
/** Binary property IDS_Binary_Operator (new in Unicode 3.2).
For programmatic determination of
Ideographic Description Sequences. @stable ICU 2.1 */
UCHAR_IDS_BINARY_OPERATOR=18,
/** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
For programmatic determination of
Ideographic Description Sequences. @stable ICU 2.1 */
UCHAR_IDS_TRINARY_OPERATOR=19,
/** Binary property Join_Control.
Format controls for cursive joining and ligation. @stable ICU 2.1 */
UCHAR_JOIN_CONTROL=20,
/** Binary property Logical_Order_Exception (new in Unicode 3.2).
Characters that do not use logical order and
require special handling in most processing. @stable ICU 2.1 */
UCHAR_LOGICAL_ORDER_EXCEPTION=21,
/** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
Ll+Other_Lowercase @stable ICU 2.1 */
UCHAR_LOWERCASE=22,
/** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
UCHAR_MATH=23,
/** Binary property Noncharacter_Code_Point.
Code points that are explicitly defined as illegal
for the encoding of characters. @stable ICU 2.1 */
UCHAR_NONCHARACTER_CODE_POINT=24,
/** Binary property Quotation_Mark. @stable ICU 2.1 */
UCHAR_QUOTATION_MARK=25,
/** Binary property Radical (new in Unicode 3.2).
For programmatic determination of
Ideographic Description Sequences. @stable ICU 2.1 */
UCHAR_RADICAL=26,
/** Binary property Soft_Dotted (new in Unicode 3.2).
Characters with a "soft dot", like i or j.
An accent placed on these characters causes
the dot to disappear. @stable ICU 2.1 */
UCHAR_SOFT_DOTTED=27,
/** Binary property Terminal_Punctuation.
Punctuation characters that generally mark
the end of textual units. @stable ICU 2.1 */
UCHAR_TERMINAL_PUNCTUATION=28,
/** Binary property Unified_Ideograph (new in Unicode 3.2).
For programmatic determination of
Ideographic Description Sequences. @stable ICU 2.1 */
UCHAR_UNIFIED_IDEOGRAPH=29,
/** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
Lu+Other_Uppercase @stable ICU 2.1 */
UCHAR_UPPERCASE=30,
/** Binary property White_Space.
Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
UCHAR_WHITE_SPACE=31,
/** Binary property XID_Continue.
ID_Continue modified to allow closure under
normalization forms NFKC and NFKD. @stable ICU 2.1 */
UCHAR_XID_CONTINUE=32,
/** Binary property XID_Start. ID_Start modified to allow
closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
UCHAR_XID_START=33,
/** Binary property Case_Sensitive. Either the source of a case
mapping or _in_ the target of a case mapping. Not the same as
the general category Cased_Letter. @stable ICU 2.6 */
UCHAR_CASE_SENSITIVE=34,
/** Binary property STerm (new in Unicode 4.0.1).
Sentence Terminal. Used in UAX #29: Text Boundaries
(http://www.unicode.org/reports/tr29/)
@stable ICU 3.0 */
UCHAR_S_TERM=35,
/** Binary property Variation_Selector (new in Unicode 4.0.1).
Indicates all those characters that qualify as Variation Selectors.
For details on the behavior of these characters,
see StandardizedVariants.html and 15.6 Variation Selectors.
@stable ICU 3.0 */
UCHAR_VARIATION_SELECTOR=36,
/** Binary property NFD_Inert.
ICU-specific property for characters that are inert under NFD,
i.e., they do not interact with adjacent characters.
See the documentation for the Normalizer2 class and the
Normalizer2::isInert() method.
@stable ICU 3.0 */
UCHAR_NFD_INERT=37,
/** Binary property NFKD_Inert.
ICU-specific property for characters that are inert under NFKD,
i.e., they do not interact with adjacent characters.
See the documentation for the Normalizer2 class and the
Normalizer2::isInert() method.
@stable ICU 3.0 */
UCHAR_NFKD_INERT=38,
/** Binary property NFC_Inert.
ICU-specific property for characters that are inert under NFC,
i.e., they do not interact with adjacent characters.
See the documentation for the Normalizer2 class and the
Normalizer2::isInert() method.
@stable ICU 3.0 */
UCHAR_NFC_INERT=39,
/** Binary property NFKC_Inert.
ICU-specific property for characters that are inert under NFKC,
i.e., they do not interact with adjacent characters.
See the documentation for the Normalizer2 class and the
Normalizer2::isInert() method.
@stable ICU 3.0 */
UCHAR_NFKC_INERT=40,
/** Binary property Segment_Starter.
ICU-specific property for characters that are starters in terms of
Unicode normalization and combining character sequences.
They have ccc=0 and do not occur in non-initial position of the
canonical decomposition of any character
(like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)).
ICU uses this property for segmenting a string for generating a set of
canonically equivalent strings, e.g. for canonical closure while
processing collation tailoring rules.
@stable ICU 3.0 */
UCHAR_SEGMENT_STARTER=41,
/** Binary property Pattern_Syntax (new in Unicode 4.1).
See UAX #31 Identifier and Pattern Syntax
(http://www.unicode.org/reports/tr31/)
@stable ICU 3.4 */
UCHAR_PATTERN_SYNTAX=42,
/** Binary property Pattern_White_Space (new in Unicode 4.1).
See UAX #31 Identifier and Pattern Syntax
(http://www.unicode.org/reports/tr31/)
@stable ICU 3.4 */
UCHAR_PATTERN_WHITE_SPACE=43,
/** Binary property alnum (a C/POSIX character class).
Implemented according to the UTS #18 Annex C Standard Recommendation.
See the uchar.h file documentation.
@stable ICU 3.4 */
UCHAR_POSIX_ALNUM=44,
/** Binary property blank (a C/POSIX character class).
Implemented according to the UTS #18 Annex C Standard Recommendation.
See the uchar.h file documentation.
@stable ICU 3.4 */
UCHAR_POSIX_BLANK=45,
/** Binary property graph (a C/POSIX character class).
Implemented according to the UTS #18 Annex C Standard Recommendation.
See the uchar.h file documentation.
@stable ICU 3.4 */
UCHAR_POSIX_GRAPH=46,
/** Binary property print (a C/POSIX character class).
Implemented according to the UTS #18 Annex C Standard Recommendation.
See the uchar.h file documentation.
@stable ICU 3.4 */
UCHAR_POSIX_PRINT=47,
/** Binary property xdigit (a C/POSIX character class).
Implemented according to the UTS #18 Annex C Standard Recommendation.
See the uchar.h file documentation.
@stable ICU 3.4 */
UCHAR_POSIX_XDIGIT=48,
/** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */
UCHAR_CASED=49,
/** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */
UCHAR_CASE_IGNORABLE=50,
/** Binary property Changes_When_Lowercased. @stable ICU 4.4 */
UCHAR_CHANGES_WHEN_LOWERCASED=51,
/** Binary property Changes_When_Uppercased. @stable ICU 4.4 */
UCHAR_CHANGES_WHEN_UPPERCASED=52,
/** Binary property Changes_When_Titlecased. @stable ICU 4.4 */
UCHAR_CHANGES_WHEN_TITLECASED=53,
/** Binary property Changes_When_Casefolded. @stable ICU 4.4 */
UCHAR_CHANGES_WHEN_CASEFOLDED=54,
/** Binary property Changes_When_Casemapped. @stable ICU 4.4 */
UCHAR_CHANGES_WHEN_CASEMAPPED=55,
/** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */
UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
/**
* Binary property Emoji.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @stable ICU 57
*/
UCHAR_EMOJI=57,
/**
* Binary property Emoji_Presentation.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @stable ICU 57
*/
UCHAR_EMOJI_PRESENTATION=58,
/**
* Binary property Emoji_Modifier.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @stable ICU 57
*/
UCHAR_EMOJI_MODIFIER=59,
/**
* Binary property Emoji_Modifier_Base.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @stable ICU 57
*/
UCHAR_EMOJI_MODIFIER_BASE=60,
/**
* Binary property Emoji_Component.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @stable ICU 60
*/
UCHAR_EMOJI_COMPONENT=61,
/**
* Binary property Regional_Indicator.
* @stable ICU 60
*/
UCHAR_REGIONAL_INDICATOR=62,
/**
* Binary property Prepended_Concatenation_Mark.
* @stable ICU 60
*/
UCHAR_PREPENDED_CONCATENATION_MARK=63,
/**
* Binary property Extended_Pictographic.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @stable ICU 62
*/
UCHAR_EXTENDED_PICTOGRAPHIC=64,
/**
* Binary property of strings Basic_Emoji.
* See https://www.unicode.org/reports/tr51/#Emoji_Sets
*
* @stable ICU 70
*/
UCHAR_BASIC_EMOJI=65,
/**
* Binary property of strings Emoji_Keycap_Sequence.
* See https://www.unicode.org/reports/tr51/#Emoji_Sets
*
* @stable ICU 70
*/
UCHAR_EMOJI_KEYCAP_SEQUENCE=66,
/**
* Binary property of strings RGI_Emoji_Modifier_Sequence.
* See https://www.unicode.org/reports/tr51/#Emoji_Sets
*
* @stable ICU 70
*/
UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67,
/**
* Binary property of strings RGI_Emoji_Flag_Sequence.
* See https://www.unicode.org/reports/tr51/#Emoji_Sets
*
* @stable ICU 70
*/
UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68,
/**
* Binary property of strings RGI_Emoji_Tag_Sequence.
* See https://www.unicode.org/reports/tr51/#Emoji_Sets
*
* @stable ICU 70
*/
UCHAR_RGI_EMOJI_TAG_SEQUENCE=69,
/**
* Binary property of strings RGI_Emoji_ZWJ_Sequence.
* See https://www.unicode.org/reports/tr51/#Emoji_Sets
*
* @stable ICU 70
*/
UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70,
/**
* Binary property of strings RGI_Emoji.
* See https://www.unicode.org/reports/tr51/#Emoji_Sets
*
* @stable ICU 70
*/
UCHAR_RGI_EMOJI=71,
/**
* Binary property IDS_Unary_Operator.
* For programmatic determination of Ideographic Description Sequences.
*
* @stable ICU 74
*/
UCHAR_IDS_UNARY_OPERATOR=72,
/**
* Binary property ID_Compat_Math_Start.
* Used in mathematical identifier profile in UAX #31.
* @stable ICU 74
*/
UCHAR_ID_COMPAT_MATH_START=73,
/**
* Binary property ID_Compat_Math_Continue.
* Used in mathematical identifier profile in UAX #31.
* @stable ICU 74
*/
UCHAR_ID_COMPAT_MATH_CONTINUE=74,
#ifndef U_HIDE_DRAFT_API
/**
* Binary property Modifier_Combining_Mark.
* Used by the AMTRA algorithm in UAX #53.
* @draft ICU 76
*/
UCHAR_MODIFIER_COMBINING_MARK=75,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for binary Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UCHAR_BINARY_LIMIT=76,
#endif // U_HIDE_DEPRECATED_API
/* ---- Enumerated/integer properties: selectors start at UCHAR_INT_START (0x1000). ---- */
/** Enumerated property Bidi_Class.
Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
UCHAR_BIDI_CLASS=0x1000,
/** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
UCHAR_INT_START=UCHAR_BIDI_CLASS,
/** Enumerated property Block.
Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
UCHAR_BLOCK=0x1001,
/** Enumerated property Canonical_Combining_Class.
Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
UCHAR_CANONICAL_COMBINING_CLASS=0x1002,
/** Enumerated property Decomposition_Type.
Returns UDecompositionType values. @stable ICU 2.2 */
UCHAR_DECOMPOSITION_TYPE=0x1003,
/** Enumerated property East_Asian_Width.
See http://www.unicode.org/reports/tr11/
Returns UEastAsianWidth values. @stable ICU 2.2 */
UCHAR_EAST_ASIAN_WIDTH=0x1004,
/** Enumerated property General_Category.
Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
UCHAR_GENERAL_CATEGORY=0x1005,
/** Enumerated property Joining_Group.
Returns UJoiningGroup values. @stable ICU 2.2 */
UCHAR_JOINING_GROUP=0x1006,
/** Enumerated property Joining_Type.
Returns UJoiningType values. @stable ICU 2.2 */
UCHAR_JOINING_TYPE=0x1007,
/** Enumerated property Line_Break.
Returns ULineBreak values. @stable ICU 2.2 */
UCHAR_LINE_BREAK=0x1008,
/** Enumerated property Numeric_Type.
Returns UNumericType values. @stable ICU 2.2 */
UCHAR_NUMERIC_TYPE=0x1009,
/** Enumerated property Script.
Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
UCHAR_SCRIPT=0x100A,
/** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
Returns UHangulSyllableType values. @stable ICU 2.6 */
UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,
/** Enumerated property NFD_Quick_Check.
Returns UNormalizationCheckResult values. @stable ICU 3.0 */
UCHAR_NFD_QUICK_CHECK=0x100C,
/** Enumerated property NFKD_Quick_Check.
Returns UNormalizationCheckResult values. @stable ICU 3.0 */
UCHAR_NFKD_QUICK_CHECK=0x100D,
/** Enumerated property NFC_Quick_Check.
Returns UNormalizationCheckResult values. @stable ICU 3.0 */
UCHAR_NFC_QUICK_CHECK=0x100E,
/** Enumerated property NFKC_Quick_Check.
Returns UNormalizationCheckResult values. @stable ICU 3.0 */
UCHAR_NFKC_QUICK_CHECK=0x100F,
/** Enumerated property Lead_Canonical_Combining_Class.
ICU-specific property for the ccc of the first code point
of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
Useful for checking for canonically ordered text;
see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,
/** Enumerated property Trail_Canonical_Combining_Class.
ICU-specific property for the ccc of the last code point
of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
Useful for checking for canonically ordered text;
see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,
/** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
Used in UAX #29: Text Boundaries
(http://www.unicode.org/reports/tr29/)
Returns UGraphemeClusterBreak values. @stable ICU 3.4 */
UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,
/** Enumerated property Sentence_Break (new in Unicode 4.1).
Used in UAX #29: Text Boundaries
(http://www.unicode.org/reports/tr29/)
Returns USentenceBreak values. @stable ICU 3.4 */
UCHAR_SENTENCE_BREAK=0x1013,
/** Enumerated property Word_Break (new in Unicode 4.1).
Used in UAX #29: Text Boundaries
(http://www.unicode.org/reports/tr29/)
Returns UWordBreakValues values. @stable ICU 3.4 */
UCHAR_WORD_BREAK=0x1014,
/** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
Used in UAX #9: Unicode Bidirectional Algorithm
(http://www.unicode.org/reports/tr9/)
Returns UBidiPairedBracketType values. @stable ICU 52 */
UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
/**
* Enumerated property Indic_Positional_Category.
* New in Unicode 6.0 as provisional property Indic_Matra_Category;
* renamed and changed to informative in Unicode 8.0.
* See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt
* @stable ICU 63
*/
UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016,
/**
* Enumerated property Indic_Syllabic_Category.
* New in Unicode 6.0 as provisional; informative since Unicode 8.0.
* See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt
* @stable ICU 63
*/
UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017,
/**
* Enumerated property Vertical_Orientation.
* Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/).
* New as a UCD property in Unicode 10.0.
* @stable ICU 63
*/
UCHAR_VERTICAL_ORIENTATION=0x1018,
#ifndef U_HIDE_DRAFT_API
/**
* Enumerated property Identifier_Status.
* Used for UTS #39 General Security Profile for Identifiers
* (https://www.unicode.org/reports/tr39/#General_Security_Profile).
* @draft ICU 75
*/
UCHAR_IDENTIFIER_STATUS=0x1019,
/**
* Enumerated property Indic_Conjunct_Break.
* Used in the grapheme cluster break algorithm in UAX #29.
* @draft ICU 76
*/
UCHAR_INDIC_CONJUNCT_BREAK=0x101A,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for enumerated/integer Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UCHAR_INT_LIMIT=0x101B,
#endif // U_HIDE_DEPRECATED_API
/* ---- Bit-mask properties: selectors start at UCHAR_MASK_START (0x2000). ---- */
/** Bitmask property General_Category_Mask.
This is the General_Category property returned as a bit mask.
When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
returns bit masks for UCharCategory values where exactly one bit is set.
When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
a multi-bit mask is used for sets of categories like "Letters".
Mask values should be cast to uint32_t.
@stable ICU 2.4 */
UCHAR_GENERAL_CATEGORY_MASK=0x2000,
/** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for bit-mask Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UCHAR_MASK_LIMIT=0x2001,
#endif // U_HIDE_DEPRECATED_API
/* ---- Double properties: selectors start at UCHAR_DOUBLE_START (0x3000). ---- */
/** Double property Numeric_Value.
Corresponds to u_getNumericValue. @stable ICU 2.4 */
UCHAR_NUMERIC_VALUE=0x3000,
/** First constant for double Unicode properties. @stable ICU 2.4 */
UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for double Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UCHAR_DOUBLE_LIMIT=0x3001,
#endif // U_HIDE_DEPRECATED_API
/* ---- String properties: selectors start at UCHAR_STRING_START (0x4000). ---- */
/** String property Age.
Corresponds to u_charAge. @stable ICU 2.4 */
UCHAR_AGE=0x4000,
/** First constant for string Unicode properties. @stable ICU 2.4 */
UCHAR_STRING_START=UCHAR_AGE,
/** String property Bidi_Mirroring_Glyph.
Corresponds to u_charMirror. @stable ICU 2.4 */
UCHAR_BIDI_MIRRORING_GLYPH=0x4001,
/** String property Case_Folding.
Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
UCHAR_CASE_FOLDING=0x4002,
#ifndef U_HIDE_DEPRECATED_API
/** Deprecated string property ISO_Comment.
Corresponds to u_getISOComment. @deprecated ICU 49 */
UCHAR_ISO_COMMENT=0x4003,
#endif /* U_HIDE_DEPRECATED_API */
/** String property Lowercase_Mapping.
Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
UCHAR_LOWERCASE_MAPPING=0x4004,
/** String property Name.
Corresponds to u_charName. @stable ICU 2.4 */
UCHAR_NAME=0x4005,
/** String property Simple_Case_Folding.
Corresponds to u_foldCase. @stable ICU 2.4 */
UCHAR_SIMPLE_CASE_FOLDING=0x4006,
/** String property Simple_Lowercase_Mapping.
Corresponds to u_tolower. @stable ICU 2.4 */
UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,
/** String property Simple_Titlecase_Mapping.
Corresponds to u_totitle. @stable ICU 2.4 */
UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,
/** String property Simple_Uppercase_Mapping.
Corresponds to u_toupper. @stable ICU 2.4 */
UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,
/** String property Titlecase_Mapping.
Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
UCHAR_TITLECASE_MAPPING=0x400A,
#ifndef U_HIDE_DEPRECATED_API
/** String property Unicode_1_Name.
This property is of little practical value.
Beginning with ICU 49, ICU APIs return an empty string for this property.
Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */
UCHAR_UNICODE_1_NAME=0x400B,
#endif /* U_HIDE_DEPRECATED_API */
/** String property Uppercase_Mapping.
Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
UCHAR_UPPERCASE_MAPPING=0x400C,
/** String property Bidi_Paired_Bracket (new in Unicode 6.3).
Corresponds to u_getBidiPairedBracket. @stable ICU 52 */
UCHAR_BIDI_PAIRED_BRACKET=0x400D,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for string Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UCHAR_STRING_LIMIT=0x400E,
#endif // U_HIDE_DEPRECATED_API
/* ---- Properties with unusual value types: selectors start at UCHAR_OTHER_PROPERTY_START (0x7000). ---- */
/** Miscellaneous property Script_Extensions (new in Unicode 6.0).
Some characters are commonly used in multiple scripts.
For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
@stable ICU 4.6 */
UCHAR_SCRIPT_EXTENSIONS=0x7000,
/** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
#ifndef U_HIDE_DRAFT_API
/**
* Miscellaneous property Identifier_Type.
* Used for UTS #39 General Security Profile for Identifiers
* (https://www.unicode.org/reports/tr39/#General_Security_Profile).
*
* Corresponds to u_hasIDType() and u_getIDTypes().
*
* Each code point maps to a <i>set</i> of UIdentifierType values.
*
* @see u_hasIDType
* @see u_getIDTypes
* @draft ICU 75
*/
UCHAR_IDENTIFIER_TYPE=0x7001,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for Unicode properties with unusual value types.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UCHAR_OTHER_PROPERTY_LIMIT=0x7002,
#endif // U_HIDE_DEPRECATED_API
/** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
UCHAR_INVALID_CODE = -1
} UProperty;
/**
 * Constants for the values of the enumerated Unicode General_Category property.
 * The API comment on each constant leads with the two-letter General_Category
 * value that the constant stands for.
 * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
 * @stable ICU 2.0
 */
typedef enum UCharCategory
{
    /*
     * Tooling note: preparseucd.py parses the UCharCategory constants together
     * with their API comments. It matches pairs of lines like
     *     / ** <Unicode 2-letter General_Category value> comment... * /
     *     U_<[A-Z_]+> = <integer>,
     */
    /** Non-category value for unassigned and non-character code points. @stable ICU 2.0 */
    U_UNASSIGNED              = 0,
    /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)"; shares its numeric value with U_UNASSIGNED. @stable ICU 2.0 */
    U_GENERAL_OTHER_TYPES     = 0,
    /** Lu (uppercase letter) @stable ICU 2.0 */
    U_UPPERCASE_LETTER        = 1,
    /** Ll (lowercase letter) @stable ICU 2.0 */
    U_LOWERCASE_LETTER        = 2,
    /** Lt (titlecase letter) @stable ICU 2.0 */
    U_TITLECASE_LETTER        = 3,
    /** Lm (modifier letter) @stable ICU 2.0 */
    U_MODIFIER_LETTER         = 4,
    /** Lo (other letter) @stable ICU 2.0 */
    U_OTHER_LETTER            = 5,
    /** Mn (non-spacing mark) @stable ICU 2.0 */
    U_NON_SPACING_MARK        = 6,
    /** Me (enclosing mark) @stable ICU 2.0 */
    U_ENCLOSING_MARK          = 7,
    /** Mc (combining spacing mark) @stable ICU 2.0 */
    U_COMBINING_SPACING_MARK  = 8,
    /** Nd (decimal digit number) @stable ICU 2.0 */
    U_DECIMAL_DIGIT_NUMBER    = 9,
    /** Nl (letter number) @stable ICU 2.0 */
    U_LETTER_NUMBER           = 10,
    /** No (other number) @stable ICU 2.0 */
    U_OTHER_NUMBER            = 11,
    /** Zs (space separator) @stable ICU 2.0 */
    U_SPACE_SEPARATOR         = 12,
    /** Zl (line separator) @stable ICU 2.0 */
    U_LINE_SEPARATOR          = 13,
    /** Zp (paragraph separator) @stable ICU 2.0 */
    U_PARAGRAPH_SEPARATOR     = 14,
    /** Cc (control character) @stable ICU 2.0 */
    U_CONTROL_CHAR            = 15,
    /** Cf (format character) @stable ICU 2.0 */
    U_FORMAT_CHAR             = 16,
    /** Co (private use character) @stable ICU 2.0 */
    U_PRIVATE_USE_CHAR        = 17,
    /** Cs (surrogate) @stable ICU 2.0 */
    U_SURROGATE               = 18,
    /** Pd (dash punctuation) @stable ICU 2.0 */
    U_DASH_PUNCTUATION        = 19,
    /** Ps (start punctuation) @stable ICU 2.0 */
    U_START_PUNCTUATION       = 20,
    /** Pe (end punctuation) @stable ICU 2.0 */
    U_END_PUNCTUATION         = 21,
    /** Pc (connector punctuation) @stable ICU 2.0 */
    U_CONNECTOR_PUNCTUATION   = 22,
    /** Po (other punctuation) @stable ICU 2.0 */
    U_OTHER_PUNCTUATION       = 23,
    /** Sm (math symbol) @stable ICU 2.0 */
    U_MATH_SYMBOL             = 24,
    /** Sc (currency symbol) @stable ICU 2.0 */
    U_CURRENCY_SYMBOL         = 25,
    /** Sk (modifier symbol) @stable ICU 2.0 */
    U_MODIFIER_SYMBOL         = 26,
    /** So (other symbol) @stable ICU 2.0 */
    U_OTHER_SYMBOL            = 27,
    /** Pi (initial punctuation) @stable ICU 2.0 */
    U_INITIAL_PUNCTUATION     = 28,
    /** Pf (final punctuation) @stable ICU 2.0 */
    U_FINAL_PUNCTUATION       = 29,
    /**
     * One higher than the last enum UCharCategory constant; it has no explicit
     * initializer and therefore takes the value following U_FINAL_PUNCTUATION.
     * This numeric value is stable (will not change), see
     * http://www.unicode.org/policies/stability_policy.html#Property_Value
     *
     * @stable ICU 2.0
     */
    U_CHAR_CATEGORY_COUNT
} UCharCategory;
/**
 * U_GC_XX_MASK constants are bit flags, one per Unicode general category value.
 * In the mask for a category, the nth bit is set, where n is the numeric value
 * of the corresponding UCharCategory constant.
 *
 * Besides these single-category masks there are some U_GC_Y_MASK constants
 * that cover a group of general categories, like L for all letter categories.
 *
 * This first mask is the one for Cn (U_GENERAL_OTHER_TYPES).
 *
 * @see u_charType
 * @see U_GET_GC_MASK
 * @see UCharCategory
 * @stable ICU 2.1
 */
#define U_GC_CN_MASK    U_MASK(U_GENERAL_OTHER_TYPES)
/** Bit flag for general category Lu (U_UPPERCASE_LETTER). @stable ICU 2.1 */
#define U_GC_LU_MASK    U_MASK(U_UPPERCASE_LETTER)
/** Bit flag for general category Ll (U_LOWERCASE_LETTER). @stable ICU 2.1 */
#define U_GC_LL_MASK    U_MASK(U_LOWERCASE_LETTER)
/** Bit flag for general category Lt (U_TITLECASE_LETTER). @stable ICU 2.1 */
#define U_GC_LT_MASK    U_MASK(U_TITLECASE_LETTER)
/** Bit flag for general category Lm (U_MODIFIER_LETTER). @stable ICU 2.1 */
#define U_GC_LM_MASK    U_MASK(U_MODIFIER_LETTER)
/** Bit flag for general category Lo (U_OTHER_LETTER). @stable ICU 2.1 */
#define U_GC_LO_MASK    U_MASK(U_OTHER_LETTER)
/** Bit flag for general category Mn (U_NON_SPACING_MARK). @stable ICU 2.1 */
#define U_GC_MN_MASK    U_MASK(U_NON_SPACING_MARK)
/** Bit flag for general category Me (U_ENCLOSING_MARK). @stable ICU 2.1 */
#define U_GC_ME_MASK    U_MASK(U_ENCLOSING_MARK)
/** Bit flag for general category Mc (U_COMBINING_SPACING_MARK). @stable ICU 2.1 */
#define U_GC_MC_MASK    U_MASK(U_COMBINING_SPACING_MARK)
/** Bit flag for general category Nd (U_DECIMAL_DIGIT_NUMBER). @stable ICU 2.1 */
#define U_GC_ND_MASK    U_MASK(U_DECIMAL_DIGIT_NUMBER)
/** Bit flag for general category Nl (U_LETTER_NUMBER). @stable ICU 2.1 */
#define U_GC_NL_MASK    U_MASK(U_LETTER_NUMBER)
/** Bit flag for general category No (U_OTHER_NUMBER). @stable ICU 2.1 */
#define U_GC_NO_MASK    U_MASK(U_OTHER_NUMBER)
/** Bit flag for general category Zs (U_SPACE_SEPARATOR). @stable ICU 2.1 */
#define U_GC_ZS_MASK    U_MASK(U_SPACE_SEPARATOR)
/** Bit flag for general category Zl (U_LINE_SEPARATOR). @stable ICU 2.1 */
#define U_GC_ZL_MASK    U_MASK(U_LINE_SEPARATOR)
/** Bit flag for general category Zp (U_PARAGRAPH_SEPARATOR). @stable ICU 2.1 */
#define U_GC_ZP_MASK    U_MASK(U_PARAGRAPH_SEPARATOR)
/** Bit flag for general category Cc (U_CONTROL_CHAR). @stable ICU 2.1 */
#define U_GC_CC_MASK    U_MASK(U_CONTROL_CHAR)
/** Bit flag for general category Cf (U_FORMAT_CHAR). @stable ICU 2.1 */
#define U_GC_CF_MASK    U_MASK(U_FORMAT_CHAR)
/** Bit flag for general category Co (U_PRIVATE_USE_CHAR). @stable ICU 2.1 */
#define U_GC_CO_MASK    U_MASK(U_PRIVATE_USE_CHAR)
/** Bit flag for general category Cs (U_SURROGATE). @stable ICU 2.1 */
#define U_GC_CS_MASK    U_MASK(U_SURROGATE)
/** Bit flag for general category Pd (U_DASH_PUNCTUATION). @stable ICU 2.1 */
#define U_GC_PD_MASK    U_MASK(U_DASH_PUNCTUATION)
/** Bit flag for general category Ps (U_START_PUNCTUATION). @stable ICU 2.1 */
#define U_GC_PS_MASK    U_MASK(U_START_PUNCTUATION)
/** Bit flag for general category Pe (U_END_PUNCTUATION). @stable ICU 2.1 */
#define U_GC_PE_MASK    U_MASK(U_END_PUNCTUATION)
/** Bit flag for general category Pc (U_CONNECTOR_PUNCTUATION). @stable ICU 2.1 */
#define U_GC_PC_MASK    U_MASK(U_CONNECTOR_PUNCTUATION)
/** Bit flag for general category Po (U_OTHER_PUNCTUATION). @stable ICU 2.1 */
#define U_GC_PO_MASK    U_MASK(U_OTHER_PUNCTUATION)
/** Bit flag for general category Sm (U_MATH_SYMBOL). @stable ICU 2.1 */
#define U_GC_SM_MASK    U_MASK(U_MATH_SYMBOL)
/** Bit flag for general category Sc (U_CURRENCY_SYMBOL). @stable ICU 2.1 */
#define U_GC_SC_MASK    U_MASK(U_CURRENCY_SYMBOL)
/** Bit flag for general category Sk (U_MODIFIER_SYMBOL). @stable ICU 2.1 */
#define U_GC_SK_MASK    U_MASK(U_MODIFIER_SYMBOL)
/** Bit flag for general category So (U_OTHER_SYMBOL). @stable ICU 2.1 */
#define U_GC_SO_MASK    U_MASK(U_OTHER_SYMBOL)
/** Bit flag for general category Pi (U_INITIAL_PUNCTUATION). @stable ICU 2.1 */
#define U_GC_PI_MASK    U_MASK(U_INITIAL_PUNCTUATION)
/** Bit flag for general category Pf (U_FINAL_PUNCTUATION). @stable ICU 2.1 */
#define U_GC_PF_MASK    U_MASK(U_FINAL_PUNCTUATION)
/** Combined mask of all letter categories (L): Lu, Ll, Lt, Lm and Lo. @stable ICU 2.1 */
#define U_GC_L_MASK \
    (U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LT_MASK | U_GC_LM_MASK | U_GC_LO_MASK)
/** Combined mask of the cased letter categories (LC): Lu, Ll and Lt. @stable ICU 2.1 */
#define U_GC_LC_MASK \
    (U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LT_MASK)
/** Combined mask of all mark categories (M): Mn, Me and Mc. @stable ICU 2.1 */
#define U_GC_M_MASK \
    (U_GC_MN_MASK | U_GC_ME_MASK | U_GC_MC_MASK)
/** Combined mask of all number categories (N): Nd, Nl and No. @stable ICU 2.1 */
#define U_GC_N_MASK \
    (U_GC_ND_MASK | U_GC_NL_MASK | U_GC_NO_MASK)
/** Combined mask of all separator categories (Z): Zs, Zl and Zp. @stable ICU 2.1 */
#define U_GC_Z_MASK \
    (U_GC_ZS_MASK | U_GC_ZL_MASK | U_GC_ZP_MASK)
/** Combined mask of all "other" categories (C): Cn, Cc, Cf, Co and Cs. @stable ICU 2.1 */
#define U_GC_C_MASK \
    (U_GC_CN_MASK | U_GC_CC_MASK | U_GC_CF_MASK | U_GC_CO_MASK | U_GC_CS_MASK)
/** Combined mask of all punctuation categories (P): Pd, Ps, Pe, Pc, Po, Pi and Pf. @stable ICU 2.1 */
#define U_GC_P_MASK \
    (U_GC_PD_MASK | U_GC_PS_MASK | U_GC_PE_MASK | U_GC_PC_MASK | \
     U_GC_PO_MASK | U_GC_PI_MASK | U_GC_PF_MASK)
/** Combined mask of all symbol categories (S): Sm, Sc, Sk and So. @stable ICU 2.1 */
#define U_GC_S_MASK \
    (U_GC_SM_MASK | U_GC_SC_MASK | U_GC_SK_MASK | U_GC_SO_MASK)
/**
 * Constants for the bidirectional category (Unicode Bidi_Class value) of a
 * character. The API comment on each constant leads with the 1..3-letter
 * Bidi_Class value that the constant stands for.
 * @stable ICU 2.0
 */
typedef enum UCharDirection {
    /*
     * Tooling note: preparseucd.py parses the UCharDirection constants together
     * with their API comments. It matches pairs of lines like
     *     / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
     *     U_<[A-Z_]+> = <integer>,
     */
    /** L (left-to-right) @stable ICU 2.0 */
    U_LEFT_TO_RIGHT               = 0,
    /** R (right-to-left) @stable ICU 2.0 */
    U_RIGHT_TO_LEFT               = 1,
    /** EN (European number) @stable ICU 2.0 */
    U_EUROPEAN_NUMBER             = 2,
    /** ES (European number separator) @stable ICU 2.0 */
    U_EUROPEAN_NUMBER_SEPARATOR   = 3,
    /** ET (European number terminator) @stable ICU 2.0 */
    U_EUROPEAN_NUMBER_TERMINATOR  = 4,
    /** AN (Arabic number) @stable ICU 2.0 */
    U_ARABIC_NUMBER               = 5,
    /** CS (common number separator) @stable ICU 2.0 */
    U_COMMON_NUMBER_SEPARATOR     = 6,
    /** B (block separator) @stable ICU 2.0 */
    U_BLOCK_SEPARATOR             = 7,
    /** S (segment separator) @stable ICU 2.0 */
    U_SEGMENT_SEPARATOR           = 8,
    /** WS (white space, neutral) @stable ICU 2.0 */
    U_WHITE_SPACE_NEUTRAL         = 9,
    /** ON (other neutral) @stable ICU 2.0 */
    U_OTHER_NEUTRAL               = 10,
    /** LRE (left-to-right embedding) @stable ICU 2.0 */
    U_LEFT_TO_RIGHT_EMBEDDING     = 11,
    /** LRO (left-to-right override) @stable ICU 2.0 */
    U_LEFT_TO_RIGHT_OVERRIDE      = 12,
    /** AL (right-to-left Arabic) @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_ARABIC        = 13,
    /** RLE (right-to-left embedding) @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_EMBEDDING     = 14,
    /** RLO (right-to-left override) @stable ICU 2.0 */
    U_RIGHT_TO_LEFT_OVERRIDE      = 15,
    /** PDF (pop directional format) @stable ICU 2.0 */
    U_POP_DIRECTIONAL_FORMAT      = 16,
    /** NSM (non-spacing mark) @stable ICU 2.0 */
    U_DIR_NON_SPACING_MARK        = 17,
    /** BN (boundary neutral) @stable ICU 2.0 */
    U_BOUNDARY_NEUTRAL            = 18,
    /** FSI (first strong isolate) @stable ICU 52 */
    U_FIRST_STRONG_ISOLATE        = 19,
    /** LRI (left-to-right isolate) @stable ICU 52 */
    U_LEFT_TO_RIGHT_ISOLATE       = 20,
    /** RLI (right-to-left isolate) @stable ICU 52 */
    U_RIGHT_TO_LEFT_ISOLATE       = 21,
    /** PDI (pop directional isolate) @stable ICU 52 */
    U_POP_DIRECTIONAL_ISOLATE     = 22,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest UCharDirection value; it has no explicit
     * initializer and therefore follows U_POP_DIRECTIONAL_ISOLATE.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_CHAR_DIRECTION_COUNT
#endif  // U_HIDE_DEPRECATED_API
} UCharDirection;
/**
 * Constants for the values of the Bidi_Paired_Bracket_Type property.
 *
 * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
 * @stable ICU 52
 */
typedef enum UBidiPairedBracketType {
    /*
     * Tooling note: preparseucd.py parses the UBidiPairedBracketType constants.
     * It matches lines like
     *     U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
     */
    /** The character is not a paired bracket. @stable ICU 52 */
    U_BPT_NONE,
    /** The character is an open paired bracket. @stable ICU 52 */
    U_BPT_OPEN,
    /** The character is a close paired bracket. @stable ICU 52 */
    U_BPT_CLOSE,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UBidiPairedBracketType value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    U_BPT_COUNT /* 3 */
#endif  // U_HIDE_DEPRECATED_API
} UBidiPairedBracketType;
/**
* Constants for Unicode blocks, see the Unicode Data file Blocks.txt
* @stable ICU 2.0
*/
enum UBlockCode {
/*
* Note: UBlockCode constants are parsed by preparseucd.py.
* It matches lines like
* UBLOCK_<Unicode Block value name> = <integer>,
*/
/** New No_Block value in Unicode 4. @stable ICU 2.6 */
UBLOCK_NO_BLOCK = 0,
/*[none]*/ /* Special range indicating No_Block */
/** @stable ICU 2.0 */
UBLOCK_BASIC_LATIN = 1,
/*[0000]*/
/** @stable ICU 2.0 */
UBLOCK_LATIN_1_SUPPLEMENT=2,
/*[0080]*/
/** @stable ICU 2.0 */
UBLOCK_LATIN_EXTENDED_A =3,
/*[0100]*/
/** @stable ICU 2.0 */
UBLOCK_LATIN_EXTENDED_B =4,
/*[0180]*/
/** @stable ICU 2.0 */
UBLOCK_IPA_EXTENSIONS =5,
/*[0250]*/
/** @stable ICU 2.0 */
UBLOCK_SPACING_MODIFIER_LETTERS =6,
/*[02B0]*/
/** @stable ICU 2.0 */
UBLOCK_COMBINING_DIACRITICAL_MARKS =7,
/*[0300]*/
/**
* Unicode 3.2 renames this block to "Greek and Coptic".
* @stable ICU 2.0
*/
UBLOCK_GREEK =8,
/*[0370]*/
/** @stable ICU 2.0 */
UBLOCK_CYRILLIC =9,
/*[0400]*/
/** @stable ICU 2.0 */
UBLOCK_ARMENIAN =10,
/*[0530]*/
/** @stable ICU 2.0 */
UBLOCK_HEBREW =11,
/*[0590]*/
/** @stable ICU 2.0 */
UBLOCK_ARABIC =12,
/*[0600]*/
/** @stable ICU 2.0 */
UBLOCK_SYRIAC =13,
/*[0700]*/
/** @stable ICU 2.0 */
UBLOCK_THAANA =14,
/*[0780]*/
/** @stable ICU 2.0 */
UBLOCK_DEVANAGARI =15,
/*[0900]*/
/** @stable ICU 2.0 */
UBLOCK_BENGALI =16,
/*[0980]*/
/** @stable ICU 2.0 */
UBLOCK_GURMUKHI =17,
/*[0A00]*/
/** @stable ICU 2.0 */
UBLOCK_GUJARATI =18,
/*[0A80]*/
/** @stable ICU 2.0 */
UBLOCK_ORIYA =19,
/*[0B00]*/
/** @stable ICU 2.0 */
UBLOCK_TAMIL =20,
/*[0B80]*/
/** @stable ICU 2.0 */
UBLOCK_TELUGU =21,
/*[0C00]*/
/** @stable ICU 2.0 */
UBLOCK_KANNADA =22,
/*[0C80]*/
/** @stable ICU 2.0 */
UBLOCK_MALAYALAM =23,
/*[0D00]*/
/** @stable ICU 2.0 */
UBLOCK_SINHALA =24,
/*[0D80]*/
/** @stable ICU 2.0 */
UBLOCK_THAI =25,
/*[0E00]*/
/** @stable ICU 2.0 */
UBLOCK_LAO =26,
/*[0E80]*/
/** @stable ICU 2.0 */
UBLOCK_TIBETAN =27,
/*[0F00]*/
/** @stable ICU 2.0 */
UBLOCK_MYANMAR =28,
/*[1000]*/
/** @stable ICU 2.0 */
UBLOCK_GEORGIAN =29,
/*[10A0]*/
/** @stable ICU 2.0 */
UBLOCK_HANGUL_JAMO =30,
/*[1100]*/
/** @stable ICU 2.0 */
UBLOCK_ETHIOPIC =31,
/*[1200]*/
/** @stable ICU 2.0 */
UBLOCK_CHEROKEE =32,
/*[13A0]*/
/** @stable ICU 2.0 */
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33,
/*[1400]*/
/** @stable ICU 2.0 */
UBLOCK_OGHAM =34,
/*[1680]*/
/** @stable ICU 2.0 */
UBLOCK_RUNIC =35,
/*[16A0]*/
/** @stable ICU 2.0 */
UBLOCK_KHMER =36,
/*[1780]*/
/** @stable ICU 2.0 */
UBLOCK_MONGOLIAN =37,
/*[1800]*/
/** @stable ICU 2.0 */
UBLOCK_LATIN_EXTENDED_ADDITIONAL =38,
/*[1E00]*/
/** @stable ICU 2.0 */
UBLOCK_GREEK_EXTENDED =39,
/*[1F00]*/
/** @stable ICU 2.0 */
UBLOCK_GENERAL_PUNCTUATION =40,
/*[2000]*/
/** @stable ICU 2.0 */
UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41,
/*[2070]*/
/** @stable ICU 2.0 */
UBLOCK_CURRENCY_SYMBOLS =42,
/*[20A0]*/
/**
* Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
* @stable ICU 2.0
*/
UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43,
/*[20D0]*/
/** @stable ICU 2.0 */
UBLOCK_LETTERLIKE_SYMBOLS =44,
/*[2100]*/
/** @stable ICU 2.0 */
UBLOCK_NUMBER_FORMS =45,
/*[2150]*/
/** @stable ICU 2.0 */
UBLOCK_ARROWS =46,
/*[2190]*/
/** @stable ICU 2.0 */
UBLOCK_MATHEMATICAL_OPERATORS =47,
/*[2200]*/
/** @stable ICU 2.0 */
UBLOCK_MISCELLANEOUS_TECHNICAL =48,
/*[2300]*/
/** @stable ICU 2.0 */
UBLOCK_CONTROL_PICTURES =49,
/*[2400]*/
/** @stable ICU 2.0 */
UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50,
/*[2440]*/
/** @stable ICU 2.0 */
UBLOCK_ENCLOSED_ALPHANUMERICS =51,
/*[2460]*/
/** @stable ICU 2.0 */
UBLOCK_BOX_DRAWING =52,
/*[2500]*/
/** @stable ICU 2.0 */
UBLOCK_BLOCK_ELEMENTS =53,
/*[2580]*/
/** @stable ICU 2.0 */
UBLOCK_GEOMETRIC_SHAPES =54,
/*[25A0]*/
/** @stable ICU 2.0 */
UBLOCK_MISCELLANEOUS_SYMBOLS =55,
/*[2600]*/
/** @stable ICU 2.0 */
UBLOCK_DINGBATS =56,
/*[2700]*/
/** @stable ICU 2.0 */
UBLOCK_BRAILLE_PATTERNS =57,
/*[2800]*/
/** @stable ICU 2.0 */
UBLOCK_CJK_RADICALS_SUPPLEMENT =58,
/*[2E80]*/
/** @stable ICU 2.0 */
UBLOCK_KANGXI_RADICALS =59,
/*[2F00]*/
/** @stable ICU 2.0 */
UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60,
/*[2FF0]*/
/** @stable ICU 2.0 */
UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61,
/*[3000]*/
/** @stable ICU 2.0 */
UBLOCK_HIRAGANA =62,
/*[3040]*/
/** @stable ICU 2.0 */
UBLOCK_KATAKANA =63,
/*[30A0]*/
/** @stable ICU 2.0 */
UBLOCK_BOPOMOFO =64,
/*[3100]*/
/** @stable ICU 2.0 */
UBLOCK_HANGUL_COMPATIBILITY_JAMO =65,
/*[3130]*/
/** @stable ICU 2.0 */
UBLOCK_KANBUN =66,
/*[3190]*/
/** @stable ICU 2.0 */
UBLOCK_BOPOMOFO_EXTENDED =67,
/*[31A0]*/
/** @stable ICU 2.0 */
UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68,
/*[3200]*/
/** @stable ICU 2.0 */
UBLOCK_CJK_COMPATIBILITY =69,
/*[3300]*/
/** @stable ICU 2.0 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70,
/*[3400]*/
/** @stable ICU 2.0 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71,
/*[4E00]*/
/** @stable ICU 2.0 */
UBLOCK_YI_SYLLABLES =72,
/*[A000]*/
/** @stable ICU 2.0 */
UBLOCK_YI_RADICALS =73,
/*[A490]*/
/** @stable ICU 2.0 */
UBLOCK_HANGUL_SYLLABLES =74,
/*[AC00]*/
/** @stable ICU 2.0 */
UBLOCK_HIGH_SURROGATES =75,
/*[D800]*/
/** @stable ICU 2.0 */
UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76,
/*[DB80]*/
/** @stable ICU 2.0 */
UBLOCK_LOW_SURROGATES =77,
/*[DC00]*/
/**
* Same as UBLOCK_PRIVATE_USE.
* Until Unicode 3.1.1, the corresponding block name was "Private Use",
* and multiple code point ranges had this block.
* Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
* adds separate blocks for the supplementary PUAs.
*
* @stable ICU 2.0
*/
UBLOCK_PRIVATE_USE_AREA =78,
/*[E000]*/
/**
* Same as UBLOCK_PRIVATE_USE_AREA.
* Until Unicode 3.1.1, the corresponding block name was "Private Use",
* and multiple code point ranges had this block.
* Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
* adds separate blocks for the supplementary PUAs.
*
* @stable ICU 2.0
*/
UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,
/** @stable ICU 2.0 */
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79,
/*[F900]*/
/** @stable ICU 2.0 */
UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80,
/*[FB00]*/
/** @stable ICU 2.0 */
UBLOCK_ARABIC_PRESENTATION_FORMS_A =81,
/*[FB50]*/
/** @stable ICU 2.0 */
UBLOCK_COMBINING_HALF_MARKS =82,
/*[FE20]*/
/** @stable ICU 2.0 */
UBLOCK_CJK_COMPATIBILITY_FORMS =83,
/*[FE30]*/
/** @stable ICU 2.0 */
UBLOCK_SMALL_FORM_VARIANTS =84,
/*[FE50]*/
/** @stable ICU 2.0 */
UBLOCK_ARABIC_PRESENTATION_FORMS_B =85,
/*[FE70]*/
/** @stable ICU 2.0 */
UBLOCK_SPECIALS =86,
/*[FFF0]*/
/** @stable ICU 2.0 */
UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87,
/*[FF00]*/
/* New blocks in Unicode 3.1 */
/** @stable ICU 2.0 */
UBLOCK_OLD_ITALIC = 88,
/*[10300]*/
/** @stable ICU 2.0 */
UBLOCK_GOTHIC = 89,
/*[10330]*/
/** @stable ICU 2.0 */
UBLOCK_DESERET = 90,
/*[10400]*/
/** @stable ICU 2.0 */
UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91,
/*[1D000]*/
/** @stable ICU 2.0 */
UBLOCK_MUSICAL_SYMBOLS = 92,
/*[1D100]*/
/** @stable ICU 2.0 */
UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93,
/*[1D400]*/
/** @stable ICU 2.0 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94,
/*[20000]*/
/** @stable ICU 2.0 */
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95,
/*[2F800]*/
/** @stable ICU 2.0 */
UBLOCK_TAGS = 96,
/*[E0000]*/
/* New blocks in Unicode 3.2 */
/** @stable ICU 3.0 */
UBLOCK_CYRILLIC_SUPPLEMENT = 97,
/*[0500]*/
/**
* Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
* @stable ICU 2.2
*/
UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
/** @stable ICU 2.2 */
UBLOCK_TAGALOG = 98,
/*[1700]*/
/** @stable ICU 2.2 */
UBLOCK_HANUNOO = 99,
/*[1720]*/
/** @stable ICU 2.2 */
UBLOCK_BUHID = 100,
/*[1740]*/
/** @stable ICU 2.2 */
UBLOCK_TAGBANWA = 101,
/*[1760]*/
/** @stable ICU 2.2 */
UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102,
/*[27C0]*/
/** @stable ICU 2.2 */
UBLOCK_SUPPLEMENTAL_ARROWS_A = 103,
/*[27F0]*/
/** @stable ICU 2.2 */
UBLOCK_SUPPLEMENTAL_ARROWS_B = 104,
/*[2900]*/
/** @stable ICU 2.2 */
UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105,
/*[2980]*/
/** @stable ICU 2.2 */
UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106,
/*[2A00]*/
/** @stable ICU 2.2 */
UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107,
/*[31F0]*/
/** @stable ICU 2.2 */
UBLOCK_VARIATION_SELECTORS = 108,
/*[FE00]*/
/** @stable ICU 2.2 */
UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109,
/*[F0000]*/
/** @stable ICU 2.2 */
UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110,
/*[100000]*/
/* New blocks in Unicode 4 */
/** @stable ICU 2.6 */
UBLOCK_LIMBU = 111,
/*[1900]*/
/** @stable ICU 2.6 */
UBLOCK_TAI_LE = 112,
/*[1950]*/
/** @stable ICU 2.6 */
UBLOCK_KHMER_SYMBOLS = 113,
/*[19E0]*/
/** @stable ICU 2.6 */
UBLOCK_PHONETIC_EXTENSIONS = 114,
/*[1D00]*/
/** @stable ICU 2.6 */
UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115,
/*[2B00]*/
/** @stable ICU 2.6 */
UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116,
/*[4DC0]*/
/** @stable ICU 2.6 */
UBLOCK_LINEAR_B_SYLLABARY = 117,
/*[10000]*/
/** @stable ICU 2.6 */
UBLOCK_LINEAR_B_IDEOGRAMS = 118,
/*[10080]*/
/** @stable ICU 2.6 */
UBLOCK_AEGEAN_NUMBERS = 119,
/*[10100]*/
/** @stable ICU 2.6 */
UBLOCK_UGARITIC = 120,
/*[10380]*/
/** @stable ICU 2.6 */
UBLOCK_SHAVIAN = 121,
/*[10450]*/
/** @stable ICU 2.6 */
UBLOCK_OSMANYA = 122,
/*[10480]*/
/** @stable ICU 2.6 */
UBLOCK_CYPRIOT_SYLLABARY = 123,
/*[10800]*/
/** @stable ICU 2.6 */
UBLOCK_TAI_XUAN_JING_SYMBOLS = 124,
/*[1D300]*/
/** @stable ICU 2.6 */
UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125,
/*[E0100]*/
/* New blocks in Unicode 4.1 */
/** @stable ICU 3.4 */
UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126,
/*[1D200]*/
/** @stable ICU 3.4 */
UBLOCK_ANCIENT_GREEK_NUMBERS = 127,
/*[10140]*/
/** @stable ICU 3.4 */
UBLOCK_ARABIC_SUPPLEMENT = 128,
/*[0750]*/
/** @stable ICU 3.4 */
UBLOCK_BUGINESE = 129,
/*[1A00]*/
/** @stable ICU 3.4 */
UBLOCK_CJK_STROKES = 130,
/*[31C0]*/
/** @stable ICU 3.4 */
UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131,
/*[1DC0]*/
/** @stable ICU 3.4 */
UBLOCK_COPTIC = 132,
/*[2C80]*/
/** @stable ICU 3.4 */
UBLOCK_ETHIOPIC_EXTENDED = 133,
/*[2D80]*/
/** @stable ICU 3.4 */
UBLOCK_ETHIOPIC_SUPPLEMENT = 134,
/*[1380]*/
/** @stable ICU 3.4 */
UBLOCK_GEORGIAN_SUPPLEMENT = 135,
/*[2D00]*/
/** @stable ICU 3.4 */
UBLOCK_GLAGOLITIC = 136,
/*[2C00]*/
/** @stable ICU 3.4 */
UBLOCK_KHAROSHTHI = 137,
/*[10A00]*/
/** @stable ICU 3.4 */
UBLOCK_MODIFIER_TONE_LETTERS = 138,
/*[A700]*/
/** @stable ICU 3.4 */
UBLOCK_NEW_TAI_LUE = 139,
/*[1980]*/
/** @stable ICU 3.4 */
UBLOCK_OLD_PERSIAN = 140,
/*[103A0]*/
/** @stable ICU 3.4 */
UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141,
/*[1D80]*/
/** @stable ICU 3.4 */
UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142,
/*[2E00]*/
/** @stable ICU 3.4 */
UBLOCK_SYLOTI_NAGRI = 143,
/*[A800]*/
/** @stable ICU 3.4 */
UBLOCK_TIFINAGH = 144,
/*[2D30]*/
/** @stable ICU 3.4 */
UBLOCK_VERTICAL_FORMS = 145,
/*[FE10]*/
/* New blocks in Unicode 5.0 */
/** @stable ICU 3.6 */
UBLOCK_NKO = 146,
/*[07C0]*/
/** @stable ICU 3.6 */
UBLOCK_BALINESE = 147,
/*[1B00]*/
/** @stable ICU 3.6 */
UBLOCK_LATIN_EXTENDED_C = 148,
/*[2C60]*/
/** @stable ICU 3.6 */
UBLOCK_LATIN_EXTENDED_D = 149,
/*[A720]*/
/** @stable ICU 3.6 */
UBLOCK_PHAGS_PA = 150,
/*[A840]*/
/** @stable ICU 3.6 */
UBLOCK_PHOENICIAN = 151,
/*[10900]*/
/** @stable ICU 3.6 */
UBLOCK_CUNEIFORM = 152,
/*[12000]*/
/** @stable ICU 3.6 */
UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153,
/*[12400]*/
/** @stable ICU 3.6 */
UBLOCK_COUNTING_ROD_NUMERALS = 154,
/*[1D360]*/
/* New blocks in Unicode 5.1 */
/** @stable ICU 4.0 */
UBLOCK_SUNDANESE = 155,
/*[1B80]*/
/** @stable ICU 4.0 */
UBLOCK_LEPCHA = 156,
/*[1C00]*/
/** @stable ICU 4.0 */
UBLOCK_OL_CHIKI = 157,
/*[1C50]*/
/** @stable ICU 4.0 */
UBLOCK_CYRILLIC_EXTENDED_A = 158,
/*[2DE0]*/
/** @stable ICU 4.0 */
UBLOCK_VAI = 159,
/*[A500]*/
/** @stable ICU 4.0 */
UBLOCK_CYRILLIC_EXTENDED_B = 160,
/*[A640]*/
/** @stable ICU 4.0 */
UBLOCK_SAURASHTRA = 161,
/*[A880]*/
/** @stable ICU 4.0 */
UBLOCK_KAYAH_LI = 162,
/*[A900]*/
/** @stable ICU 4.0 */
UBLOCK_REJANG = 163,
/*[A930]*/
/** @stable ICU 4.0 */
UBLOCK_CHAM = 164,
/*[AA00]*/
/** @stable ICU 4.0 */
UBLOCK_ANCIENT_SYMBOLS = 165,
/*[10190]*/
/** @stable ICU 4.0 */
UBLOCK_PHAISTOS_DISC = 166,
/*[101D0]*/
/** @stable ICU 4.0 */
UBLOCK_LYCIAN = 167,
/*[10280]*/
/** @stable ICU 4.0 */
UBLOCK_CARIAN = 168,
/*[102A0]*/
/** @stable ICU 4.0 */
UBLOCK_LYDIAN = 169,
/*[10920]*/
/** @stable ICU 4.0 */
UBLOCK_MAHJONG_TILES = 170,
/*[1F000]*/
/** @stable ICU 4.0 */
UBLOCK_DOMINO_TILES = 171,
/*[1F030]*/
/* New blocks in Unicode 5.2 */
/** @stable ICU 4.4 */
UBLOCK_SAMARITAN = 172,
/*[0800]*/
/** @stable ICU 4.4 */
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173,
/*[18B0]*/
/** @stable ICU 4.4 */
UBLOCK_TAI_THAM = 174,
/*[1A20]*/
/** @stable ICU 4.4 */
UBLOCK_VEDIC_EXTENSIONS = 175,
/*[1CD0]*/
/** @stable ICU 4.4 */
UBLOCK_LISU = 176,
/*[A4D0]*/
/** @stable ICU 4.4 */
UBLOCK_BAMUM = 177,
/*[A6A0]*/
/** @stable ICU 4.4 */
UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178,
/*[A830]*/
/** @stable ICU 4.4 */
UBLOCK_DEVANAGARI_EXTENDED = 179,
/*[A8E0]*/
/** @stable ICU 4.4 */
UBLOCK_HANGUL_JAMO_EXTENDED_A = 180,
/*[A960]*/
/** @stable ICU 4.4 */
UBLOCK_JAVANESE = 181,
/*[A980]*/
/** @stable ICU 4.4 */
UBLOCK_MYANMAR_EXTENDED_A = 182,
/*[AA60]*/
/** @stable ICU 4.4 */
UBLOCK_TAI_VIET = 183,
/*[AA80]*/
/** @stable ICU 4.4 */
UBLOCK_MEETEI_MAYEK = 184,
/*[ABC0]*/
/** @stable ICU 4.4 */
UBLOCK_HANGUL_JAMO_EXTENDED_B = 185,
/*[D7B0]*/
/** @stable ICU 4.4 */
UBLOCK_IMPERIAL_ARAMAIC = 186,
/*[10840]*/
/** @stable ICU 4.4 */
UBLOCK_OLD_SOUTH_ARABIAN = 187,
/*[10A60]*/
/** @stable ICU 4.4 */
UBLOCK_AVESTAN = 188,
/*[10B00]*/
/** @stable ICU 4.4 */
UBLOCK_INSCRIPTIONAL_PARTHIAN = 189,
/*[10B40]*/
/** @stable ICU 4.4 */
UBLOCK_INSCRIPTIONAL_PAHLAVI = 190,
/*[10B60]*/
/** @stable ICU 4.4 */
UBLOCK_OLD_TURKIC = 191,
/*[10C00]*/
/** @stable ICU 4.4 */
UBLOCK_RUMI_NUMERAL_SYMBOLS = 192,
/*[10E60]*/
/** @stable ICU 4.4 */
UBLOCK_KAITHI = 193,
/*[11080]*/
/** @stable ICU 4.4 */
UBLOCK_EGYPTIAN_HIEROGLYPHS = 194,
/*[13000]*/
/** @stable ICU 4.4 */
UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195,
/*[1F100]*/
/** @stable ICU 4.4 */
UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196,
/*[1F200]*/
/** @stable ICU 4.4 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197,
/*[2A700]*/
/* New blocks in Unicode 6.0 */
/** @stable ICU 4.6 */
UBLOCK_MANDAIC = 198,
/*[0840]*/
/** @stable ICU 4.6 */
UBLOCK_BATAK = 199,
/*[1BC0]*/
/** @stable ICU 4.6 */
UBLOCK_ETHIOPIC_EXTENDED_A = 200,
/*[AB00]*/
/** @stable ICU 4.6 */
UBLOCK_BRAHMI = 201,
/*[11000]*/
/** @stable ICU 4.6 */
UBLOCK_BAMUM_SUPPLEMENT = 202,
/*[16800]*/
/** @stable ICU 4.6 */
UBLOCK_KANA_SUPPLEMENT = 203,
/*[1B000]*/
/** @stable ICU 4.6 */
UBLOCK_PLAYING_CARDS = 204,
/*[1F0A0]*/
/** @stable ICU 4.6 */
UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205,
/*[1F300]*/
/** @stable ICU 4.6 */
UBLOCK_EMOTICONS = 206,
/*[1F600]*/
/** @stable ICU 4.6 */
UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207,
/*[1F680]*/
/** @stable ICU 4.6 */
UBLOCK_ALCHEMICAL_SYMBOLS = 208,
/*[1F700]*/
/** @stable ICU 4.6 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209,
/*[2B740]*/
/* New blocks in Unicode 6.1 */
/** @stable ICU 49 */
UBLOCK_ARABIC_EXTENDED_A = 210,
/*[08A0]*/
/** @stable ICU 49 */
UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211,
/*[1EE00]*/
/** @stable ICU 49 */
UBLOCK_CHAKMA = 212,
/*[11100]*/
/** @stable ICU 49 */
UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213,
/*[AAE0]*/
/** @stable ICU 49 */
UBLOCK_MEROITIC_CURSIVE = 214,
/*[109A0]*/
/** @stable ICU 49 */
UBLOCK_MEROITIC_HIEROGLYPHS = 215,
/*[10980]*/
/** @stable ICU 49 */
UBLOCK_MIAO = 216,
/*[16F00]*/
/** @stable ICU 49 */
UBLOCK_SHARADA = 217,
/*[11180]*/
/** @stable ICU 49 */
UBLOCK_SORA_SOMPENG = 218,
/*[110D0]*/
/** @stable ICU 49 */
UBLOCK_SUNDANESE_SUPPLEMENT = 219,
/*[1CC0]*/
/** @stable ICU 49 */
UBLOCK_TAKRI = 220,
/*[11680]*/
/* New blocks in Unicode 7.0 */
/** @stable ICU 54 */
UBLOCK_BASSA_VAH = 221,
/*[16AD0]*/
/** @stable ICU 54 */
UBLOCK_CAUCASIAN_ALBANIAN = 222,
/*[10530]*/
/** @stable ICU 54 */
UBLOCK_COPTIC_EPACT_NUMBERS = 223,
/*[102E0]*/
/** @stable ICU 54 */
UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224,
/*[1AB0]*/
/** @stable ICU 54 */
UBLOCK_DUPLOYAN = 225,
/*[1BC00]*/
/** @stable ICU 54 */
UBLOCK_ELBASAN = 226,
/*[10500]*/
/** @stable ICU 54 */
UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227,
/*[1F780]*/
/** @stable ICU 54 */
UBLOCK_GRANTHA = 228,
/*[11300]*/
/** @stable ICU 54 */
UBLOCK_KHOJKI = 229,
/*[11200]*/
/** @stable ICU 54 */
UBLOCK_KHUDAWADI = 230,
/*[112B0]*/
/** @stable ICU 54 */
UBLOCK_LATIN_EXTENDED_E = 231,
/*[AB30]*/
/** @stable ICU 54 */
UBLOCK_LINEAR_A = 232,
/*[10600]*/
/** @stable ICU 54 */
UBLOCK_MAHAJANI = 233,
/*[11150]*/
/** @stable ICU 54 */
UBLOCK_MANICHAEAN = 234,
/*[10AC0]*/
/** @stable ICU 54 */
UBLOCK_MENDE_KIKAKUI = 235,
/*[1E800]*/
/** @stable ICU 54 */
UBLOCK_MODI = 236,
/*[11600]*/
/** @stable ICU 54 */
UBLOCK_MRO = 237,
/*[16A40]*/
--> --------------------
--> maximum size reached
--> --------------------