ICU4C

#include <parseerr.h>
#include <ptypes.h>
#include <putil.h>
#include <stringoptions.h>
#include <ubrk.h>
#include <uchar.h>
#include <ucol.h>
#include <ucpmap.h>
#include <udisplaycontext.h>
#include <uenum.h>
#include <uldnames.h>
#include <uloc.h>
#include <ulocdata.h>
#include <umachine.h>
#include <unorm2.h>
#include <urep.h>
#include <uscript.h>
#include <ustring.h>
#include <utext.h>
#include <utf.h>
#include <utf16.h>
#include <utf8.h>
#include <utrans.h>
#include <utypes.h>
#include <uversion.h>

Summary

Enumerations

Anonymous Enum 124{
  U_PARSE_CONTEXT_LEN = 16
}
enum
The capacity of the context strings in UParseError.
UAcceptResult{
  ULOC_ACCEPT_FAILED = 0,
  ULOC_ACCEPT_VALID = 1,
  ULOC_ACCEPT_FALLBACK = 2
}
enum
Output values which uloc_acceptLanguage() writes to the 'outResult' parameter.
UBidiPairedBracketType{
  U_BPT_NONE,
  U_BPT_OPEN,
  U_BPT_CLOSE,
  U_BPT_COUNT
}
enum
Bidi Paired Bracket Type constants.
UBlockCode{
  UBLOCK_NO_BLOCK = 0,
  UBLOCK_BASIC_LATIN = 1,
  UBLOCK_LATIN_1_SUPPLEMENT =2,
  UBLOCK_LATIN_EXTENDED_A =3,
  UBLOCK_LATIN_EXTENDED_B =4,
  UBLOCK_IPA_EXTENSIONS =5,
  UBLOCK_SPACING_MODIFIER_LETTERS =6,
  UBLOCK_COMBINING_DIACRITICAL_MARKS =7,
  UBLOCK_GREEK =8,
  UBLOCK_CYRILLIC =9,
  UBLOCK_ARMENIAN =10,
  UBLOCK_HEBREW =11,
  UBLOCK_ARABIC =12,
  UBLOCK_SYRIAC =13,
  UBLOCK_THAANA =14,
  UBLOCK_DEVANAGARI =15,
  UBLOCK_BENGALI =16,
  UBLOCK_GURMUKHI =17,
  UBLOCK_GUJARATI =18,
  UBLOCK_ORIYA =19,
  UBLOCK_TAMIL =20,
  UBLOCK_TELUGU =21,
  UBLOCK_KANNADA =22,
  UBLOCK_MALAYALAM =23,
  UBLOCK_SINHALA =24,
  UBLOCK_THAI =25,
  UBLOCK_LAO =26,
  UBLOCK_TIBETAN =27,
  UBLOCK_MYANMAR =28,
  UBLOCK_GEORGIAN =29,
  UBLOCK_HANGUL_JAMO =30,
  UBLOCK_ETHIOPIC =31,
  UBLOCK_CHEROKEE =32,
  UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33,
  UBLOCK_OGHAM =34,
  UBLOCK_RUNIC =35,
  UBLOCK_KHMER =36,
  UBLOCK_MONGOLIAN =37,
  UBLOCK_LATIN_EXTENDED_ADDITIONAL =38,
  UBLOCK_GREEK_EXTENDED =39,
  UBLOCK_GENERAL_PUNCTUATION =40,
  UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41,
  UBLOCK_CURRENCY_SYMBOLS =42,
  UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43,
  UBLOCK_LETTERLIKE_SYMBOLS =44,
  UBLOCK_NUMBER_FORMS =45,
  UBLOCK_ARROWS =46,
  UBLOCK_MATHEMATICAL_OPERATORS =47,
  UBLOCK_MISCELLANEOUS_TECHNICAL =48,
  UBLOCK_CONTROL_PICTURES =49,
  UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50,
  UBLOCK_ENCLOSED_ALPHANUMERICS =51,
  UBLOCK_BOX_DRAWING =52,
  UBLOCK_BLOCK_ELEMENTS =53,
  UBLOCK_GEOMETRIC_SHAPES =54,
  UBLOCK_MISCELLANEOUS_SYMBOLS =55,
  UBLOCK_DINGBATS =56,
  UBLOCK_BRAILLE_PATTERNS =57,
  UBLOCK_CJK_RADICALS_SUPPLEMENT =58,
  UBLOCK_KANGXI_RADICALS =59,
  UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60,
  UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61,
  UBLOCK_HIRAGANA =62,
  UBLOCK_KATAKANA =63,
  UBLOCK_BOPOMOFO =64,
  UBLOCK_HANGUL_COMPATIBILITY_JAMO =65,
  UBLOCK_KANBUN =66,
  UBLOCK_BOPOMOFO_EXTENDED =67,
  UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68,
  UBLOCK_CJK_COMPATIBILITY =69,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71,
  UBLOCK_YI_SYLLABLES =72,
  UBLOCK_YI_RADICALS =73,
  UBLOCK_HANGUL_SYLLABLES =74,
  UBLOCK_HIGH_SURROGATES =75,
  UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76,
  UBLOCK_LOW_SURROGATES =77,
  UBLOCK_PRIVATE_USE_AREA =78,
  UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,
  UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79,
  UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80,
  UBLOCK_ARABIC_PRESENTATION_FORMS_A =81,
  UBLOCK_COMBINING_HALF_MARKS =82,
  UBLOCK_CJK_COMPATIBILITY_FORMS =83,
  UBLOCK_SMALL_FORM_VARIANTS =84,
  UBLOCK_ARABIC_PRESENTATION_FORMS_B =85,
  UBLOCK_SPECIALS =86,
  UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87,
  UBLOCK_OLD_ITALIC = 88,
  UBLOCK_GOTHIC = 89,
  UBLOCK_DESERET = 90,
  UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91,
  UBLOCK_MUSICAL_SYMBOLS = 92,
  UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94,
  UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95,
  UBLOCK_TAGS = 96,
  UBLOCK_CYRILLIC_SUPPLEMENT = 97,
  UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
  UBLOCK_TAGALOG = 98,
  UBLOCK_HANUNOO = 99,
  UBLOCK_BUHID = 100,
  UBLOCK_TAGBANWA = 101,
  UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102,
  UBLOCK_SUPPLEMENTAL_ARROWS_A = 103,
  UBLOCK_SUPPLEMENTAL_ARROWS_B = 104,
  UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105,
  UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106,
  UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107,
  UBLOCK_VARIATION_SELECTORS = 108,
  UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109,
  UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110,
  UBLOCK_LIMBU = 111,
  UBLOCK_TAI_LE = 112,
  UBLOCK_KHMER_SYMBOLS = 113,
  UBLOCK_PHONETIC_EXTENSIONS = 114,
  UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115,
  UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116,
  UBLOCK_LINEAR_B_SYLLABARY = 117,
  UBLOCK_LINEAR_B_IDEOGRAMS = 118,
  UBLOCK_AEGEAN_NUMBERS = 119,
  UBLOCK_UGARITIC = 120,
  UBLOCK_SHAVIAN = 121,
  UBLOCK_OSMANYA = 122,
  UBLOCK_CYPRIOT_SYLLABARY = 123,
  UBLOCK_TAI_XUAN_JING_SYMBOLS = 124,
  UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125,
  UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126,
  UBLOCK_ANCIENT_GREEK_NUMBERS = 127,
  UBLOCK_ARABIC_SUPPLEMENT = 128,
  UBLOCK_BUGINESE = 129,
  UBLOCK_CJK_STROKES = 130,
  UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131,
  UBLOCK_COPTIC = 132,
  UBLOCK_ETHIOPIC_EXTENDED = 133,
  UBLOCK_ETHIOPIC_SUPPLEMENT = 134,
  UBLOCK_GEORGIAN_SUPPLEMENT = 135,
  UBLOCK_GLAGOLITIC = 136,
  UBLOCK_KHAROSHTHI = 137,
  UBLOCK_MODIFIER_TONE_LETTERS = 138,
  UBLOCK_NEW_TAI_LUE = 139,
  UBLOCK_OLD_PERSIAN = 140,
  UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141,
  UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142,
  UBLOCK_SYLOTI_NAGRI = 143,
  UBLOCK_TIFINAGH = 144,
  UBLOCK_VERTICAL_FORMS = 145,
  UBLOCK_NKO = 146,
  UBLOCK_BALINESE = 147,
  UBLOCK_LATIN_EXTENDED_C = 148,
  UBLOCK_LATIN_EXTENDED_D = 149,
  UBLOCK_PHAGS_PA = 150,
  UBLOCK_PHOENICIAN = 151,
  UBLOCK_CUNEIFORM = 152,
  UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153,
  UBLOCK_COUNTING_ROD_NUMERALS = 154,
  UBLOCK_SUNDANESE = 155,
  UBLOCK_LEPCHA = 156,
  UBLOCK_OL_CHIKI = 157,
  UBLOCK_CYRILLIC_EXTENDED_A = 158,
  UBLOCK_VAI = 159,
  UBLOCK_CYRILLIC_EXTENDED_B = 160,
  UBLOCK_SAURASHTRA = 161,
  UBLOCK_KAYAH_LI = 162,
  UBLOCK_REJANG = 163,
  UBLOCK_CHAM = 164,
  UBLOCK_ANCIENT_SYMBOLS = 165,
  UBLOCK_PHAISTOS_DISC = 166,
  UBLOCK_LYCIAN = 167,
  UBLOCK_CARIAN = 168,
  UBLOCK_LYDIAN = 169,
  UBLOCK_MAHJONG_TILES = 170,
  UBLOCK_DOMINO_TILES = 171,
  UBLOCK_SAMARITAN = 172,
  UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173,
  UBLOCK_TAI_THAM = 174,
  UBLOCK_VEDIC_EXTENSIONS = 175,
  UBLOCK_LISU = 176,
  UBLOCK_BAMUM = 177,
  UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178,
  UBLOCK_DEVANAGARI_EXTENDED = 179,
  UBLOCK_HANGUL_JAMO_EXTENDED_A = 180,
  UBLOCK_JAVANESE = 181,
  UBLOCK_MYANMAR_EXTENDED_A = 182,
  UBLOCK_TAI_VIET = 183,
  UBLOCK_MEETEI_MAYEK = 184,
  UBLOCK_HANGUL_JAMO_EXTENDED_B = 185,
  UBLOCK_IMPERIAL_ARAMAIC = 186,
  UBLOCK_OLD_SOUTH_ARABIAN = 187,
  UBLOCK_AVESTAN = 188,
  UBLOCK_INSCRIPTIONAL_PARTHIAN = 189,
  UBLOCK_INSCRIPTIONAL_PAHLAVI = 190,
  UBLOCK_OLD_TURKIC = 191,
  UBLOCK_RUMI_NUMERAL_SYMBOLS = 192,
  UBLOCK_KAITHI = 193,
  UBLOCK_EGYPTIAN_HIEROGLYPHS = 194,
  UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195,
  UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197,
  UBLOCK_MANDAIC = 198,
  UBLOCK_BATAK = 199,
  UBLOCK_ETHIOPIC_EXTENDED_A = 200,
  UBLOCK_BRAHMI = 201,
  UBLOCK_BAMUM_SUPPLEMENT = 202,
  UBLOCK_KANA_SUPPLEMENT = 203,
  UBLOCK_PLAYING_CARDS = 204,
  UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205,
  UBLOCK_EMOTICONS = 206,
  UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207,
  UBLOCK_ALCHEMICAL_SYMBOLS = 208,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209,
  UBLOCK_ARABIC_EXTENDED_A = 210,
  UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211,
  UBLOCK_CHAKMA = 212,
  UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213,
  UBLOCK_MEROITIC_CURSIVE = 214,
  UBLOCK_MEROITIC_HIEROGLYPHS = 215,
  UBLOCK_MIAO = 216,
  UBLOCK_SHARADA = 217,
  UBLOCK_SORA_SOMPENG = 218,
  UBLOCK_SUNDANESE_SUPPLEMENT = 219,
  UBLOCK_TAKRI = 220,
  UBLOCK_BASSA_VAH = 221,
  UBLOCK_CAUCASIAN_ALBANIAN = 222,
  UBLOCK_COPTIC_EPACT_NUMBERS = 223,
  UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224,
  UBLOCK_DUPLOYAN = 225,
  UBLOCK_ELBASAN = 226,
  UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227,
  UBLOCK_GRANTHA = 228,
  UBLOCK_KHOJKI = 229,
  UBLOCK_KHUDAWADI = 230,
  UBLOCK_LATIN_EXTENDED_E = 231,
  UBLOCK_LINEAR_A = 232,
  UBLOCK_MAHAJANI = 233,
  UBLOCK_MANICHAEAN = 234,
  UBLOCK_MENDE_KIKAKUI = 235,
  UBLOCK_MODI = 236,
  UBLOCK_MRO = 237,
  UBLOCK_MYANMAR_EXTENDED_B = 238,
  UBLOCK_NABATAEAN = 239,
  UBLOCK_OLD_NORTH_ARABIAN = 240,
  UBLOCK_OLD_PERMIC = 241,
  UBLOCK_ORNAMENTAL_DINGBATS = 242,
  UBLOCK_PAHAWH_HMONG = 243,
  UBLOCK_PALMYRENE = 244,
  UBLOCK_PAU_CIN_HAU = 245,
  UBLOCK_PSALTER_PAHLAVI = 246,
  UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247,
  UBLOCK_SIDDHAM = 248,
  UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249,
  UBLOCK_SUPPLEMENTAL_ARROWS_C = 250,
  UBLOCK_TIRHUTA = 251,
  UBLOCK_WARANG_CITI = 252,
  UBLOCK_AHOM = 253,
  UBLOCK_ANATOLIAN_HIEROGLYPHS = 254,
  UBLOCK_CHEROKEE_SUPPLEMENT = 255,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256,
  UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257,
  UBLOCK_HATRAN = 258,
  UBLOCK_MULTANI = 259,
  UBLOCK_OLD_HUNGARIAN = 260,
  UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261,
  UBLOCK_SUTTON_SIGNWRITING = 262,
  UBLOCK_ADLAM = 263,
  UBLOCK_BHAIKSUKI = 264,
  UBLOCK_CYRILLIC_EXTENDED_C = 265,
  UBLOCK_GLAGOLITIC_SUPPLEMENT = 266,
  UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 267,
  UBLOCK_MARCHEN = 268,
  UBLOCK_MONGOLIAN_SUPPLEMENT = 269,
  UBLOCK_NEWA = 270,
  UBLOCK_OSAGE = 271,
  UBLOCK_TANGUT = 272,
  UBLOCK_TANGUT_COMPONENTS = 273,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274,
  UBLOCK_KANA_EXTENDED_A = 275,
  UBLOCK_MASARAM_GONDI = 276,
  UBLOCK_NUSHU = 277,
  UBLOCK_SOYOMBO = 278,
  UBLOCK_SYRIAC_SUPPLEMENT = 279,
  UBLOCK_ZANABAZAR_SQUARE = 280,
  UBLOCK_CHESS_SYMBOLS = 281,
  UBLOCK_DOGRA = 282,
  UBLOCK_GEORGIAN_EXTENDED = 283,
  UBLOCK_GUNJALA_GONDI = 284,
  UBLOCK_HANIFI_ROHINGYA = 285,
  UBLOCK_INDIC_SIYAQ_NUMBERS = 286,
  UBLOCK_MAKASAR = 287,
  UBLOCK_MAYAN_NUMERALS = 288,
  UBLOCK_MEDEFAIDRIN = 289,
  UBLOCK_OLD_SOGDIAN = 290,
  UBLOCK_SOGDIAN = 291,
  UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 292,
  UBLOCK_ELYMAIC = 293,
  UBLOCK_NANDINAGARI = 294,
  UBLOCK_NYIAKENG_PUACHUE_HMONG = 295,
  UBLOCK_OTTOMAN_SIYAQ_NUMBERS = 296,
  UBLOCK_SMALL_KANA_EXTENSION = 297,
  UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 298,
  UBLOCK_TAMIL_SUPPLEMENT = 299,
  UBLOCK_WANCHO = 300,
  UBLOCK_CHORASMIAN = 301,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 302,
  UBLOCK_DIVES_AKURU = 303,
  UBLOCK_KHITAN_SMALL_SCRIPT = 304,
  UBLOCK_LISU_SUPPLEMENT = 305,
  UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING = 306,
  UBLOCK_TANGUT_SUPPLEMENT = 307,
  UBLOCK_YEZIDI = 308,
  UBLOCK_ARABIC_EXTENDED_B = 309,
  UBLOCK_CYPRO_MINOAN = 310,
  UBLOCK_ETHIOPIC_EXTENDED_B = 311,
  UBLOCK_KANA_EXTENDED_B = 312,
  UBLOCK_LATIN_EXTENDED_F = 313,
  UBLOCK_LATIN_EXTENDED_G = 314,
  UBLOCK_OLD_UYGHUR = 315,
  UBLOCK_TANGSA = 316,
  UBLOCK_TOTO = 317,
  UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 318,
  UBLOCK_VITHKUQI = 319,
  UBLOCK_ZNAMENNY_MUSICAL_NOTATION = 320,
  UBLOCK_ARABIC_EXTENDED_C = 321,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 322,
  UBLOCK_CYRILLIC_EXTENDED_D = 323,
  UBLOCK_DEVANAGARI_EXTENDED_A = 324,
  UBLOCK_KAKTOVIK_NUMERALS = 325,
  UBLOCK_KAWI = 326,
  UBLOCK_NAG_MUNDARI = 327,
  UBLOCK_COUNT = 328,
  UBLOCK_INVALID_CODE =-1
}
enum
Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
UBreakIteratorType{
  UBRK_CHARACTER = 0,
  UBRK_WORD = 1,
  UBRK_LINE = 2,
  UBRK_SENTENCE = 3,
  UBRK_TITLE = 4,
  UBRK_COUNT = 5
}
enum
The possible types of text boundaries.
UCPMapRangeOption{
  UCPMAP_RANGE_NORMAL,
  UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
  UCPMAP_RANGE_FIXED_ALL_SURROGATES
}
enum
Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
UCharCategory{
  U_UNASSIGNED = 0,
  U_GENERAL_OTHER_TYPES = 0,
  U_UPPERCASE_LETTER = 1,
  U_LOWERCASE_LETTER = 2,
  U_TITLECASE_LETTER = 3,
  U_MODIFIER_LETTER = 4,
  U_OTHER_LETTER = 5,
  U_NON_SPACING_MARK = 6,
  U_ENCLOSING_MARK = 7,
  U_COMBINING_SPACING_MARK = 8,
  U_DECIMAL_DIGIT_NUMBER = 9,
  U_LETTER_NUMBER = 10,
  U_OTHER_NUMBER = 11,
  U_SPACE_SEPARATOR = 12,
  U_LINE_SEPARATOR = 13,
  U_PARAGRAPH_SEPARATOR = 14,
  U_CONTROL_CHAR = 15,
  U_FORMAT_CHAR = 16,
  U_PRIVATE_USE_CHAR = 17,
  U_SURROGATE = 18,
  U_DASH_PUNCTUATION = 19,
  U_START_PUNCTUATION = 20,
  U_END_PUNCTUATION = 21,
  U_CONNECTOR_PUNCTUATION = 22,
  U_OTHER_PUNCTUATION = 23,
  U_MATH_SYMBOL = 24,
  U_CURRENCY_SYMBOL = 25,
  U_MODIFIER_SYMBOL = 26,
  U_OTHER_SYMBOL = 27,
  U_INITIAL_PUNCTUATION = 28,
  U_FINAL_PUNCTUATION = 29,
  U_CHAR_CATEGORY_COUNT
}
enum
Data for enumerated Unicode general category types.
UCharDirection{
  U_LEFT_TO_RIGHT = 0,
  U_RIGHT_TO_LEFT = 1,
  U_EUROPEAN_NUMBER = 2,
  U_EUROPEAN_NUMBER_SEPARATOR = 3,
  U_EUROPEAN_NUMBER_TERMINATOR = 4,
  U_ARABIC_NUMBER = 5,
  U_COMMON_NUMBER_SEPARATOR = 6,
  U_BLOCK_SEPARATOR = 7,
  U_SEGMENT_SEPARATOR = 8,
  U_WHITE_SPACE_NEUTRAL = 9,
  U_OTHER_NEUTRAL = 10,
  U_LEFT_TO_RIGHT_EMBEDDING = 11,
  U_LEFT_TO_RIGHT_OVERRIDE = 12,
  U_RIGHT_TO_LEFT_ARABIC = 13,
  U_RIGHT_TO_LEFT_EMBEDDING = 14,
  U_RIGHT_TO_LEFT_OVERRIDE = 15,
  U_POP_DIRECTIONAL_FORMAT = 16,
  U_DIR_NON_SPACING_MARK = 17,
  U_BOUNDARY_NEUTRAL = 18,
  U_FIRST_STRONG_ISOLATE = 19,
  U_LEFT_TO_RIGHT_ISOLATE = 20,
  U_RIGHT_TO_LEFT_ISOLATE = 21,
  U_POP_DIRECTIONAL_ISOLATE = 22,
  U_CHAR_DIRECTION_COUNT
}
enum
This specifies the language directional property of a character set.
UCharNameChoice{
  U_UNICODE_CHAR_NAME,
  U_UNICODE_10_CHAR_NAME,
  U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,
  U_CHAR_NAME_ALIAS,
  U_CHAR_NAME_CHOICE_COUNT
}
enum
Selector constants for u_charName().
UColAttribute{
  UCOL_FRENCH_COLLATION,
  UCOL_ALTERNATE_HANDLING,
  UCOL_CASE_FIRST,
  UCOL_CASE_LEVEL,
  UCOL_NORMALIZATION_MODE,
  UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
  UCOL_STRENGTH,
  UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1,
  UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2,
  UCOL_ATTRIBUTE_COUNT
}
enum
Attributes that collation service understands.
UColAttributeValue{
  UCOL_DEFAULT = -1,
  UCOL_PRIMARY = 0,
  UCOL_SECONDARY = 1,
  UCOL_TERTIARY = 2,
  UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
  UCOL_CE_STRENGTH_LIMIT,
  UCOL_QUATERNARY =3,
  UCOL_IDENTICAL =15,
  UCOL_STRENGTH_LIMIT,
  UCOL_OFF = 16,
  UCOL_ON = 17,
  UCOL_SHIFTED = 20,
  UCOL_NON_IGNORABLE = 21,
  UCOL_LOWER_FIRST = 24,
  UCOL_UPPER_FIRST = 25,
  UCOL_ATTRIBUTE_VALUE_COUNT
}
enum
Enum containing attribute values for controlling collation behavior.
UColBoundMode{
  UCOL_BOUND_LOWER = 0,
  UCOL_BOUND_UPPER = 1,
  UCOL_BOUND_UPPER_LONG = 2,
  UCOL_BOUND_VALUE_COUNT
}
enum
Enum that is taken by the ucol_getBound API. See below for an explanation. Do not change the values assigned to the members of this enum.
UColReorderCode{
  UCOL_REORDER_CODE_DEFAULT = -1,
  UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN,
  UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN,
  UCOL_REORDER_CODE_SPACE = 0x1000,
  UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE,
  UCOL_REORDER_CODE_PUNCTUATION = 0x1001,
  UCOL_REORDER_CODE_SYMBOL = 0x1002,
  UCOL_REORDER_CODE_CURRENCY = 0x1003,
  UCOL_REORDER_CODE_DIGIT = 0x1004,
  UCOL_REORDER_CODE_LIMIT = 0x1005
}
enum
Enum containing the codes for reordering segments of the collation table that are not script codes.
UCollationResult{
  UCOL_EQUAL = 0,
  UCOL_GREATER = 1,
  UCOL_LESS = -1
}
enum
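Result of a ucol_strcoll() comparison: UCOL_LESS is returned if the source string compares less than the target string, UCOL_EQUAL if the two strings compare equal, and UCOL_GREATER if the source string compares greater than the target string.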
UDecompositionType{
  U_DT_NONE,
  U_DT_CANONICAL,
  U_DT_COMPAT,
  U_DT_CIRCLE,
  U_DT_FINAL,
  U_DT_FONT,
  U_DT_FRACTION,
  U_DT_INITIAL,
  U_DT_ISOLATED,
  U_DT_MEDIAL,
  U_DT_NARROW,
  U_DT_NOBREAK,
  U_DT_SMALL,
  U_DT_SQUARE,
  U_DT_SUB,
  U_DT_SUPER,
  U_DT_VERTICAL,
  U_DT_WIDE,
  U_DT_COUNT
}
enum
Decomposition Type constants.
UDialectHandling{
  ULDN_STANDARD_NAMES = 0,
  ULDN_DIALECT_NAMES
}
enum
Enum used in LocaleDisplayNames::createInstance.
UDisplayContext{
  UDISPCTX_STANDARD_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 0,
  UDISPCTX_DIALECT_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 1,
  UDISPCTX_CAPITALIZATION_NONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 0,
  UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 1,
  UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 2,
  UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 3,
  UDISPCTX_CAPITALIZATION_FOR_STANDALONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 4,
  UDISPCTX_LENGTH_FULL = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 0,
  UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1,
  UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0,
  UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1
}
enum
Display context settings.
UDisplayContextType{
  UDISPCTX_TYPE_DIALECT_HANDLING = 0,
  UDISPCTX_TYPE_CAPITALIZATION = 1,
  UDISPCTX_TYPE_DISPLAY_LENGTH = 2,
  UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3
}
enum
Display context types, for getting values of a particular setting.
UEastAsianWidth{
  U_EA_NEUTRAL,
  U_EA_AMBIGUOUS,
  U_EA_HALFWIDTH,
  U_EA_FULLWIDTH,
  U_EA_NARROW,
  U_EA_WIDE,
  U_EA_COUNT
}
enum
East Asian Width constants.
UErrorCode{
  U_USING_FALLBACK_WARNING = -128,
  U_ERROR_WARNING_START = -128,
  U_USING_DEFAULT_WARNING = -127,
  U_SAFECLONE_ALLOCATED_WARNING = -126,
  U_STATE_OLD_WARNING = -125,
  U_STRING_NOT_TERMINATED_WARNING = -124,
  U_SORT_KEY_TOO_SHORT_WARNING = -123,
  U_AMBIGUOUS_ALIAS_WARNING = -122,
  U_DIFFERENT_UCA_VERSION = -121,
  U_PLUGIN_CHANGED_LEVEL_WARNING = -120,
  U_ERROR_WARNING_LIMIT,
  U_ZERO_ERROR = 0,
  U_ILLEGAL_ARGUMENT_ERROR = 1,
  U_MISSING_RESOURCE_ERROR = 2,
  U_INVALID_FORMAT_ERROR = 3,
  U_FILE_ACCESS_ERROR = 4,
  U_INTERNAL_PROGRAM_ERROR = 5,
  U_MESSAGE_PARSE_ERROR = 6,
  U_MEMORY_ALLOCATION_ERROR = 7,
  U_INDEX_OUTOFBOUNDS_ERROR = 8,
  U_PARSE_ERROR = 9,
  U_INVALID_CHAR_FOUND = 10,
  U_TRUNCATED_CHAR_FOUND = 11,
  U_ILLEGAL_CHAR_FOUND = 12,
  U_INVALID_TABLE_FORMAT = 13,
  U_INVALID_TABLE_FILE = 14,
  U_BUFFER_OVERFLOW_ERROR = 15,
  U_UNSUPPORTED_ERROR = 16,
  U_RESOURCE_TYPE_MISMATCH = 17,
  U_ILLEGAL_ESCAPE_SEQUENCE = 18,
  U_UNSUPPORTED_ESCAPE_SEQUENCE = 19,
  U_NO_SPACE_AVAILABLE = 20,
  U_CE_NOT_FOUND_ERROR = 21,
  U_PRIMARY_TOO_LONG_ERROR = 22,
  U_STATE_TOO_OLD_ERROR = 23,
  U_TOO_MANY_ALIASES_ERROR = 24,
  U_ENUM_OUT_OF_SYNC_ERROR = 25,
  U_INVARIANT_CONVERSION_ERROR = 26,
  U_INVALID_STATE_ERROR = 27,
  U_COLLATOR_VERSION_MISMATCH = 28,
  U_USELESS_COLLATOR_ERROR = 29,
  U_NO_WRITE_PERMISSION = 30,
  U_INPUT_TOO_LONG_ERROR = 31,
  U_STANDARD_ERROR_LIMIT = 32,
  U_BAD_VARIABLE_DEFINITION =0x10000,
  U_PARSE_ERROR_START = 0x10000,
  U_MALFORMED_RULE,
  U_MALFORMED_SET,
  U_MALFORMED_SYMBOL_REFERENCE,
  U_MALFORMED_UNICODE_ESCAPE,
  U_MALFORMED_VARIABLE_DEFINITION,
  U_MALFORMED_VARIABLE_REFERENCE,
  U_MISMATCHED_SEGMENT_DELIMITERS,
  U_MISPLACED_ANCHOR_START,
  U_MISPLACED_CURSOR_OFFSET,
  U_MISPLACED_QUANTIFIER,
  U_MISSING_OPERATOR,
  U_MISSING_SEGMENT_CLOSE,
  U_MULTIPLE_ANTE_CONTEXTS,
  U_MULTIPLE_CURSORS,
  U_MULTIPLE_POST_CONTEXTS,
  U_TRAILING_BACKSLASH,
  U_UNDEFINED_SEGMENT_REFERENCE,
  U_UNDEFINED_VARIABLE,
  U_UNQUOTED_SPECIAL,
  U_UNTERMINATED_QUOTE,
  U_RULE_MASK_ERROR,
  U_MISPLACED_COMPOUND_FILTER,
  U_MULTIPLE_COMPOUND_FILTERS,
  U_INVALID_RBT_SYNTAX,
  U_INVALID_PROPERTY_PATTERN,
  U_MALFORMED_PRAGMA,
  U_UNCLOSED_SEGMENT,
  U_ILLEGAL_CHAR_IN_SEGMENT,
  U_VARIABLE_RANGE_EXHAUSTED,
  U_VARIABLE_RANGE_OVERLAP,
  U_ILLEGAL_CHARACTER,
  U_INTERNAL_TRANSLITERATOR_ERROR,
  U_INVALID_ID,
  U_INVALID_FUNCTION,
  U_PARSE_ERROR_LIMIT,
  U_UNEXPECTED_TOKEN =0x10100,
  U_FMT_PARSE_ERROR_START =0x10100,
  U_MULTIPLE_DECIMAL_SEPARATORS,
  U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS,
  U_MULTIPLE_EXPONENTIAL_SYMBOLS,
  U_MALFORMED_EXPONENTIAL_PATTERN,
  U_MULTIPLE_PERCENT_SYMBOLS,
  U_MULTIPLE_PERMILL_SYMBOLS,
  U_MULTIPLE_PAD_SPECIFIERS,
  U_PATTERN_SYNTAX_ERROR,
  U_ILLEGAL_PAD_POSITION,
  U_UNMATCHED_BRACES,
  U_UNSUPPORTED_PROPERTY,
  U_UNSUPPORTED_ATTRIBUTE,
  U_ARGUMENT_TYPE_MISMATCH,
  U_DUPLICATE_KEYWORD,
  U_UNDEFINED_KEYWORD,
  U_DEFAULT_KEYWORD_MISSING,
  U_DECIMAL_NUMBER_SYNTAX_ERROR,
  U_FORMAT_INEXACT_ERROR,
  U_NUMBER_ARG_OUTOFBOUNDS_ERROR,
  U_NUMBER_SKELETON_SYNTAX_ERROR,
  U_FMT_PARSE_ERROR_LIMIT = 0x10114,
  U_BRK_INTERNAL_ERROR =0x10200,
  U_BRK_ERROR_START =0x10200,
  U_BRK_HEX_DIGITS_EXPECTED,
  U_BRK_SEMICOLON_EXPECTED,
  U_BRK_RULE_SYNTAX,
  U_BRK_UNCLOSED_SET,
  U_BRK_ASSIGN_ERROR,
  U_BRK_VARIABLE_REDFINITION,
  U_BRK_MISMATCHED_PAREN,
  U_BRK_NEW_LINE_IN_QUOTED_STRING,
  U_BRK_UNDEFINED_VARIABLE,
  U_BRK_INIT_ERROR,
  U_BRK_RULE_EMPTY_SET,
  U_BRK_UNRECOGNIZED_OPTION,
  U_BRK_MALFORMED_RULE_TAG,
  U_BRK_ERROR_LIMIT,
  U_REGEX_INTERNAL_ERROR =0x10300,
  U_REGEX_ERROR_START =0x10300,
  U_REGEX_RULE_SYNTAX,
  U_REGEX_INVALID_STATE,
  U_REGEX_BAD_ESCAPE_SEQUENCE,
  U_REGEX_PROPERTY_SYNTAX,
  U_REGEX_UNIMPLEMENTED,
  U_REGEX_MISMATCHED_PAREN,
  U_REGEX_NUMBER_TOO_BIG,
  U_REGEX_BAD_INTERVAL,
  U_REGEX_MAX_LT_MIN,
  U_REGEX_INVALID_BACK_REF,
  U_REGEX_INVALID_FLAG,
  U_REGEX_LOOK_BEHIND_LIMIT,
  U_REGEX_SET_CONTAINS_STRING,
  U_REGEX_OCTAL_TOO_BIG,
  U_REGEX_MISSING_CLOSE_BRACKET =U_REGEX_SET_CONTAINS_STRING+2,
  U_REGEX_INVALID_RANGE,
  U_REGEX_STACK_OVERFLOW,
  U_REGEX_TIME_OUT,
  U_REGEX_STOPPED_BY_CALLER,
  U_REGEX_PATTERN_TOO_BIG,
  U_REGEX_INVALID_CAPTURE_GROUP_NAME,
  U_REGEX_ERROR_LIMIT =U_REGEX_STOPPED_BY_CALLER+3,
  U_IDNA_PROHIBITED_ERROR =0x10400,
  U_IDNA_ERROR_START =0x10400,
  U_IDNA_UNASSIGNED_ERROR,
  U_IDNA_CHECK_BIDI_ERROR,
  U_IDNA_STD3_ASCII_RULES_ERROR,
  U_IDNA_ACE_PREFIX_ERROR,
  U_IDNA_VERIFICATION_ERROR,
  U_IDNA_LABEL_TOO_LONG_ERROR,
  U_IDNA_ZERO_LENGTH_LABEL_ERROR,
  U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
  U_IDNA_ERROR_LIMIT,
  U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
  U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
  U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
  U_PLUGIN_ERROR_START =0x10500,
  U_PLUGIN_TOO_HIGH =0x10500,
  U_PLUGIN_DIDNT_SET_LEVEL,
  U_PLUGIN_ERROR_LIMIT,
  U_ERROR_LIMIT =U_PLUGIN_ERROR_LIMIT
}
enum
Standard ICU4C error code type, a substitute for exceptions.
UGraphemeClusterBreak{
  U_GCB_OTHER = 0,
  U_GCB_CONTROL = 1,
  U_GCB_CR = 2,
  U_GCB_EXTEND = 3,
  U_GCB_L = 4,
  U_GCB_LF = 5,
  U_GCB_LV = 6,
  U_GCB_LVT = 7,
  U_GCB_T = 8,
  U_GCB_V = 9,
  U_GCB_SPACING_MARK = 10,
  U_GCB_PREPEND = 11,
  U_GCB_REGIONAL_INDICATOR = 12,
  U_GCB_E_BASE = 13,
  U_GCB_E_BASE_GAZ = 14,
  U_GCB_E_MODIFIER = 15,
  U_GCB_GLUE_AFTER_ZWJ = 16,
  U_GCB_ZWJ = 17,
  U_GCB_COUNT = 18
}
enum
Grapheme Cluster Break constants.
UHangulSyllableType{
  U_HST_NOT_APPLICABLE,
  U_HST_LEADING_JAMO,
  U_HST_VOWEL_JAMO,
  U_HST_TRAILING_JAMO,
  U_HST_LV_SYLLABLE,
  U_HST_LVT_SYLLABLE,
  U_HST_COUNT
}
enum
Hangul Syllable Type constants.
UIndicPositionalCategory{
  U_INPC_NA,
  U_INPC_BOTTOM,
  U_INPC_BOTTOM_AND_LEFT,
  U_INPC_BOTTOM_AND_RIGHT,
  U_INPC_LEFT,
  U_INPC_LEFT_AND_RIGHT,
  U_INPC_OVERSTRUCK,
  U_INPC_RIGHT,
  U_INPC_TOP,
  U_INPC_TOP_AND_BOTTOM,
  U_INPC_TOP_AND_BOTTOM_AND_RIGHT,
  U_INPC_TOP_AND_LEFT,
  U_INPC_TOP_AND_LEFT_AND_RIGHT,
  U_INPC_TOP_AND_RIGHT,
  U_INPC_VISUAL_ORDER_LEFT,
  U_INPC_TOP_AND_BOTTOM_AND_LEFT
}
enum
Indic Positional Category constants.
UIndicSyllabicCategory{
  U_INSC_OTHER,
  U_INSC_AVAGRAHA,
  U_INSC_BINDU,
  U_INSC_BRAHMI_JOINING_NUMBER,
  U_INSC_CANTILLATION_MARK,
  U_INSC_CONSONANT,
  U_INSC_CONSONANT_DEAD,
  U_INSC_CONSONANT_FINAL,
  U_INSC_CONSONANT_HEAD_LETTER,
  U_INSC_CONSONANT_INITIAL_POSTFIXED,
  U_INSC_CONSONANT_KILLER,
  U_INSC_CONSONANT_MEDIAL,
  U_INSC_CONSONANT_PLACEHOLDER,
  U_INSC_CONSONANT_PRECEDING_REPHA,
  U_INSC_CONSONANT_PREFIXED,
  U_INSC_CONSONANT_SUBJOINED,
  U_INSC_CONSONANT_SUCCEEDING_REPHA,
  U_INSC_CONSONANT_WITH_STACKER,
  U_INSC_GEMINATION_MARK,
  U_INSC_INVISIBLE_STACKER,
  U_INSC_JOINER,
  U_INSC_MODIFYING_LETTER,
  U_INSC_NON_JOINER,
  U_INSC_NUKTA,
  U_INSC_NUMBER,
  U_INSC_NUMBER_JOINER,
  U_INSC_PURE_KILLER,
  U_INSC_REGISTER_SHIFTER,
  U_INSC_SYLLABLE_MODIFIER,
  U_INSC_TONE_LETTER,
  U_INSC_TONE_MARK,
  U_INSC_VIRAMA,
  U_INSC_VISARGA,
  U_INSC_VOWEL,
  U_INSC_VOWEL_DEPENDENT,
  U_INSC_VOWEL_INDEPENDENT
}
enum
Indic Syllabic Category constants.
UJoiningGroup{
  U_JG_NO_JOINING_GROUP,
  U_JG_AIN,
  U_JG_ALAPH,
  U_JG_ALEF,
  U_JG_BEH,
  U_JG_BETH,
  U_JG_DAL,
  U_JG_DALATH_RISH,
  U_JG_E,
  U_JG_FEH,
  U_JG_FINAL_SEMKATH,
  U_JG_GAF,
  U_JG_GAMAL,
  U_JG_HAH,
  U_JG_TEH_MARBUTA_GOAL,
  U_JG_HAMZA_ON_HEH_GOAL =U_JG_TEH_MARBUTA_GOAL,
  U_JG_HE,
  U_JG_HEH,
  U_JG_HEH_GOAL,
  U_JG_HETH,
  U_JG_KAF,
  U_JG_KAPH,
  U_JG_KNOTTED_HEH,
  U_JG_LAM,
  U_JG_LAMADH,
  U_JG_MEEM,
  U_JG_MIM,
  U_JG_NOON,
  U_JG_NUN,
  U_JG_PE,
  U_JG_QAF,
  U_JG_QAPH,
  U_JG_REH,
  U_JG_REVERSED_PE,
  U_JG_SAD,
  U_JG_SADHE,
  U_JG_SEEN,
  U_JG_SEMKATH,
  U_JG_SHIN,
  U_JG_SWASH_KAF,
  U_JG_SYRIAC_WAW,
  U_JG_TAH,
  U_JG_TAW,
  U_JG_TEH_MARBUTA,
  U_JG_TETH,
  U_JG_WAW,
  U_JG_YEH,
  U_JG_YEH_BARREE,
  U_JG_YEH_WITH_TAIL,
  U_JG_YUDH,
  U_JG_YUDH_HE,
  U_JG_ZAIN,
  U_JG_FE,
  U_JG_KHAPH,
  U_JG_ZHAIN,
  U_JG_BURUSHASKI_YEH_BARREE,
  U_JG_FARSI_YEH,
  U_JG_NYA,
  U_JG_ROHINGYA_YEH,
  U_JG_MANICHAEAN_ALEPH,
  U_JG_MANICHAEAN_AYIN,
  U_JG_MANICHAEAN_BETH,
  U_JG_MANICHAEAN_DALETH,
  U_JG_MANICHAEAN_DHAMEDH,
  U_JG_MANICHAEAN_FIVE,
  U_JG_MANICHAEAN_GIMEL,
  U_JG_MANICHAEAN_HETH,
  U_JG_MANICHAEAN_HUNDRED,
  U_JG_MANICHAEAN_KAPH,
  U_JG_MANICHAEAN_LAMEDH,
  U_JG_MANICHAEAN_MEM,
  U_JG_MANICHAEAN_NUN,
  U_JG_MANICHAEAN_ONE,
  U_JG_MANICHAEAN_PE,
  U_JG_MANICHAEAN_QOPH,
  U_JG_MANICHAEAN_RESH,
  U_JG_MANICHAEAN_SADHE,
  U_JG_MANICHAEAN_SAMEKH,
  U_JG_MANICHAEAN_TAW,
  U_JG_MANICHAEAN_TEN,
  U_JG_MANICHAEAN_TETH,
  U_JG_MANICHAEAN_THAMEDH,
  U_JG_MANICHAEAN_TWENTY,
  U_JG_MANICHAEAN_WAW,
  U_JG_MANICHAEAN_YODH,
  U_JG_MANICHAEAN_ZAYIN,
  U_JG_STRAIGHT_WAW,
  U_JG_AFRICAN_FEH,
  U_JG_AFRICAN_NOON,
  U_JG_AFRICAN_QAF,
  U_JG_MALAYALAM_BHA,
  U_JG_MALAYALAM_JA,
  U_JG_MALAYALAM_LLA,
  U_JG_MALAYALAM_LLLA,
  U_JG_MALAYALAM_NGA,
  U_JG_MALAYALAM_NNA,
  U_JG_MALAYALAM_NNNA,
  U_JG_MALAYALAM_NYA,
  U_JG_MALAYALAM_RA,
  U_JG_MALAYALAM_SSA,
  U_JG_MALAYALAM_TTA,
  U_JG_HANIFI_ROHINGYA_KINNA_YA,
  U_JG_HANIFI_ROHINGYA_PA,
  U_JG_THIN_YEH,
  U_JG_VERTICAL_TAIL,
  U_JG_COUNT
}
enum
Joining Group constants.
UJoiningType{
  U_JT_NON_JOINING,
  U_JT_JOIN_CAUSING,
  U_JT_DUAL_JOINING,
  U_JT_LEFT_JOINING,
  U_JT_RIGHT_JOINING,
  U_JT_TRANSPARENT,
  U_JT_COUNT
}
enum
Joining Type constants.
ULayoutType{
  ULOC_LAYOUT_LTR = 0,
  ULOC_LAYOUT_RTL = 1,
  ULOC_LAYOUT_TTB = 2,
  ULOC_LAYOUT_BTT = 3,
  ULOC_LAYOUT_UNKNOWN
}
enum
Enums for the return values of the character and line orientation functions.
ULineBreak{
  U_LB_UNKNOWN = 0,
  U_LB_AMBIGUOUS = 1,
  U_LB_ALPHABETIC = 2,
  U_LB_BREAK_BOTH = 3,
  U_LB_BREAK_AFTER = 4,
  U_LB_BREAK_BEFORE = 5,
  U_LB_MANDATORY_BREAK = 6,
  U_LB_CONTINGENT_BREAK = 7,
  U_LB_CLOSE_PUNCTUATION = 8,
  U_LB_COMBINING_MARK = 9,
  U_LB_CARRIAGE_RETURN = 10,
  U_LB_EXCLAMATION = 11,
  U_LB_GLUE = 12,
  U_LB_HYPHEN = 13,
  U_LB_IDEOGRAPHIC = 14,
  U_LB_INSEPARABLE = 15,
  U_LB_INSEPERABLE = U_LB_INSEPARABLE,
  U_LB_INFIX_NUMERIC = 16,
  U_LB_LINE_FEED = 17,
  U_LB_NONSTARTER = 18,
  U_LB_NUMERIC = 19,
  U_LB_OPEN_PUNCTUATION = 20,
  U_LB_POSTFIX_NUMERIC = 21,
  U_LB_PREFIX_NUMERIC = 22,
  U_LB_QUOTATION = 23,
  U_LB_COMPLEX_CONTEXT = 24,
  U_LB_SURROGATE = 25,
  U_LB_SPACE = 26,
  U_LB_BREAK_SYMBOLS = 27,
  U_LB_ZWSPACE = 28,
  U_LB_NEXT_LINE = 29,
  U_LB_WORD_JOINER = 30,
  U_LB_H2 = 31,
  U_LB_H3 = 32,
  U_LB_JL = 33,
  U_LB_JT = 34,
  U_LB_JV = 35,
  U_LB_CLOSE_PARENTHESIS = 36,
  U_LB_CONDITIONAL_JAPANESE_STARTER = 37,
  U_LB_HEBREW_LETTER = 38,
  U_LB_REGIONAL_INDICATOR = 39,
  U_LB_E_BASE = 40,
  U_LB_E_MODIFIER = 41,
  U_LB_ZWJ = 42,
  U_LB_COUNT = 43
}
enum
Line Break constants.
ULineBreakTag{
  UBRK_LINE_SOFT = 0,
  UBRK_LINE_SOFT_LIMIT = 100,
  UBRK_LINE_HARD = 100,
  UBRK_LINE_HARD_LIMIT = 200
}
enum
Enum constants for the line break tags returned by getRuleStatus().
ULocAvailableType{
  ULOC_AVAILABLE_DEFAULT,
  ULOC_AVAILABLE_ONLY_LEGACY_ALIASES,
  ULOC_AVAILABLE_WITH_LEGACY_ALIASES,
  ULOC_AVAILABLE_COUNT
}
enum
Types for uloc_getAvailableByType and uloc_countAvailableByType.
ULocDataLocaleType{
  ULOC_ACTUAL_LOCALE = 0,
  ULOC_VALID_LOCALE = 1,
  ULOC_REQUESTED_LOCALE = 2,
  ULOC_DATA_LOCALE_TYPE_LIMIT = 3
}
enum
Constants for *_getLocale(). Allows the user to select whether she wants information on the requested, valid, or actual locale.
UNormalization2Mode{
  UNORM2_COMPOSE,
  UNORM2_DECOMPOSE,
  UNORM2_FCD,
  UNORM2_COMPOSE_CONTIGUOUS
}
enum
Constants for normalization modes.
UNormalizationCheckResult{
  UNORM_NO,
  UNORM_YES,
  UNORM_MAYBE
}
enum
Result values for normalization quick check functions.
UNumericType{
  U_NT_NONE,
  U_NT_DECIMAL,
  U_NT_DIGIT,
  U_NT_NUMERIC,
  U_NT_COUNT
}
enum
Numeric Type constants.
UProperty{
  UCHAR_ALPHABETIC =0,
  UCHAR_BINARY_START =UCHAR_ALPHABETIC,
  UCHAR_ASCII_HEX_DIGIT =1,
  UCHAR_BIDI_CONTROL =2,
  UCHAR_BIDI_MIRRORED =3,
  UCHAR_DASH =4,
  UCHAR_DEFAULT_IGNORABLE_CODE_POINT =5,
  UCHAR_DEPRECATED =6,
  UCHAR_DIACRITIC =7,
  UCHAR_EXTENDER =8,
  UCHAR_FULL_COMPOSITION_EXCLUSION =9,
  UCHAR_GRAPHEME_BASE =10,
  UCHAR_GRAPHEME_EXTEND =11,
  UCHAR_GRAPHEME_LINK =12,
  UCHAR_HEX_DIGIT =13,
  UCHAR_HYPHEN =14,
  UCHAR_ID_CONTINUE =15,
  UCHAR_ID_START =16,
  UCHAR_IDEOGRAPHIC =17,
  UCHAR_IDS_BINARY_OPERATOR =18,
  UCHAR_IDS_TRINARY_OPERATOR =19,
  UCHAR_JOIN_CONTROL =20,
  UCHAR_LOGICAL_ORDER_EXCEPTION =21,
  UCHAR_LOWERCASE =22,
  UCHAR_MATH =23,
  UCHAR_NONCHARACTER_CODE_POINT =24,
  UCHAR_QUOTATION_MARK =25,
  UCHAR_RADICAL =26,
  UCHAR_SOFT_DOTTED =27,
  UCHAR_TERMINAL_PUNCTUATION =28,
  UCHAR_UNIFIED_IDEOGRAPH =29,
  UCHAR_UPPERCASE =30,
  UCHAR_WHITE_SPACE =31,
  UCHAR_XID_CONTINUE =32,
  UCHAR_XID_START =33,
  UCHAR_CASE_SENSITIVE =34,
  UCHAR_S_TERM =35,
  UCHAR_VARIATION_SELECTOR =36,
  UCHAR_NFD_INERT =37,
  UCHAR_NFKD_INERT =38,
  UCHAR_NFC_INERT =39,
  UCHAR_NFKC_INERT =40,
  UCHAR_SEGMENT_STARTER =41,
  UCHAR_PATTERN_SYNTAX =42,
  UCHAR_PATTERN_WHITE_SPACE =43,
  UCHAR_POSIX_ALNUM =44,
  UCHAR_POSIX_BLANK =45,
  UCHAR_POSIX_GRAPH =46,
  UCHAR_POSIX_PRINT =47,
  UCHAR_POSIX_XDIGIT =48,
  UCHAR_CASED =49,
  UCHAR_CASE_IGNORABLE =50,
  UCHAR_CHANGES_WHEN_LOWERCASED =51,
  UCHAR_CHANGES_WHEN_UPPERCASED =52,
  UCHAR_CHANGES_WHEN_TITLECASED =53,
  UCHAR_CHANGES_WHEN_CASEFOLDED =54,
  UCHAR_CHANGES_WHEN_CASEMAPPED =55,
  UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED =56,
  UCHAR_EMOJI =57,
  UCHAR_EMOJI_PRESENTATION =58,
  UCHAR_EMOJI_MODIFIER =59,
  UCHAR_EMOJI_MODIFIER_BASE =60,
  UCHAR_EMOJI_COMPONENT =61,
  UCHAR_REGIONAL_INDICATOR =62,
  UCHAR_PREPENDED_CONCATENATION_MARK =63,
  UCHAR_EXTENDED_PICTOGRAPHIC =64,
  UCHAR_BASIC_EMOJI =65,
  UCHAR_EMOJI_KEYCAP_SEQUENCE =66,
  UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE =67,
  UCHAR_RGI_EMOJI_FLAG_SEQUENCE =68,
  UCHAR_RGI_EMOJI_TAG_SEQUENCE =69,
  UCHAR_RGI_EMOJI_ZWJ_SEQUENCE =70,
  UCHAR_RGI_EMOJI =71,
  UCHAR_BINARY_LIMIT =72,
  UCHAR_BIDI_CLASS =0x1000,
  UCHAR_INT_START =UCHAR_BIDI_CLASS,
  UCHAR_BLOCK =0x1001,
  UCHAR_CANONICAL_COMBINING_CLASS =0x1002,
  UCHAR_DECOMPOSITION_TYPE =0x1003,
  UCHAR_EAST_ASIAN_WIDTH =0x1004,
  UCHAR_GENERAL_CATEGORY =0x1005,
  UCHAR_JOINING_GROUP =0x1006,
  UCHAR_JOINING_TYPE =0x1007,
  UCHAR_LINE_BREAK =0x1008,
  UCHAR_NUMERIC_TYPE =0x1009,
  UCHAR_SCRIPT =0x100A,
  UCHAR_HANGUL_SYLLABLE_TYPE =0x100B,
  UCHAR_NFD_QUICK_CHECK =0x100C,
  UCHAR_NFKD_QUICK_CHECK =0x100D,
  UCHAR_NFC_QUICK_CHECK =0x100E,
  UCHAR_NFKC_QUICK_CHECK =0x100F,
  UCHAR_LEAD_CANONICAL_COMBINING_CLASS =0x1010,
  UCHAR_TRAIL_CANONICAL_COMBINING_CLASS =0x1011,
  UCHAR_GRAPHEME_CLUSTER_BREAK =0x1012,
  UCHAR_SENTENCE_BREAK =0x1013,
  UCHAR_WORD_BREAK =0x1014,
  UCHAR_BIDI_PAIRED_BRACKET_TYPE =0x1015,
  UCHAR_INDIC_POSITIONAL_CATEGORY =0x1016,
  UCHAR_INDIC_SYLLABIC_CATEGORY =0x1017,
  UCHAR_VERTICAL_ORIENTATION =0x1018,
  UCHAR_INT_LIMIT =0x1019,
  UCHAR_GENERAL_CATEGORY_MASK =0x2000,
  UCHAR_MASK_START =UCHAR_GENERAL_CATEGORY_MASK,
  UCHAR_MASK_LIMIT =0x2001,
  UCHAR_NUMERIC_VALUE =0x3000,
  UCHAR_DOUBLE_START =UCHAR_NUMERIC_VALUE,
  UCHAR_DOUBLE_LIMIT =0x3001,
  UCHAR_AGE =0x4000,
  UCHAR_STRING_START =UCHAR_AGE,
  UCHAR_BIDI_MIRRORING_GLYPH =0x4001,
  UCHAR_CASE_FOLDING =0x4002,
  UCHAR_ISO_COMMENT =0x4003,
  UCHAR_LOWERCASE_MAPPING =0x4004,
  UCHAR_NAME =0x4005,
  UCHAR_SIMPLE_CASE_FOLDING =0x4006,
  UCHAR_SIMPLE_LOWERCASE_MAPPING =0x4007,
  UCHAR_SIMPLE_TITLECASE_MAPPING =0x4008,
  UCHAR_SIMPLE_UPPERCASE_MAPPING =0x4009,
  UCHAR_TITLECASE_MAPPING =0x400A,
  UCHAR_UNICODE_1_NAME =0x400B,
  UCHAR_UPPERCASE_MAPPING =0x400C,
  UCHAR_BIDI_PAIRED_BRACKET =0x400D,
  UCHAR_STRING_LIMIT =0x400E,
  UCHAR_SCRIPT_EXTENSIONS =0x7000,
  UCHAR_OTHER_PROPERTY_START =UCHAR_SCRIPT_EXTENSIONS,
  UCHAR_OTHER_PROPERTY_LIMIT =0x7001,
  UCHAR_INVALID_CODE = -1
}
enum
Selection constants for Unicode properties.
UPropertyNameChoice{
  U_SHORT_PROPERTY_NAME,
  U_LONG_PROPERTY_NAME,
  U_PROPERTY_NAME_CHOICE_COUNT
}
enum
Selector constants for u_getPropertyName() and u_getPropertyValueName().
UScriptCode{
  USCRIPT_INVALID_CODE = -1,
  USCRIPT_COMMON = 0,
  USCRIPT_INHERITED = 1,
  USCRIPT_ARABIC = 2,
  USCRIPT_ARMENIAN = 3,
  USCRIPT_BENGALI = 4,
  USCRIPT_BOPOMOFO = 5,
  USCRIPT_CHEROKEE = 6,
  USCRIPT_COPTIC = 7,
  USCRIPT_CYRILLIC = 8,
  USCRIPT_DESERET = 9,
  USCRIPT_DEVANAGARI = 10,
  USCRIPT_ETHIOPIC = 11,
  USCRIPT_GEORGIAN = 12,
  USCRIPT_GOTHIC = 13,
  USCRIPT_GREEK = 14,
  USCRIPT_GUJARATI = 15,
  USCRIPT_GURMUKHI = 16,
  USCRIPT_HAN = 17,
  USCRIPT_HANGUL = 18,
  USCRIPT_HEBREW = 19,
  USCRIPT_HIRAGANA = 20,
  USCRIPT_KANNADA = 21,
  USCRIPT_KATAKANA = 22,
  USCRIPT_KHMER = 23,
  USCRIPT_LAO = 24,
  USCRIPT_LATIN = 25,
  USCRIPT_MALAYALAM = 26,
  USCRIPT_MONGOLIAN = 27,
  USCRIPT_MYANMAR = 28,
  USCRIPT_OGHAM = 29,
  USCRIPT_OLD_ITALIC = 30,
  USCRIPT_ORIYA = 31,
  USCRIPT_RUNIC = 32,
  USCRIPT_SINHALA = 33,
  USCRIPT_SYRIAC = 34,
  USCRIPT_TAMIL = 35,
  USCRIPT_TELUGU = 36,
  USCRIPT_THAANA = 37,
  USCRIPT_THAI = 38,
  USCRIPT_TIBETAN = 39,
  USCRIPT_CANADIAN_ABORIGINAL = 40,
  USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
  USCRIPT_YI = 41,
  USCRIPT_TAGALOG = 42,
  USCRIPT_HANUNOO = 43,
  USCRIPT_BUHID = 44,
  USCRIPT_TAGBANWA = 45,
  USCRIPT_BRAILLE = 46,
  USCRIPT_CYPRIOT = 47,
  USCRIPT_LIMBU = 48,
  USCRIPT_LINEAR_B = 49,
  USCRIPT_OSMANYA = 50,
  USCRIPT_SHAVIAN = 51,
  USCRIPT_TAI_LE = 52,
  USCRIPT_UGARITIC = 53,
  USCRIPT_KATAKANA_OR_HIRAGANA = 54,
  USCRIPT_BUGINESE = 55,
  USCRIPT_GLAGOLITIC = 56,
  USCRIPT_KHAROSHTHI = 57,
  USCRIPT_SYLOTI_NAGRI = 58,
  USCRIPT_NEW_TAI_LUE = 59,
  USCRIPT_TIFINAGH = 60,
  USCRIPT_OLD_PERSIAN = 61,
  USCRIPT_BALINESE = 62,
  USCRIPT_BATAK = 63,
  USCRIPT_BLISSYMBOLS = 64,
  USCRIPT_BRAHMI = 65,
  USCRIPT_CHAM = 66,
  USCRIPT_CIRTH = 67,
  USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68,
  USCRIPT_DEMOTIC_EGYPTIAN = 69,
  USCRIPT_HIERATIC_EGYPTIAN = 70,
  USCRIPT_EGYPTIAN_HIEROGLYPHS = 71,
  USCRIPT_KHUTSURI = 72,
  USCRIPT_SIMPLIFIED_HAN = 73,
  USCRIPT_TRADITIONAL_HAN = 74,
  USCRIPT_PAHAWH_HMONG = 75,
  USCRIPT_OLD_HUNGARIAN = 76,
  USCRIPT_HARAPPAN_INDUS = 77,
  USCRIPT_JAVANESE = 78,
  USCRIPT_KAYAH_LI = 79,
  USCRIPT_LATIN_FRAKTUR = 80,
  USCRIPT_LATIN_GAELIC = 81,
  USCRIPT_LEPCHA = 82,
  USCRIPT_LINEAR_A = 83,
  USCRIPT_MANDAIC = 84,
  USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
  USCRIPT_MAYAN_HIEROGLYPHS = 85,
  USCRIPT_MEROITIC_HIEROGLYPHS = 86,
  USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
  USCRIPT_NKO = 87,
  USCRIPT_ORKHON = 88,
  USCRIPT_OLD_PERMIC = 89,
  USCRIPT_PHAGS_PA = 90,
  USCRIPT_PHOENICIAN = 91,
  USCRIPT_MIAO = 92,
  USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO,
  USCRIPT_RONGORONGO = 93,
  USCRIPT_SARATI = 94,
  USCRIPT_ESTRANGELO_SYRIAC = 95,
  USCRIPT_WESTERN_SYRIAC = 96,
  USCRIPT_EASTERN_SYRIAC = 97,
  USCRIPT_TENGWAR = 98,
  USCRIPT_VAI = 99,
  USCRIPT_VISIBLE_SPEECH = 100,
  USCRIPT_CUNEIFORM = 101,
  USCRIPT_UNWRITTEN_LANGUAGES = 102,
  USCRIPT_UNKNOWN = 103,
  USCRIPT_CARIAN = 104,
  USCRIPT_JAPANESE = 105,
  USCRIPT_LANNA = 106,
  USCRIPT_LYCIAN = 107,
  USCRIPT_LYDIAN = 108,
  USCRIPT_OL_CHIKI = 109,
  USCRIPT_REJANG = 110,
  USCRIPT_SAURASHTRA = 111,
  USCRIPT_SIGN_WRITING = 112,
  USCRIPT_SUNDANESE = 113,
  USCRIPT_MOON = 114,
  USCRIPT_MEITEI_MAYEK = 115,
  USCRIPT_IMPERIAL_ARAMAIC = 116,
  USCRIPT_AVESTAN = 117,
  USCRIPT_CHAKMA = 118,
  USCRIPT_KOREAN = 119,
  USCRIPT_KAITHI = 120,
  USCRIPT_MANICHAEAN = 121,
  USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,
  USCRIPT_PSALTER_PAHLAVI = 123,
  USCRIPT_BOOK_PAHLAVI = 124,
  USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,
  USCRIPT_SAMARITAN = 126,
  USCRIPT_TAI_VIET = 127,
  USCRIPT_MATHEMATICAL_NOTATION = 128,
  USCRIPT_SYMBOLS = 129,
  USCRIPT_BAMUM = 130,
  USCRIPT_LISU = 131,
  USCRIPT_NAKHI_GEBA = 132,
  USCRIPT_OLD_SOUTH_ARABIAN = 133,
  USCRIPT_BASSA_VAH = 134,
  USCRIPT_DUPLOYAN = 135,
  USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN,
  USCRIPT_ELBASAN = 136,
  USCRIPT_GRANTHA = 137,
  USCRIPT_KPELLE = 138,
  USCRIPT_LOMA = 139,
  USCRIPT_MENDE = 140,
  USCRIPT_MEROITIC_CURSIVE = 141,
  USCRIPT_OLD_NORTH_ARABIAN = 142,
  USCRIPT_NABATAEAN = 143,
  USCRIPT_PALMYRENE = 144,
  USCRIPT_KHUDAWADI = 145,
  USCRIPT_SINDHI = USCRIPT_KHUDAWADI,
  USCRIPT_WARANG_CITI = 146,
  USCRIPT_AFAKA = 147,
  USCRIPT_JURCHEN = 148,
  USCRIPT_MRO = 149,
  USCRIPT_NUSHU = 150,
  USCRIPT_SHARADA = 151,
  USCRIPT_SORA_SOMPENG = 152,
  USCRIPT_TAKRI = 153,
  USCRIPT_TANGUT = 154,
  USCRIPT_WOLEAI = 155,
  USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,
  USCRIPT_KHOJKI = 157,
  USCRIPT_TIRHUTA = 158,
  USCRIPT_CAUCASIAN_ALBANIAN = 159,
  USCRIPT_MAHAJANI = 160,
  USCRIPT_AHOM = 161,
  USCRIPT_HATRAN = 162,
  USCRIPT_MODI = 163,
  USCRIPT_MULTANI = 164,
  USCRIPT_PAU_CIN_HAU = 165,
  USCRIPT_SIDDHAM = 166,
  USCRIPT_ADLAM = 167,
  USCRIPT_BHAIKSUKI = 168,
  USCRIPT_MARCHEN = 169,
  USCRIPT_NEWA = 170,
  USCRIPT_OSAGE = 171,
  USCRIPT_HAN_WITH_BOPOMOFO = 172,
  USCRIPT_JAMO = 173,
  USCRIPT_SYMBOLS_EMOJI = 174,
  USCRIPT_MASARAM_GONDI = 175,
  USCRIPT_SOYOMBO = 176,
  USCRIPT_ZANABAZAR_SQUARE = 177,
  USCRIPT_DOGRA = 178,
  USCRIPT_GUNJALA_GONDI = 179,
  USCRIPT_MAKASAR = 180,
  USCRIPT_MEDEFAIDRIN = 181,
  USCRIPT_HANIFI_ROHINGYA = 182,
  USCRIPT_SOGDIAN = 183,
  USCRIPT_OLD_SOGDIAN = 184,
  USCRIPT_ELYMAIC = 185,
  USCRIPT_NYIAKENG_PUACHUE_HMONG = 186,
  USCRIPT_NANDINAGARI = 187,
  USCRIPT_WANCHO = 188,
  USCRIPT_CHORASMIAN = 189,
  USCRIPT_DIVES_AKURU = 190,
  USCRIPT_KHITAN_SMALL_SCRIPT = 191,
  USCRIPT_YEZIDI = 192,
  USCRIPT_CYPRO_MINOAN = 193,
  USCRIPT_OLD_UYGHUR = 194,
  USCRIPT_TANGSA = 195,
  USCRIPT_TOTO = 196,
  USCRIPT_VITHKUQI = 197,
  USCRIPT_KAWI = 198,
  USCRIPT_NAG_MUNDARI = 199,
  USCRIPT_CODE_LIMIT = 200
}
enum
Constants for ISO 15924 script codes.
UScriptUsage{
  USCRIPT_USAGE_NOT_ENCODED,
  USCRIPT_USAGE_UNKNOWN,
  USCRIPT_USAGE_EXCLUDED,
  USCRIPT_USAGE_LIMITED_USE,
  USCRIPT_USAGE_ASPIRATIONAL,
  USCRIPT_USAGE_RECOMMENDED
}
enum
Script usage constants.
USentenceBreak{
  U_SB_OTHER = 0,
  U_SB_ATERM = 1,
  U_SB_CLOSE = 2,
  U_SB_FORMAT = 3,
  U_SB_LOWER = 4,
  U_SB_NUMERIC = 5,
  U_SB_OLETTER = 6,
  U_SB_SEP = 7,
  U_SB_SP = 8,
  U_SB_STERM = 9,
  U_SB_UPPER = 10,
  U_SB_CR = 11,
  U_SB_EXTEND = 12,
  U_SB_LF = 13,
  U_SB_SCONTINUE = 14,
  U_SB_COUNT = 15
}
enum
Sentence Break constants.
USentenceBreakTag{
  UBRK_SENTENCE_TERM = 0,
  UBRK_SENTENCE_TERM_LIMIT = 100,
  UBRK_SENTENCE_SEP = 100,
  UBRK_SENTENCE_SEP_LIMIT = 200
}
enum
Enum constants for the sentence break tags returned by getRuleStatus().
UTransDirection{
  UTRANS_FORWARD,
  UTRANS_REVERSE
}
enum
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.
UVerticalOrientation{
  U_VO_ROTATED,
  U_VO_TRANSFORMED_ROTATED,
  U_VO_TRANSFORMED_UPRIGHT,
  U_VO_UPRIGHT
}
enum
Vertical Orientation constants.
UWordBreak{
  UBRK_WORD_NONE = 0,
  UBRK_WORD_NONE_LIMIT = 100,
  UBRK_WORD_NUMBER = 100,
  UBRK_WORD_NUMBER_LIMIT = 200,
  UBRK_WORD_LETTER = 200,
  UBRK_WORD_LETTER_LIMIT = 300,
  UBRK_WORD_KANA = 300,
  UBRK_WORD_KANA_LIMIT = 400,
  UBRK_WORD_IDEO = 400,
  UBRK_WORD_IDEO_LIMIT = 500
}
enum
Enum constants for the word break tags returned by getRuleStatus().
UWordBreakValues{
  U_WB_OTHER = 0,
  U_WB_ALETTER = 1,
  U_WB_FORMAT = 2,
  U_WB_KATAKANA = 3,
  U_WB_MIDLETTER = 4,
  U_WB_MIDNUM = 5,
  U_WB_NUMERIC = 6,
  U_WB_EXTENDNUMLET = 7,
  U_WB_CR = 8,
  U_WB_EXTEND = 9,
  U_WB_LF = 10,
  U_WB_MIDNUMLET =11,
  U_WB_NEWLINE =12,
  U_WB_REGIONAL_INDICATOR = 13,
  U_WB_HEBREW_LETTER = 14,
  U_WB_SINGLE_QUOTE = 15,
  U_WB_DOUBLE_QUOTE = 16,
  U_WB_E_BASE = 17,
  U_WB_E_BASE_GAZ = 18,
  U_WB_E_MODIFIER = 19,
  U_WB_GLUE_AFTER_ZWJ = 20,
  U_WB_ZWJ = 21,
  U_WB_WSEGSPACE = 22,
  U_WB_COUNT = 23
}
enum
Word Break constants.

Typedefs

OldUChar OldUChar
uint16_t
Default ICU 58 definition of UChar.
UBidiPairedBracketType typedef
Bidi Paired Bracket Type constants.
UBlockCode typedef
enum UBlockCode
UBool typedef
int8_t
The ICU boolean type, a signed-byte integer.
UBreakIteratorType typedef
The possible types of text boundaries.
UCPMap typedef
struct UCPMap
Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
UCPMapValueFilter(const void *context, uint32_t value) typedef
uint32_t U_CALLCONV
Callback function type: Modifies a map value.
UChar UChar
uint16_t
The base type for UTF-16 code units and pointers.
UChar32 typedef
int32_t
Define UChar32 as a type for single Unicode code points.
UCharCategory typedef
Data for enumerated Unicode general category types.
UCharDirection typedef
This specifies the language directional property of a character set.
UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) typedef
Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c<limit) with the same Unicode general category ("character type").
UCharNameChoice typedef
Selector constants for u_charName().
UColAttribute typedef
Attributes that collation service understands.
UColAttributeValue typedef
Enum containing attribute values for controlling collation behavior.
UColBoundMode typedef
Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.
UColReorderCode typedef
Enum containing the codes for reordering segments of the collation table that are not script codes.
UCollationResult typedef
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.
UCollationStrength typedef
Base letter represents a primary difference.
UCollator typedef
struct UCollator
structure representing a collator object instance
UDate typedef
double
Date and Time data type.
UDecompositionType typedef
Decomposition Type constants.
UDisplayContext typedef
UDisplayContextType typedef
UEastAsianWidth typedef
East Asian Width constants.
UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length) typedef
Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name.
UEnumeration typedef
struct UEnumeration
structure representing an enumeration object instance
UErrorCode typedef
enum UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
UGraphemeClusterBreak typedef
Grapheme Cluster Break constants.
UHangulSyllableType typedef
Hangul Syllable Type constants.
UIndicPositionalCategory typedef
Indic Positional Category constants.
UIndicSyllabicCategory typedef
Indic Syllabic Category constants.
UJoiningGroup typedef
Joining Group constants.
UJoiningType typedef
Joining Type constants.
ULineBreak typedef
enum ULineBreak
Line Break constants.
ULineBreakTag typedef
Enum constants for the line break tags returned by getRuleStatus().
ULocAvailableType typedef
Types for uloc_getAvailableByType and uloc_countAvailableByType.
ULocaleData typedef
struct ULocaleData
A locale data object.
ULocaleDisplayNames typedef
C typedef for struct ULocaleDisplayNames.
UNormalizationCheckResult typedef
Result values for normalization quick check functions.
UNormalizer2 typedef
struct UNormalizer2
C typedef for struct UNormalizer2.
UNumericType typedef
Numeric Type constants.
UParseError typedef
struct UParseError
A UParseError struct is used to return detailed information about parsing errors.
UProperty typedef
enum UProperty
Selection constants for Unicode properties.
UPropertyNameChoice typedef
Selector constants for u_getPropertyName() and u_getPropertyValueName().
UReplaceable typedef
void *
An opaque replaceable text object.
UReplaceableCallbacks typedef
A set of function pointers that transliterators use to manipulate a UReplaceable.
UScriptCode typedef
Constants for ISO 15924 script codes.
UScriptUsage typedef
Script usage constants.
USentenceBreak typedef
Sentence Break constants.
USentenceBreakTag typedef
Enum constants for the sentence break tags returned by getRuleStatus().
UText typedef
struct UText
C typedef for struct UText.
UTransDirection typedef
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.
UTransPosition typedef
Position structure for utrans_transIncremental() incremental transliteration.
UTransliterator typedef
void *
An opaque transliterator for use in C.
UVersionInfo[U_MAX_VERSION_LENGTH] typedef
uint8_t
The binary form of a version on ICU APIs is an array of 4 uint8_t.
UVerticalOrientation typedef
Vertical Orientation constants.
UWordBreak typedef
enum UWordBreak
Enum constants for the word break tags returned by getRuleStatus().
UWordBreakValues typedef
Word Break constants.

Variables

context
U_CDECL_BEGIN typedef void *

Functions

UChar(U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context)
U_CDECL_BEGIN typedef
Callback function for u_unescapeAt() that returns a character of the source text given an offset and a context pointer.
u_charAge(UChar32 c, UVersionInfo versionArray)
U_CAPI void U_EXPORT2
Get the "age" of the code point.
u_charDigitValue(UChar32 c)
U_CAPI int32_t U_EXPORT2
Returns the decimal digit value of a decimal digit character.
u_charDirection(UChar32 c)
Returns the bidirectional category value for the code point, which is used in the Unicode bidirectional algorithm (UAX #9 http://www.unicode.org/reports/tr9/).
u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
U_CAPI UChar32 U_EXPORT2
Find a Unicode character by its name and return its code point value.
u_charMirror(UChar32 c)
U_CAPI UChar32 U_EXPORT2
Maps the specified character to a "mirror-image" character.
u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Retrieve the name of a Unicode character.
u_charType(UChar32 c)
U_CAPI int8_t U_EXPORT2
Returns the general category value for the code point.
u_countChar32(const UChar *s, int32_t length)
U_CAPI int32_t U_EXPORT2
Count Unicode code points in the length UChar code units of the string.
u_digit(UChar32 ch, int8_t radix)
U_CAPI int32_t U_EXPORT2
Returns the decimal digit value of the code point in the specified radix.
u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
U_CAPI void U_EXPORT2
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name.
u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)
U_CAPI void U_EXPORT2
Enumerate efficiently all code points with their Unicode general categories.
u_errorName(UErrorCode code)
U_CAPI const char *U_EXPORT2
Return a string for a UErrorCode value.
u_foldCase(UChar32 c, uint32_t options)
U_CAPI UChar32 U_EXPORT2
The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.
u_forDigit(int32_t digit, int8_t radix)
U_CAPI UChar32 U_EXPORT2
Determines the character representation for a specific digit in the specified radix.
u_getBidiPairedBracket(UChar32 c)
U_CAPI UChar32 U_EXPORT2
Maps the specified character to its paired bracket character.
u_getCombiningClass(UChar32 c)
U_CAPI uint8_t U_EXPORT2
Returns the combining class of the code point as specified in UnicodeData.txt.
u_getIntPropertyMaxValue(UProperty which)
U_CAPI int32_t U_EXPORT2
Get the maximum value for an enumerated/integer/binary Unicode property.
u_getIntPropertyMinValue(UProperty which)
U_CAPI int32_t U_EXPORT2
Get the minimum value for an enumerated/integer/binary Unicode property.
u_getIntPropertyValue(UChar32 c, UProperty which)
U_CAPI int32_t U_EXPORT2
Get the property value for an enumerated or integer Unicode property for a code point.
u_getNumericValue(UChar32 c)
U_CAPI double U_EXPORT2
Get the numeric value for a Unicode code point as defined in the Unicode Character Database.
u_getPropertyEnum(const char *alias)
U_CAPI UProperty U_EXPORT2
Return the UProperty enum for a given property name, as specified in the Unicode database file PropertyAliases.txt.
u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)
U_CAPI const char *U_EXPORT2
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases.txt.
u_getPropertyValueEnum(UProperty property, const char *alias)
U_CAPI int32_t U_EXPORT2
Return the property value integer for a given value name, as specified in the Unicode database file PropertyValueAliases.txt.
u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)
U_CAPI const char *U_EXPORT2
Return the Unicode name for a given property value, as given in the Unicode database file PropertyValueAliases.txt.
u_getUnicodeVersion(UVersionInfo versionArray)
U_CAPI void U_EXPORT2
Gets the Unicode version information.
u_getVersion(UVersionInfo versionArray)
U_CAPI void U_EXPORT2
Gets the ICU release version.
u_hasBinaryProperty(UChar32 c, UProperty which)
U_CAPI UBool U_EXPORT2
Check a binary Unicode property for a code point.
u_isIDIgnorable(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character should be regarded as an ignorable character in an identifier, according to Java.
u_isIDPart(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character is permissible in an identifier according to Java.
u_isIDStart(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character is permissible as the first character in an identifier according to Unicode (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
u_isISOControl(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is an ISO control code.
u_isJavaIDPart(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character is permissible in a Java identifier.
u_isJavaIDStart(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character is permissible as the first character in a Java identifier.
u_isJavaSpaceChar(UChar32 c)
U_CAPI UBool U_EXPORT2
Determine if the specified code point is a space character according to Java.
u_isMirrored(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the code point has the Bidi_Mirrored property.
u_isUAlphabetic(UChar32 c)
U_CAPI UBool U_EXPORT2
Check if a code point has the Alphabetic Unicode property.
u_isULowercase(UChar32 c)
U_CAPI UBool U_EXPORT2
Check if a code point has the Lowercase Unicode property.
u_isUUppercase(UChar32 c)
U_CAPI UBool U_EXPORT2
Check if a code point has the Uppercase Unicode property.
u_isUWhiteSpace(UChar32 c)
U_CAPI UBool U_EXPORT2
Check if a code point has the White_Space Unicode property.
u_isWhitespace(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified code point is a whitespace character according to Java/ICU.
u_isalnum(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is an alphanumeric character (letter or digit) according to Java.
u_isalpha(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a letter character.
u_isbase(UChar32 c)
U_CAPI UBool U_EXPORT2
Non-standard: Determines whether the specified code point is a base character.
u_isblank(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a "blank" or "horizontal space", a character that visibly separates words on a line.
u_iscntrl(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a control character (as defined by this function).
u_isdefined(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is "defined", which usually means that it is assigned a character.
u_isdigit(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a digit character according to Java.
u_isgraph(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a "graphic" character (printable, excluding spaces).
u_islower(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point has the general category "Ll" (lowercase letter).
u_isprint(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a printable character.
u_ispunct(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a punctuation character.
u_isspace(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines if the specified character is a space character or not.
u_istitle(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a titlecase letter.
u_isupper(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point has the general category "Lu" (uppercase letter).
u_isxdigit(UChar32 c)
U_CAPI UBool U_EXPORT2
Determines whether the specified code point is a hexadecimal digit.
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options)
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
u_memchr(const UChar *s, UChar c, int32_t count)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a BMP code point in a string.
u_memchr32(const UChar *s, UChar32 c, int32_t count)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a code point in a string.
u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count)
U_CAPI int32_t U_EXPORT2
Compare the first count UChars of each buffer.
u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count)
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
u_memcpy(UChar *dest, const UChar *src, int32_t count)
U_CAPI UChar *U_EXPORT2
Synonym for memcpy(), but with UChars only.
u_memmove(UChar *dest, const UChar *src, int32_t count)
U_CAPI UChar *U_EXPORT2
Synonym for memmove(), but with UChars only.
u_memrchr(const UChar *s, UChar c, int32_t count)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a BMP code point in a string.
u_memrchr32(const UChar *s, UChar32 c, int32_t count)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a code point in a string.
u_memset(UChar *dest, UChar c, int32_t count)
U_CAPI UChar *U_EXPORT2
Initialize count characters of dest to c.
u_strCaseCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
u_strCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, UBool codePointOrder)
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings (binary order).
u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a substring in a string.
u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a substring in a string.
u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Case-folds the characters in a string.
u_strFromUTF32(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2
Convert a UTF-32 string to UTF-16.
u_strFromUTF32WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2
Convert a UTF-32 string to UTF-16.
u_strFromUTF8(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2
Convert a UTF-8 string to UTF-16.
u_strFromUTF8Lenient(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2
Convert a UTF-8 string to UTF-16.
u_strFromUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
U_CAPI UChar *U_EXPORT2
Convert a UTF-8 string to UTF-16.
u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number)
U_CAPI UBool U_EXPORT2
Check if the string contains more Unicode code points than a certain number.
u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Lowercase the characters in a string.
u_strToTitle(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Titlecase a string.
u_strToUTF32(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI UChar32 *U_EXPORT2
Convert a UTF-16 string to UTF-32.
u_strToUTF32WithSub(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
U_CAPI UChar32 *U_EXPORT2
Convert a UTF-16 string to UTF-32.
u_strToUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
U_CAPI char *U_EXPORT2
Convert a UTF-16 string to UTF-8.
u_strToUTF8WithSub(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
U_CAPI char *U_EXPORT2
Convert a UTF-16 string to UTF-8.
u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
U_CDECL_END U_CAPI int32_t U_EXPORT2
Uppercase the characters in a string.
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options)
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
u_strcat(UChar *dst, const UChar *src)
U_CAPI UChar *U_EXPORT2
Concatenate two ustrings.
u_strchr(const UChar *s, UChar c)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a BMP code point in a string.
u_strchr32(const UChar *s, UChar32 c)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a code point in a string.
u_strcmp(const UChar *s1, const UChar *s2)
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings for bitwise equality (code unit order).
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2)
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
u_strcpy(UChar *dst, const UChar *src)
U_CAPI UChar *U_EXPORT2
Copy a ustring.
u_strcspn(const UChar *string, const UChar *matchSet)
U_CAPI int32_t U_EXPORT2
Returns the number of consecutive characters in string, beginning with the first, that do not occur somewhere in matchSet.
u_strlen(const UChar *s)
U_CAPI int32_t U_EXPORT2
Determine the length of an array of UChar.
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options)
U_CAPI int32_t U_EXPORT2
Compare two strings case-insensitively using full case folding.
u_strncat(UChar *dst, const UChar *src, int32_t n)
U_CAPI UChar *U_EXPORT2
Concatenate two ustrings.
u_strncmp(const UChar *ucs1, const UChar *ucs2, int32_t n)
U_CAPI int32_t U_EXPORT2
Compare two ustrings for bitwise equality.
u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n)
U_CAPI int32_t U_EXPORT2
Compare two Unicode strings in code point order.
u_strncpy(UChar *dst, const UChar *src, int32_t n)
U_CAPI UChar *U_EXPORT2
Copy a ustring.
u_strpbrk(const UChar *string, const UChar *matchSet)
U_CAPI UChar *U_EXPORT2
Locates the first occurrence in the string string of any of the characters in the string matchSet.
u_strrchr(const UChar *s, UChar c)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a BMP code point in a string.
u_strrchr32(const UChar *s, UChar32 c)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a code point in a string.
u_strrstr(const UChar *s, const UChar *substring)
U_CAPI UChar *U_EXPORT2
Find the last occurrence of a substring in a string.
u_strspn(const UChar *string, const UChar *matchSet)
U_CAPI int32_t U_EXPORT2
Returns the number of consecutive characters in string, beginning with the first, that occur somewhere in matchSet.
u_strstr(const UChar *s, const UChar *substring)
U_CAPI UChar *U_EXPORT2
Find the first occurrence of a substring in a string.
u_strtok_r(UChar *src, const UChar *delim, UChar **saveState)
U_CAPI UChar *U_EXPORT2
The string tokenizer API allows an application to break a string into tokens.
u_tolower(UChar32 c)
U_CAPI UChar32 U_EXPORT2
The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.
u_totitle(UChar32 c)
U_CAPI UChar32 U_EXPORT2
The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.
u_toupper(UChar32 c)
U_CAPI UChar32 U_EXPORT2
The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.
u_versionToString(const UVersionInfo versionArray, char *versionString)
U_CAPI void U_EXPORT2
Write a string with dotted-decimal version information according to the input UVersionInfo.
ubrk_clone(const UBreakIterator *bi, UErrorCode *status)
Thread safe cloning operation.
ubrk_close(UBreakIterator *bi)
U_CAPI void U_EXPORT2
Close a UBreakIterator.
ubrk_countAvailable(void)
U_CAPI int32_t U_EXPORT2
Determine how many locales have text breaking information available.
ubrk_current(const UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Determine the most recently-returned text boundary.
ubrk_first(UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Set the iterator position to zero, the start of the text being scanned.
ubrk_following(UBreakIterator *bi, int32_t offset)
U_CAPI int32_t U_EXPORT2
Advance the iterator to the first boundary following the specified offset.
ubrk_getAvailable(int32_t index)
U_CAPI const char *U_EXPORT2
Get a locale for which text breaking information is available.
ubrk_getRuleStatus(UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Return the status from the break rule that determined the most recently returned break position.
ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Get the statuses from the break rules that determined the most recently returned break position.
ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
U_CAPI UBool U_EXPORT2
Returns true if the specified position is a boundary position.
ubrk_last(UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Set the iterator position to the index immediately beyond the last character in the text being scanned.
ubrk_next(UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Advance the iterator to the boundary following the current boundary.
ubrk_open(UBreakIteratorType type, const char *locale, const UChar *text, int32_t textLength, UErrorCode *status)
Open a new UBreakIterator for locating text boundaries for a specified locale.
ubrk_preceding(UBreakIterator *bi, int32_t offset)
U_CAPI int32_t U_EXPORT2
Set the iterator position to the first boundary preceding the specified offset.
ubrk_previous(UBreakIterator *bi)
U_CAPI int32_t U_EXPORT2
Set the iterator position to the boundary preceding the current boundary.
ubrk_setText(UBreakIterator *bi, const UChar *text, int32_t textLength, UErrorCode *status)
U_CAPI void U_EXPORT2
Sets an existing iterator to point to a new piece of text.
ubrk_setUText(UBreakIterator *bi, UText *text, UErrorCode *status)
U_CAPI void U_EXPORT2
Sets an existing iterator to point to a new piece of text.
ucol_clone(const UCollator *coll, UErrorCode *status)
U_CAPI UCollator *U_EXPORT2
Thread-safe cloning operation.
ucol_close(UCollator *coll)
U_CAPI void U_EXPORT2
Close a UCollator.
ucol_countAvailable(void)
U_CAPI int32_t U_EXPORT2
Determine how many locales have collation rules available.
ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status)
Universal attribute getter.
ucol_getAvailable(int32_t localeIndex)
U_CAPI const char *U_EXPORT2
Get a locale for which collation rules are available.
ucol_getDisplayName(const char *objLoc, const char *dispLoc, UChar *result, int32_t resultLength, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Get the display name for a UCollator.
ucol_getEquivalentReorderCodes(int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Retrieves the reorder codes that are grouped with the given reorder code.
ucol_getFunctionalEquivalent(char *result, int32_t resultCapacity, const char *keyword, const char *locale, UBool *isAvailable, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Return the functionally equivalent locale for the specified input locale, with respect to given keyword, for the collation service.
ucol_getKeywordValues(const char *keyword, UErrorCode *status)
U_CAPI UEnumeration *U_EXPORT2
Given a keyword, create a string enumeration of all values for that keyword that are currently in use.
ucol_getKeywordValuesForLocale(const char *key, const char *locale, UBool commonlyUsed, UErrorCode *status)
U_CAPI UEnumeration *U_EXPORT2
Given a key and a locale, returns an array of string values in a preferred order that would make a difference.
ucol_getKeywords(UErrorCode *status)
U_CAPI UEnumeration *U_EXPORT2
Create a string enumerator of all possible keywords that are relevant to collation.
ucol_getMaxVariable(const UCollator *coll)
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
ucol_getReorderCodes(const UCollator *coll, int32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Retrieves the reordering codes for this collator.
ucol_getSortKey(const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength)
U_CAPI int32_t U_EXPORT2
Get a sort key for a string from a UCollator.
ucol_getStrength(const UCollator *coll)
Get the collation strength used in a UCollator.
ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, const uint8_t *src2, int32_t src2Length, uint8_t *dest, int32_t destCapacity)
U_CAPI int32_t U_EXPORT2
Merges two sort keys.
ucol_open(const char *loc, UErrorCode *status)
U_CAPI UCollator *U_EXPORT2
Open a UCollator for comparing strings.
ucol_openAvailableLocales(UErrorCode *status)
U_CAPI UEnumeration *U_EXPORT2
Create a string enumerator of all locales for which a valid collator may be opened.
ucol_openRules(const UChar *rules, int32_t rulesLength, UColAttributeValue normalizationMode, UCollationStrength strength, UParseError *parseError, UErrorCode *status)
U_CAPI UCollator *U_EXPORT2
Produce a UCollator instance according to the rules supplied.
ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status)
U_CAPI void U_EXPORT2
Universal attribute setter.
ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode)
U_CAPI void U_EXPORT2
Sets the variable top to the top of the specified reordering group.
ucol_setReorderCodes(UCollator *coll, const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode *pErrorCode)
U_CAPI void U_EXPORT2
Sets the reordering codes for this collator.
ucol_setStrength(UCollator *coll, UCollationStrength strength)
U_CAPI void U_EXPORT2
Set the collation strength used in a UCollator.
ucol_strcoll(const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength)
Compare two strings.
ucol_strcollUTF8(const UCollator *coll, const char *source, int32_t sourceLength, const char *target, int32_t targetLength, UErrorCode *status)
Compare two strings in UTF-8.
uenum_close(UEnumeration *en)
U_CAPI void U_EXPORT2
Disposes of resources in use by the iterator.
uenum_count(UEnumeration *en, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Returns the number of elements that the iterator traverses.
uenum_next(UEnumeration *en, int32_t *resultLength, UErrorCode *status)
U_CAPI const char *U_EXPORT2
Returns the next element in the iterator's list.
uenum_openCharStringsEnumeration(const char *const strings[], int32_t count, UErrorCode *ec)
U_CAPI UEnumeration *U_EXPORT2
Given an array of const char* strings (invariant chars only), return a UEnumeration.
uenum_openUCharStringsEnumeration(const UChar *const strings[], int32_t count, UErrorCode *ec)
U_CAPI UEnumeration *U_EXPORT2
Given an array of const UChar* strings, return a UEnumeration.
uenum_reset(UEnumeration *en, UErrorCode *status)
U_CAPI void U_EXPORT2
Resets the iterator to the current list of service IDs.
uenum_unext(UEnumeration *en, int32_t *resultLength, UErrorCode *status)
U_CAPI const UChar *U_EXPORT2
Returns the next element in the iterator's list.
uldn_close(ULocaleDisplayNames *ldn)
U_CAPI void U_EXPORT2
Closes a ULocaleDisplayNames instance obtained from uldn_open().
uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type, UErrorCode *pErrorCode)
Returns the UDisplayContext value for the specified UDisplayContextType.
uldn_getDialectHandling(const ULocaleDisplayNames *ldn)
Returns the dialect handling used in the display names.
uldn_getLocale(const ULocaleDisplayNames *ldn)
U_CAPI const char *U_EXPORT2
Returns the locale used to determine the display names.
uldn_keyDisplayName(const ULocaleDisplayNames *ldn, const char *key, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided locale key.
uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn, const char *key, const char *value, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided value (used with the provided key).
uldn_languageDisplayName(const ULocaleDisplayNames *ldn, const char *lang, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided language code.
uldn_localeDisplayName(const ULocaleDisplayNames *ldn, const char *locale, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided locale.
uldn_open(const char *locale, UDialectHandling dialectHandling, UErrorCode *pErrorCode)
Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided dialectHandling.
uldn_openForContext(const char *locale, UDisplayContext *contexts, int32_t length, UErrorCode *pErrorCode)
Returns an instance of LocaleDisplayNames that returns names formatted for the provided locale, using the provided UDisplayContext settings.
uldn_regionDisplayName(const ULocaleDisplayNames *ldn, const char *region, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided region code.
uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn, UScriptCode scriptCode, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided script code.
uldn_scriptDisplayName(const ULocaleDisplayNames *ldn, const char *script, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided script.
uldn_variantDisplayName(const ULocaleDisplayNames *ldn, const char *variant, UChar *result, int32_t maxResultSize, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the display name of the provided variant.
uloc_acceptLanguage(char *result, int32_t resultAvailable, UAcceptResult *outResult, const char **acceptList, int32_t acceptListCount, UEnumeration *availableLocales, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Based on a list of available locales, determine an acceptable locale for the user.
uloc_addLikelySubtags(const char *localeID, char *maximizedLocaleID, int32_t maximizedLocaleIDCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Add the likely subtags for a provided locale ID, per the Likely Subtags algorithm described in the CLDR technical report UTS #35 (Unicode Locale Data Markup Language).
uloc_canonicalize(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale.
uloc_countAvailable(void)
U_CAPI int32_t U_EXPORT2
Gets the size of the list of all available locales.
uloc_forLanguageTag(const char *langtag, char *localeID, int32_t localeIDCapacity, int32_t *parsedLength, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Returns a locale ID for the specified BCP47 language tag string.
uloc_getAvailable(int32_t n)
U_CAPI const char *U_EXPORT2
Gets the specified locale from a list of available locales.
uloc_getBaseName(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale, like uloc_getName(), but without keywords.
uloc_getCharacterOrientation(const char *localeId, UErrorCode *status)
U_CAPI ULayoutType U_EXPORT2
Get the layout character orientation for the specified locale.
uloc_getCountry(const char *localeID, char *country, int32_t countryCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the country code for the specified locale.
uloc_getDefault(void)
U_CAPI const char *U_EXPORT2
Gets ICU's default locale.
uloc_getDisplayCountry(const char *locale, const char *displayLocale, UChar *country, int32_t countryCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the country name suitable for display for the specified locale.
uloc_getDisplayKeyword(const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the keyword name suitable for display for the specified locale.
uloc_getDisplayKeywordValue(const char *locale, const char *keyword, const char *displayLocale, UChar *dest, int32_t destCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the value of the keyword suitable for display for the specified locale.
uloc_getDisplayLanguage(const char *locale, const char *displayLocale, UChar *language, int32_t languageCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the language name suitable for display for the specified locale.
uloc_getDisplayName(const char *localeID, const char *inLocaleID, UChar *result, int32_t maxResultSize, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the full name suitable for display for the specified locale.
uloc_getDisplayScript(const char *locale, const char *displayLocale, UChar *script, int32_t scriptCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the script name suitable for display for the specified locale.
uloc_getDisplayVariant(const char *locale, const char *displayLocale, UChar *variant, int32_t variantCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Gets the variant name suitable for display for the specified locale.
uloc_getISO3Country(const char *localeID)
U_CAPI const char *U_EXPORT2
Gets the ISO country code for the specified locale.
uloc_getISO3Language(const char *localeID)
U_CAPI const char *U_EXPORT2
Gets the ISO language code for the specified locale.
uloc_getISOCountries(void)
U_CAPI const char *const *U_EXPORT2
Gets a list of all available 2-letter country codes defined in ISO 3166.
uloc_getISOLanguages(void)
U_CAPI const char *const *U_EXPORT2
Gets a list of all available 2-letter language codes defined in ISO 639, plus additional 3-letter codes determined to be useful for locale generation as defined by Unicode CLDR.
uloc_getKeywordValue(const char *localeID, const char *keywordName, char *buffer, int32_t bufferCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Get the value for a keyword.
uloc_getLanguage(const char *localeID, char *language, int32_t languageCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the language code for the specified locale.
uloc_getLineOrientation(const char *localeId, UErrorCode *status)
U_CAPI ULayoutType U_EXPORT2
Get the layout line orientation for the specified locale.
uloc_getName(const char *localeID, char *name, int32_t nameCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the full name for the specified locale.
uloc_getScript(const char *localeID, char *script, int32_t scriptCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the script code for the specified locale.
uloc_getVariant(const char *localeID, char *variant, int32_t variantCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the variant code for the specified locale.
uloc_isRightToLeft(const char *locale)
U_CAPI UBool U_EXPORT2
Returns whether the locale's script is written right-to-left.
uloc_minimizeSubtags(const char *localeID, char *minimizedLocaleID, int32_t minimizedLocaleIDCapacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Minimize the subtags for a provided locale ID, per the Likely Subtags algorithm described in the CLDR technical report UTS #35 (Unicode Locale Data Markup Language).
uloc_openKeywords(const char *localeID, UErrorCode *status)
U_CAPI UEnumeration *U_EXPORT2
Gets an enumeration of keywords for the specified locale.
uloc_setKeywordValue(const char *keywordName, const char *keywordValue, char *buffer, int32_t bufferCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Sets or removes the value of the specified keyword.
uloc_toLanguageTag(const char *localeID, char *langtag, int32_t langtagCapacity, UBool strict, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Returns a well-formed language tag for this locale ID.
uloc_toLegacyKey(const char *keyword)
U_CAPI const char *U_EXPORT2
Converts the specified keyword (BCP 47 Unicode locale extension key, or legacy key) to the legacy key.
uloc_toLegacyType(const char *keyword, const char *value)
U_CAPI const char *U_EXPORT2
Converts the specified keyword value (BCP 47 Unicode locale extension type, or legacy type or type alias) to the canonical legacy type.
uloc_toUnicodeLocaleKey(const char *keyword)
U_CAPI const char *U_EXPORT2
Converts the specified keyword (legacy key, or BCP 47 Unicode locale extension key) to the equivalent BCP 47 Unicode locale extension key.
uloc_toUnicodeLocaleType(const char *keyword, const char *value)
U_CAPI const char *U_EXPORT2
Converts the specified keyword value (legacy type, or BCP 47 Unicode locale extension type) to the well-formed BCP 47 Unicode locale extension type for the specified keyword (category).
ulocdata_getCLDRVersion(UVersionInfo versionArray, UErrorCode *status)
U_CAPI void U_EXPORT2
Return the current CLDR version used by the library.
unorm2_append(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Appends the second string to the first string (merging them at the boundary) and returns the length of the first string.
unorm2_close(UNormalizer2 *norm2)
U_CAPI void U_EXPORT2
Closes a UNormalizer2 instance from unorm2_openFiltered().
unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b)
U_CAPI UChar32 U_EXPORT2
Performs pairwise composition of a & b and returns the composite if there is one.
unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c)
U_CAPI uint8_t U_EXPORT2
Gets the combining class of c.
unorm2_getDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Gets the decomposition mapping of c.
unorm2_getNFCInstance(UErrorCode *pErrorCode)
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFC normalization.
unorm2_getNFDInstance(UErrorCode *pErrorCode)
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFD normalization.
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode)
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
unorm2_getNFKCInstance(UErrorCode *pErrorCode)
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFKC normalization.
unorm2_getNFKDInstance(UErrorCode *pErrorCode)
U_CAPI const UNormalizer2 *U_EXPORT2
Returns a UNormalizer2 instance for Unicode NFKD normalization.
unorm2_getRawDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Gets the raw decomposition mapping of c.
unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c)
U_CAPI UBool U_EXPORT2
Tests if the character always has a normalization boundary after it, regardless of context.
unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c)
U_CAPI UBool U_EXPORT2
Tests if the character always has a normalization boundary before it, regardless of context.
unorm2_isInert(const UNormalizer2 *norm2, UChar32 c)
U_CAPI UBool U_EXPORT2
Tests if the character is normalization-inert.
unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
U_CAPI UBool U_EXPORT2
Tests if the string is normalized.
unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Writes the normalized form of the source string to the destination string (replacing its contents) and returns the length of the destination string.
unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the length of the first string.
unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Returns the end of the normalized substring of the input string.
uscript_breaksBetweenLetters(UScriptCode script)
U_CAPI UBool U_EXPORT2
Returns true if the script allows line breaks between letters (excluding hyphenation).
uscript_getCode(const char *nameOrAbbrOrLocale, UScriptCode *fillIn, int32_t capacity, UErrorCode *err)
U_CAPI int32_t U_EXPORT2
Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
uscript_getName(UScriptCode scriptCode)
U_CAPI const char *U_EXPORT2
Returns the long Unicode script name, if there is one.
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
U_CAPI int32_t U_EXPORT2
Writes the script sample character string.
uscript_getScript(UChar32 codepoint, UErrorCode *err)
U_CAPI UScriptCode U_EXPORT2
Gets the script code associated with the given codepoint.
uscript_getScriptExtensions(UChar32 c, UScriptCode *scripts, int32_t capacity, UErrorCode *errorCode)
U_CAPI int32_t U_EXPORT2
Writes code point c's Script_Extensions as a list of UScriptCode values to the output scripts array and returns the number of script codes.
uscript_getShortName(UScriptCode scriptCode)
U_CAPI const char *U_EXPORT2
Returns the 4-letter ISO 15924 script code, which is the same as the short Unicode script name if Unicode has names for the script.
uscript_getUsage(UScriptCode script)
U_CAPI UScriptUsage U_EXPORT2
Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
uscript_hasScript(UChar32 c, UScriptCode sc)
U_CAPI UBool U_EXPORT2
Do the Script_Extensions of code point c contain script sc? If c does not have explicit Script_Extensions, then this tests whether c has the Script property value sc.
uscript_isCased(UScriptCode script)
U_CAPI UBool U_EXPORT2
Returns true if, in modern (or most recent) usage of the script, case distinctions are customary.
uscript_isRightToLeft(UScriptCode script)
U_CAPI UBool U_EXPORT2
Returns true if the script is written right-to-left.
utext_char32At(UText *ut, int64_t nativeIndex)
U_CAPI UChar32 U_EXPORT2
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds.
utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
U_CAPI UText *U_EXPORT2
Clone a UText.
utext_close(UText *ut)
U_CAPI UText *U_EXPORT2
Close function for UText instances.
utext_current32(UText *ut)
U_CAPI UChar32 U_EXPORT2
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached the end of the input text.
utext_equals(const UText *a, const UText *b)
U_CAPI UBool U_EXPORT2
Compare two UText objects for equality.
utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Extract text from a UText into a UChar buffer.
utext_getNativeIndex(const UText *ut)
U_CAPI int64_t U_EXPORT2
Get the current iterator position, which can range from 0 to the length of the text.
utext_getPreviousNativeIndex(UText *ut)
U_CAPI int64_t U_EXPORT2
Get the native index of the character preceding the current position.
utext_moveIndex32(UText *ut, int32_t delta)
U_CAPI UBool U_EXPORT2
Move the iterator position by delta code points.
utext_nativeLength(UText *ut)
U_CAPI int64_t U_EXPORT2
Get the length of the text.
utext_next32(UText *ut)
U_CAPI UChar32 U_EXPORT2
Get the code point at the current iteration position of the UText, and advance the position to the first index following the character.
utext_next32From(UText *ut, int64_t nativeIndex)
U_CAPI UChar32 U_EXPORT2
Set the iteration index and return the code point at that index.
utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
U_CAPI UText *U_EXPORT2
Open a read-only UText for a UChar * string.
utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
U_CAPI UText *U_EXPORT2
Open a read-only UText implementation for UTF-8 strings.
utext_previous32(UText *ut)
U_CAPI UChar32 U_EXPORT2
Move the iterator position to the character (code point) whose index precedes the current position, and return that character.
utext_previous32From(UText *ut, int64_t nativeIndex)
U_CAPI UChar32 U_EXPORT2
Set the iteration index, and return the code point preceding the one specified by the initial index.
utext_setNativeIndex(UText *ut, int64_t nativeIndex)
U_CAPI void U_EXPORT2
Set the current iteration position to the nearest code point boundary at or preceding the specified index.
utrans_clone(const UTransliterator *trans, UErrorCode *status)
Create a copy of a transliterator.
utrans_close(UTransliterator *trans)
U_CAPI void U_EXPORT2
Close a transliterator.
utrans_openIDs(UErrorCode *pErrorCode)
U_CAPI UEnumeration *U_EXPORT2
Return a UEnumeration for the available transliterators.
utrans_openInverse(const UTransliterator *trans, UErrorCode *status)
Open an inverse of an existing transliterator.
utrans_openU(const UChar *id, int32_t idLength, UTransDirection dir, const UChar *rules, int32_t rulesLength, UParseError *parseError, UErrorCode *pErrorCode)
Open a custom transliterator, given a custom rules string OR a system transliterator, given its ID.
utrans_setFilter(UTransliterator *trans, const UChar *filterPattern, int32_t filterPatternLen, UErrorCode *status)
U_CAPI void U_EXPORT2
Set the filter used by a transliterator.
utrans_toRules(const UTransliterator *trans, UBool escapeUnprintable, UChar *result, int32_t resultLength, UErrorCode *status)
U_CAPI int32_t U_EXPORT2
Create a rule string that can be passed to utrans_openU to recreate this transliterator.
utrans_trans(const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, int32_t start, int32_t *limit, UErrorCode *status)
U_CAPI void U_EXPORT2
Transliterate a segment of a UReplaceable string.
utrans_transIncremental(const UTransliterator *trans, UReplaceable *rep, const UReplaceableCallbacks *repFunc, UTransPosition *pos, UErrorCode *status)
U_CAPI void U_EXPORT2
Transliterate the portion of the UReplaceable text buffer that can be transliterated unambiguously.
utrans_transIncrementalUChars(const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, UTransPosition *pos, UErrorCode *status)
U_CAPI void U_EXPORT2
Transliterate the portion of the UChar* text buffer that can be transliterated unambiguously.
utrans_transUChars(const UTransliterator *trans, UChar *text, int32_t *textLength, int32_t textCapacity, int32_t start, int32_t *limit, UErrorCode *status)
U_CAPI void U_EXPORT2
Transliterate a segment of a UChar* string.

Structs

UParseError

A UParseError struct is used to return detailed information about parsing errors.

UReplaceableCallbacks

A set of function pointers that transliterators use to manipulate a UReplaceable.

UTransPosition

Position structure for utrans_transIncremental() incremental transliteration.

Enumerations

Anonymous Enum 124

 Anonymous Enum 124

The capacity of the context strings in UParseError.

Properties
U_PARSE_CONTEXT_LEN

UAcceptResult

 UAcceptResult

Output values which uloc_acceptLanguage() writes to the 'outResult' parameter.

See also: uloc_acceptLanguageFromHTTP, uloc_acceptLanguage

Properties
ULOC_ACCEPT_FAILED

No exact match was found.

ULOC_ACCEPT_FALLBACK

A fallback was found.

For example, the Accept-Language list includes 'ja_JP' and is matched with available locale 'ja'.

ULOC_ACCEPT_VALID

An exact match was found.

UBidiPairedBracketType

 UBidiPairedBracketType

Bidi Paired Bracket Type constants.

See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE

Properties
U_BPT_CLOSE

Close paired bracket.

U_BPT_COUNT

One more than the highest normal UBidiPairedBracketType value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE).

Deprecated since ICU 58: the numeric value may change over time; see ICU ticket #12420.

U_BPT_NONE

Not a paired bracket.

U_BPT_OPEN

Open paired bracket.

UBlockCode

 UBlockCode

Constants for Unicode blocks, see the Unicode Data file Blocks.txt.

Properties
UBLOCK_ADLAM

UBLOCK_AEGEAN_NUMBERS

UBLOCK_AHOM

UBLOCK_ALCHEMICAL_SYMBOLS

UBLOCK_ALPHABETIC_PRESENTATION_FORMS

UBLOCK_ANATOLIAN_HIEROGLYPHS

UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION

UBLOCK_ANCIENT_GREEK_NUMBERS

UBLOCK_ANCIENT_SYMBOLS

UBLOCK_ARABIC

UBLOCK_ARABIC_EXTENDED_A

UBLOCK_ARABIC_EXTENDED_B

UBLOCK_ARABIC_EXTENDED_C

UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS

UBLOCK_ARABIC_PRESENTATION_FORMS_A

UBLOCK_ARABIC_PRESENTATION_FORMS_B

UBLOCK_ARABIC_SUPPLEMENT

UBLOCK_ARMENIAN

UBLOCK_ARROWS

UBLOCK_AVESTAN

UBLOCK_BALINESE

UBLOCK_BAMUM

UBLOCK_BAMUM_SUPPLEMENT

UBLOCK_BASIC_LATIN

UBLOCK_BASSA_VAH

UBLOCK_BATAK

UBLOCK_BENGALI

UBLOCK_BHAIKSUKI

UBLOCK_BLOCK_ELEMENTS

UBLOCK_BOPOMOFO

UBLOCK_BOPOMOFO_EXTENDED

UBLOCK_BOX_DRAWING

UBLOCK_BRAHMI

UBLOCK_BRAILLE_PATTERNS

UBLOCK_BUGINESE

UBLOCK_BUHID

UBLOCK_BYZANTINE_MUSICAL_SYMBOLS

UBLOCK_CARIAN

UBLOCK_CAUCASIAN_ALBANIAN

UBLOCK_CHAKMA

UBLOCK_CHAM

UBLOCK_CHEROKEE

UBLOCK_CHEROKEE_SUPPLEMENT

UBLOCK_CHESS_SYMBOLS

UBLOCK_CHORASMIAN

UBLOCK_CJK_COMPATIBILITY

UBLOCK_CJK_COMPATIBILITY_FORMS

UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS

UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT

UBLOCK_CJK_RADICALS_SUPPLEMENT

UBLOCK_CJK_STROKES

UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION

UBLOCK_CJK_UNIFIED_IDEOGRAPHS

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G

UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H

UBLOCK_COMBINING_DIACRITICAL_MARKS

UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED

UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT

UBLOCK_COMBINING_HALF_MARKS

UBLOCK_COMBINING_MARKS_FOR_SYMBOLS

Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".

UBLOCK_COMMON_INDIC_NUMBER_FORMS

UBLOCK_CONTROL_PICTURES

UBLOCK_COPTIC

UBLOCK_COPTIC_EPACT_NUMBERS

UBLOCK_COUNT

One more than the highest normal UBlockCode value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK).

Deprecated since ICU 58: the numeric value may change over time; see ICU ticket #12420.

UBLOCK_COUNTING_ROD_NUMERALS

UBLOCK_CUNEIFORM

UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION

UBLOCK_CURRENCY_SYMBOLS

UBLOCK_CYPRIOT_SYLLABARY

UBLOCK_CYPRO_MINOAN

UBLOCK_CYRILLIC

UBLOCK_CYRILLIC_EXTENDED_A

UBLOCK_CYRILLIC_EXTENDED_B

UBLOCK_CYRILLIC_EXTENDED_C

UBLOCK_CYRILLIC_EXTENDED_D

UBLOCK_CYRILLIC_SUPPLEMENT

UBLOCK_CYRILLIC_SUPPLEMENTARY

Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".

UBLOCK_DESERET

UBLOCK_DEVANAGARI

UBLOCK_DEVANAGARI_EXTENDED

UBLOCK_DEVANAGARI_EXTENDED_A

UBLOCK_DINGBATS

UBLOCK_DIVES_AKURU

UBLOCK_DOGRA

UBLOCK_DOMINO_TILES

UBLOCK_DUPLOYAN

UBLOCK_EARLY_DYNASTIC_CUNEIFORM

UBLOCK_EGYPTIAN_HIEROGLYPHS

UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS

UBLOCK_ELBASAN

UBLOCK_ELYMAIC

UBLOCK_EMOTICONS

UBLOCK_ENCLOSED_ALPHANUMERICS

UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT

UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS

UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT

UBLOCK_ETHIOPIC

UBLOCK_ETHIOPIC_EXTENDED

UBLOCK_ETHIOPIC_EXTENDED_A

UBLOCK_ETHIOPIC_EXTENDED_B

UBLOCK_ETHIOPIC_SUPPLEMENT

UBLOCK_GENERAL_PUNCTUATION

UBLOCK_GEOMETRIC_SHAPES

UBLOCK_GEOMETRIC_SHAPES_EXTENDED

UBLOCK_GEORGIAN

UBLOCK_GEORGIAN_EXTENDED

UBLOCK_GEORGIAN_SUPPLEMENT

UBLOCK_GLAGOLITIC

UBLOCK_GLAGOLITIC_SUPPLEMENT

UBLOCK_GOTHIC

UBLOCK_GRANTHA

UBLOCK_GREEK

Unicode 3.2 renames this block to "Greek and Coptic".

UBLOCK_GREEK_EXTENDED

UBLOCK_GUJARATI

UBLOCK_GUNJALA_GONDI

UBLOCK_GURMUKHI

UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS

UBLOCK_HANGUL_COMPATIBILITY_JAMO

UBLOCK_HANGUL_JAMO

UBLOCK_HANGUL_JAMO_EXTENDED_A

UBLOCK_HANGUL_JAMO_EXTENDED_B

UBLOCK_HANGUL_SYLLABLES

UBLOCK_HANIFI_ROHINGYA

UBLOCK_HANUNOO

UBLOCK_HATRAN

UBLOCK_HEBREW

UBLOCK_HIGH_PRIVATE_USE_SURROGATES

UBLOCK_HIGH_SURROGATES

UBLOCK_HIRAGANA

UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS

UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION

UBLOCK_IMPERIAL_ARAMAIC

UBLOCK_INDIC_SIYAQ_NUMBERS

UBLOCK_INSCRIPTIONAL_PAHLAVI

UBLOCK_INSCRIPTIONAL_PARTHIAN

UBLOCK_INVALID_CODE

UBLOCK_IPA_EXTENSIONS

UBLOCK_JAVANESE

UBLOCK_KAITHI

UBLOCK_KAKTOVIK_NUMERALS

UBLOCK_KANA_EXTENDED_A

UBLOCK_KANA_EXTENDED_B

UBLOCK_KANA_SUPPLEMENT

UBLOCK_KANBUN

UBLOCK_KANGXI_RADICALS

UBLOCK_KANNADA

UBLOCK_KATAKANA

UBLOCK_KATAKANA_PHONETIC_EXTENSIONS

UBLOCK_KAWI

UBLOCK_KAYAH_LI

UBLOCK_KHAROSHTHI

UBLOCK_KHITAN_SMALL_SCRIPT

UBLOCK_KHMER

UBLOCK_KHMER_SYMBOLS

UBLOCK_KHOJKI

UBLOCK_KHUDAWADI

UBLOCK_LAO

UBLOCK_LATIN_1_SUPPLEMENT

UBLOCK_LATIN_EXTENDED_A

UBLOCK_LATIN_EXTENDED_ADDITIONAL

UBLOCK_LATIN_EXTENDED_B

UBLOCK_LATIN_EXTENDED_C

UBLOCK_LATIN_EXTENDED_D

UBLOCK_LATIN_EXTENDED_E

UBLOCK_LATIN_EXTENDED_F

UBLOCK_LATIN_EXTENDED_G

UBLOCK_LEPCHA

UBLOCK_LETTERLIKE_SYMBOLS

UBLOCK_LIMBU

UBLOCK_LINEAR_A

UBLOCK_LINEAR_B_IDEOGRAMS

UBLOCK_LINEAR_B_SYLLABARY

UBLOCK_LISU

UBLOCK_LISU_SUPPLEMENT

UBLOCK_LOW_SURROGATES

UBLOCK_LYCIAN

UBLOCK_LYDIAN

UBLOCK_MAHAJANI

UBLOCK_MAHJONG_TILES

UBLOCK_MAKASAR

UBLOCK_MALAYALAM

UBLOCK_MANDAIC

UBLOCK_MANICHAEAN

UBLOCK_MARCHEN

UBLOCK_MASARAM_GONDI

UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS

UBLOCK_MATHEMATICAL_OPERATORS

UBLOCK_MAYAN_NUMERALS

UBLOCK_MEDEFAIDRIN

UBLOCK_MEETEI_MAYEK

UBLOCK_MEETEI_MAYEK_EXTENSIONS

UBLOCK_MENDE_KIKAKUI

UBLOCK_MEROITIC_CURSIVE

UBLOCK_MEROITIC_HIEROGLYPHS

UBLOCK_MIAO

UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A

UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B

UBLOCK_MISCELLANEOUS_SYMBOLS

UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS

UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS

UBLOCK_MISCELLANEOUS_TECHNICAL

UBLOCK_MODI

UBLOCK_MODIFIER_TONE_LETTERS

UBLOCK_MONGOLIAN

UBLOCK_MONGOLIAN_SUPPLEMENT

UBLOCK_MRO

UBLOCK_MULTANI

UBLOCK_MUSICAL_SYMBOLS

UBLOCK_MYANMAR

UBLOCK_MYANMAR_EXTENDED_A

UBLOCK_MYANMAR_EXTENDED_B

UBLOCK_NABATAEAN

UBLOCK_NAG_MUNDARI

UBLOCK_NANDINAGARI

UBLOCK_NEWA

UBLOCK_NEW_TAI_LUE

UBLOCK_NKO

UBLOCK_NO_BLOCK

New No_Block value in Unicode 4.

UBLOCK_NUMBER_FORMS

UBLOCK_NUSHU

UBLOCK_NYIAKENG_PUACHUE_HMONG

UBLOCK_OGHAM

UBLOCK_OLD_HUNGARIAN

UBLOCK_OLD_ITALIC

UBLOCK_OLD_NORTH_ARABIAN

UBLOCK_OLD_PERMIC

UBLOCK_OLD_PERSIAN

UBLOCK_OLD_SOGDIAN

UBLOCK_OLD_SOUTH_ARABIAN

UBLOCK_OLD_TURKIC

UBLOCK_OLD_UYGHUR

UBLOCK_OL_CHIKI

UBLOCK_OPTICAL_CHARACTER_RECOGNITION

UBLOCK_ORIYA

UBLOCK_ORNAMENTAL_DINGBATS

UBLOCK_OSAGE

UBLOCK_OSMANYA

UBLOCK_OTTOMAN_SIYAQ_NUMBERS

UBLOCK_PAHAWH_HMONG

UBLOCK_PALMYRENE

UBLOCK_PAU_CIN_HAU

UBLOCK_PHAGS_PA

UBLOCK_PHAISTOS_DISC

UBLOCK_PHOENICIAN

UBLOCK_PHONETIC_EXTENSIONS

UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT

UBLOCK_PLAYING_CARDS

UBLOCK_PRIVATE_USE

Same as UBLOCK_PRIVATE_USE_AREA.

Until Unicode 3.1.1, the corresponding block name was "Private Use", and multiple code point ranges had this block. Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and adds separate blocks for the supplementary PUAs.

UBLOCK_PRIVATE_USE_AREA

Same as UBLOCK_PRIVATE_USE.

Until Unicode 3.1.1, the corresponding block name was "Private Use", and multiple code point ranges had this block. Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and adds separate blocks for the supplementary PUAs.

UBLOCK_PSALTER_PAHLAVI

UBLOCK_REJANG

UBLOCK_RUMI_NUMERAL_SYMBOLS

UBLOCK_RUNIC

UBLOCK_SAMARITAN

UBLOCK_SAURASHTRA

UBLOCK_SHARADA

UBLOCK_SHAVIAN

UBLOCK_SHORTHAND_FORMAT_CONTROLS

UBLOCK_SIDDHAM

UBLOCK_SINHALA

UBLOCK_SINHALA_ARCHAIC_NUMBERS

UBLOCK_SMALL_FORM_VARIANTS

UBLOCK_SMALL_KANA_EXTENSION

UBLOCK_SOGDIAN

UBLOCK_SORA_SOMPENG

UBLOCK_SOYOMBO

UBLOCK_SPACING_MODIFIER_LETTERS

UBLOCK_SPECIALS

UBLOCK_SUNDANESE

UBLOCK_SUNDANESE_SUPPLEMENT

UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS

UBLOCK_SUPPLEMENTAL_ARROWS_A

UBLOCK_SUPPLEMENTAL_ARROWS_B

UBLOCK_SUPPLEMENTAL_ARROWS_C

UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS

UBLOCK_SUPPLEMENTAL_PUNCTUATION

UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS

UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A

UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B

UBLOCK_SUTTON_SIGNWRITING

UBLOCK_SYLOTI_NAGRI

UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A

UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING

UBLOCK_SYRIAC

UBLOCK_SYRIAC_SUPPLEMENT

UBLOCK_TAGALOG

UBLOCK_TAGBANWA

UBLOCK_TAGS

UBLOCK_TAI_LE

UBLOCK_TAI_THAM

UBLOCK_TAI_VIET

UBLOCK_TAI_XUAN_JING_SYMBOLS

UBLOCK_TAKRI

UBLOCK_TAMIL

UBLOCK_TAMIL_SUPPLEMENT

UBLOCK_TANGSA

UBLOCK_TANGUT

UBLOCK_TANGUT_COMPONENTS

UBLOCK_TANGUT_SUPPLEMENT

UBLOCK_TELUGU

UBLOCK_THAANA

UBLOCK_THAI

UBLOCK_TIBETAN

UBLOCK_TIFINAGH

UBLOCK_TIRHUTA

UBLOCK_TOTO

UBLOCK_TRANSPORT_AND_MAP_SYMBOLS

UBLOCK_UGARITIC

UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS

UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED

UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A

UBLOCK_VAI

UBLOCK_VARIATION_SELECTORS

UBLOCK_VARIATION_SELECTORS_SUPPLEMENT

UBLOCK_VEDIC_EXTENSIONS

UBLOCK_VERTICAL_FORMS

UBLOCK_VITHKUQI

UBLOCK_WANCHO

UBLOCK_WARANG_CITI

UBLOCK_YEZIDI

UBLOCK_YIJING_HEXAGRAM_SYMBOLS

UBLOCK_YI_RADICALS

UBLOCK_YI_SYLLABLES

UBLOCK_ZANABAZAR_SQUARE

UBLOCK_ZNAMENNY_MUSICAL_NOTATION

UBreakIteratorType

 UBreakIteratorType

The possible types of text boundaries.

Properties
UBRK_CHARACTER

Character breaks.

UBRK_COUNT

One more than the highest normal UBreakIteratorType value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UBRK_LINE

Line breaks.

UBRK_SENTENCE

Sentence breaks.

UBRK_TITLE

Title Case breaks. The iterator created using this type locates title boundaries as described for Unicode 3.2 only.

For Unicode 4.0 and above title boundary iteration, please use Word Boundary iterator.

Deprecated. ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.

UBRK_WORD

Word breaks.

UCPMapRangeOption

 UCPMapRangeOption

Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates. Most users should use UCPMAP_RANGE_NORMAL.

See also: ucpmap_getRange, ucptrie_getRange, umutablecptrie_getRange

Properties
UCPMAP_RANGE_FIXED_ALL_SURROGATES

ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that all surrogates (U+D800..U+DFFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter.

The surrogateValue is not transformed via filter(). See U_IS_SURROGATE(c).

Most users should use UCPMAP_RANGE_NORMAL instead.

This option is useful for maps that map surrogate code units to special values optimized for UTF-16 string processing or for special error behavior for unpaired surrogates, but those values are not to be associated with the surrogate code points.

UCPMAP_RANGE_FIXED_LEAD_SURROGATES

ucpmap_getRange() enumerates all same-value ranges as stored in the map, except that lead surrogates (U+D800..U+DBFF) are treated as having the surrogateValue, which is passed to getRange() as a separate parameter.

The surrogateValue is not transformed via filter(). See U_IS_LEAD(c).

Most users should use UCPMAP_RANGE_NORMAL instead.

This option is useful for maps that map surrogate code units to special values optimized for UTF-16 string processing or for special error behavior for unpaired surrogates, but those values are not to be associated with the lead surrogate code points.

UCPMAP_RANGE_NORMAL

ucpmap_getRange() enumerates all same-value ranges as stored in the map.

Most users should use this option.

UCharCategory

 UCharCategory

Data for enumerated Unicode general category types.

See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .

Properties
U_CHAR_CATEGORY_COUNT

One higher than the last enum UCharCategory constant.

This numeric value is stable (will not change), see http://www.unicode.org/policies/stability_policy.html#Property_Value

U_COMBINING_SPACING_MARK

Mc.

U_CONNECTOR_PUNCTUATION

Pc.

U_CONTROL_CHAR

Cc.

U_CURRENCY_SYMBOL

Sc.

U_DASH_PUNCTUATION

Pd.

U_DECIMAL_DIGIT_NUMBER

Nd.

U_ENCLOSING_MARK

Me.

U_END_PUNCTUATION

Pe.

U_FINAL_PUNCTUATION

Pf.

U_FORMAT_CHAR

Cf.

U_GENERAL_OTHER_TYPES

Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!)

U_INITIAL_PUNCTUATION

Pi.

U_LETTER_NUMBER

Nl.

U_LINE_SEPARATOR

Zl.

U_LOWERCASE_LETTER

Ll.

U_MATH_SYMBOL

Sm.

U_MODIFIER_LETTER

Lm.

U_MODIFIER_SYMBOL

Sk.

U_NON_SPACING_MARK

Mn.

U_OTHER_LETTER

Lo.

U_OTHER_NUMBER

No.

U_OTHER_PUNCTUATION

Po.

U_OTHER_SYMBOL

So.

U_PARAGRAPH_SEPARATOR

Zp.

U_PRIVATE_USE_CHAR

Co.

U_SPACE_SEPARATOR

Zs.

U_START_PUNCTUATION

Ps.

U_SURROGATE

Cs.

U_TITLECASE_LETTER

Lt.

U_UNASSIGNED

Non-category for unassigned and non-character code points.

U_UPPERCASE_LETTER

Lu.

UCharDirection

 UCharDirection

This specifies the language directional property of a character set.

Properties
U_ARABIC_NUMBER

AN.

U_BLOCK_SEPARATOR

B.

U_BOUNDARY_NEUTRAL

BN.

U_CHAR_DIRECTION_COUNT

One more than the highest UCharDirection value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_COMMON_NUMBER_SEPARATOR

CS.

U_DIR_NON_SPACING_MARK

NSM.

U_EUROPEAN_NUMBER

EN.

U_EUROPEAN_NUMBER_SEPARATOR

ES.

U_EUROPEAN_NUMBER_TERMINATOR

ET.

U_FIRST_STRONG_ISOLATE

FSI.

U_LEFT_TO_RIGHT

L.

U_LEFT_TO_RIGHT_EMBEDDING

LRE.

U_LEFT_TO_RIGHT_ISOLATE

LRI.

U_LEFT_TO_RIGHT_OVERRIDE

LRO.

U_OTHER_NEUTRAL

ON.

U_POP_DIRECTIONAL_FORMAT

PDF.

U_POP_DIRECTIONAL_ISOLATE

PDI.

U_RIGHT_TO_LEFT

R.

U_RIGHT_TO_LEFT_ARABIC

AL.

U_RIGHT_TO_LEFT_EMBEDDING

RLE.

U_RIGHT_TO_LEFT_ISOLATE

RLI.

U_RIGHT_TO_LEFT_OVERRIDE

RLO.

U_SEGMENT_SEPARATOR

S.

U_WHITE_SPACE_NEUTRAL

WS.

UCharNameChoice

 UCharNameChoice

Selector constants for u_charName().

u_charName() returns the "modern" name of a Unicode character; or the name that was defined in Unicode version 1.0, before the Unicode standard merged with ISO-10646; or an "extended" name that gives each Unicode code point a unique name.

See also: u_charName

Properties
U_CHAR_NAME_ALIAS

Corrected name from NameAliases.txt.

U_CHAR_NAME_CHOICE_COUNT

One more than the highest normal UCharNameChoice value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_EXTENDED_CHAR_NAME

Standard or synthetic character name.

U_UNICODE_10_CHAR_NAME

The Unicode_1_Name property value which is of little practical value.

Beginning with ICU 49, ICU APIs return an empty string for this name choice. Deprecated. ICU 49

U_UNICODE_CHAR_NAME

Unicode character name (Name property).

UColAttribute

 UColAttribute

Attributes that collation service understands.

All the attributes can take UCOL_DEFAULT value, as well as the values specific to each one.

Properties
UCOL_ALTERNATE_HANDLING

Attribute for handling variable elements.

Acceptable values are UCOL_NON_IGNORABLE (default) which treats all the codepoints with non-ignorable primary weights in the same way, and UCOL_SHIFTED which causes codepoints with primary weights that are equal or below the variable top value to be ignored on primary level and moved to the quaternary level.

UCOL_ATTRIBUTE_COUNT

One more than the highest normal UColAttribute value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCOL_CASE_FIRST

Controls the ordering of upper and lower case letters.

Acceptable values are UCOL_OFF (default), which orders upper and lower case letters in accordance with their tertiary weights, UCOL_UPPER_FIRST which forces upper case letters to sort before lower case letters, and UCOL_LOWER_FIRST which does the opposite.

UCOL_CASE_LEVEL

Controls whether an extra case level (positioned before the third level) is generated or not.

Acceptable values are UCOL_OFF (default), when case level is not generated, and UCOL_ON which causes the case level to be generated. Contents of the case level are affected by the value of UCOL_CASE_FIRST attribute. A simple way to ignore accent differences in a string is to set the strength to UCOL_PRIMARY and enable case level.

UCOL_DECOMPOSITION_MODE

An alias for UCOL_NORMALIZATION_MODE attribute.

UCOL_FRENCH_COLLATION

Attribute for direction of secondary weights - used in Canadian French.

Acceptable values are UCOL_ON, which results in secondary weights being considered backwards and UCOL_OFF which treats secondary weights in the order they appear.

UCOL_HIRAGANA_QUATERNARY_MODE

When turned on, this attribute positions Hiragana before all non-ignorables on quaternary level. This is a sneaky way to produce JIS sort order.

This attribute was an implementation detail of the CLDR Japanese tailoring. Since ICU 50, this attribute is not settable any more via API functions. Since CLDR 25/ICU 53, explicit quaternary relations are used to achieve the same Japanese sort order.

Deprecated. ICU 50 Implementation detail, cannot be set via API, was removed from implementation.

UCOL_NORMALIZATION_MODE

Controls whether the normalization check and necessary normalizations are performed.

When set to UCOL_OFF (default) no normalization check is performed. The correctness of the result is guaranteed only if the input data is in so-called FCD form (see users manual for more info). When set to UCOL_ON, an incremental check is performed to see whether the input data is in the FCD form. If the data is not in the FCD form, incremental NFD normalization is performed.

UCOL_NUMERIC_COLLATION

When turned on, this attribute makes substrings of digits sort according to their numeric values.

This is a way to get '100' to sort AFTER '2'. Note that the longest digit substring that can be treated as a single unit is 254 digits (not counting leading zeros). If a digit substring is longer than that, the digits beyond the limit will be treated as a separate digit substring.

A "digit" in this sense is a code point with General_Category=Nd, which does not include circled numbers, roman numerals, etc. Only a contiguous digit substring is considered, that is, non-negative integers without separators. There is no support for plus/minus signs, decimals, exponents, etc.

UCOL_STRENGTH

The strength attribute.

Can be either UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength for most locales (except Japanese) is tertiary.

Quaternary strength is useful when combined with shifted setting for alternate handling attribute and for JIS X 4061 collation, when it is used to distinguish between Katakana and Hiragana. Otherwise, quaternary level is affected only by the number of non-ignorable code points in the string.

Identical strength is rarely useful, as it amounts to codepoints of the NFD form of the string.

UColAttributeValue

 UColAttributeValue

Enum containing attribute values for controlling collation behavior.

Here are all the allowable values. Not every attribute can take every value. The only universal value is UCOL_DEFAULT, which resets the attribute value to the predefined value for that locale.

Properties
UCOL_ATTRIBUTE_VALUE_COUNT

One more than the highest normal UColAttributeValue value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCOL_CE_STRENGTH_LIMIT
UCOL_DEFAULT

accepted by most attributes

UCOL_DEFAULT_STRENGTH

Default collation strength.

UCOL_IDENTICAL

Identical collation strength.

UCOL_LOWER_FIRST

Valid for UCOL_CASE_FIRST - lower case sorts before upper case.

UCOL_NON_IGNORABLE

Valid for UCOL_ALTERNATE_HANDLING.

Alternate handling will be non ignorable

UCOL_OFF

Turn the feature off - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE.

UCOL_ON

Turn the feature on - works for UCOL_FRENCH_COLLATION, UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE & UCOL_DECOMPOSITION_MODE.

UCOL_PRIMARY

Primary collation strength.

UCOL_QUATERNARY

Quaternary collation strength.

UCOL_SECONDARY

Secondary collation strength.

UCOL_SHIFTED

Valid for UCOL_ALTERNATE_HANDLING.

Alternate handling will be shifted

UCOL_STRENGTH_LIMIT
UCOL_TERTIARY

Tertiary collation strength.

UCOL_UPPER_FIRST

upper case sorts before lower case

UColBoundMode

 UColBoundMode

Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.

Underlying code depends on them having these numbers.

Properties
UCOL_BOUND_LOWER

lower bound

UCOL_BOUND_UPPER

upper bound that will match strings of exact size

UCOL_BOUND_UPPER_LONG

upper bound that will match all the strings that have the same initial substring as the given string

UCOL_BOUND_VALUE_COUNT

One more than the highest normal UColBoundMode value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UColReorderCode

 UColReorderCode

Enum containing the codes for reordering segments of the collation table that are not script codes.

These reordering codes are to be used in conjunction with the script codes. See also: ucol_getReorderCodes, ucol_setReorderCodes, ucol_getEquivalentReorderCodes, UScriptCode

Properties
UCOL_REORDER_CODE_CURRENCY

Characters with the currency property.

This is equivalent to the rule value "currency".

UCOL_REORDER_CODE_DEFAULT

A special reordering code that is used to specify the default reordering codes for a locale.

UCOL_REORDER_CODE_DIGIT

Characters with the digit property.

This is equivalent to the rule value "digit".

UCOL_REORDER_CODE_FIRST

The first entry in the enumeration of reordering groups.

This is intended for use in range checking and enumeration of the reorder codes.

UCOL_REORDER_CODE_LIMIT

One more than the highest normal UColReorderCode value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCOL_REORDER_CODE_NONE

A special reordering code that is used to specify no reordering codes.

UCOL_REORDER_CODE_OTHERS

A special reordering code that is used to specify all other codes used for reordering except for the codes listed as UColReorderCode values and those listed explicitly in a reordering.

UCOL_REORDER_CODE_PUNCTUATION

Characters with the punctuation property.

This is equivalent to the rule value "punct".

UCOL_REORDER_CODE_SPACE

Characters with the space property.

This is equivalent to the rule value "space".

UCOL_REORDER_CODE_SYMBOL

Characters with the symbol property.

This is equivalent to the rule value "symbol".

UCollationResult

 UCollationResult

UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.

UCOL_EQUAL is returned if source string is compared to be equal to target string in the ucol_strcoll() method. UCOL_GREATER is returned if source string is compared to be greater than target string in the ucol_strcoll() method. See also: ucol_strcoll(). Possible values for a comparison result.

Properties
UCOL_EQUAL

string a == string b

UCOL_GREATER

string a > string b

UCOL_LESS

string a < string b

UDecompositionType

 UDecompositionType

Decomposition Type constants.

See also: UCHAR_DECOMPOSITION_TYPE

Properties
U_DT_CANONICAL
U_DT_CIRCLE
U_DT_COMPAT
U_DT_COUNT

One more than the highest normal UDecompositionType value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_DT_FINAL
U_DT_FONT
U_DT_FRACTION
U_DT_INITIAL
U_DT_ISOLATED
U_DT_MEDIAL
U_DT_NARROW
U_DT_NOBREAK
U_DT_NONE
U_DT_SMALL
U_DT_SQUARE
U_DT_SUB
U_DT_SUPER
U_DT_VERTICAL
U_DT_WIDE

UDialectHandling

 UDialectHandling

Enum used in LocaleDisplayNames::createInstance.

Properties
ULDN_DIALECT_NAMES

Use dialect names, when generating a locale name, e.g. en_GB displays as 'British English'.

ULDN_STANDARD_NAMES

Use standard names when generating a locale name, e.g. en_GB displays as 'English (United Kingdom)'.

UDisplayContext

 UDisplayContext

Display context settings.

Note, the specific numeric values are internal and may change.

Properties
UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE

The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for the beginning of a sentence.

UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE

The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for the middle of a sentence.

UDISPCTX_CAPITALIZATION_FOR_STANDALONE

The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for stand-alone usage such as an isolated name on a calendar page.

UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU

The capitalization context if a date, date symbol or display name is to be formatted with capitalization appropriate for a user-interface list or menu item.

UDISPCTX_CAPITALIZATION_NONE


CAPITALIZATION can be set to one of UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, or UDISPCTX_CAPITALIZATION_FOR_STANDALONE.

Use UDisplayContextType UDISPCTX_TYPE_CAPITALIZATION to get the value. The capitalization context to be used is unknown (this is the default value).

UDISPCTX_DIALECT_NAMES

A possible setting for DIALECT_HANDLING: use dialect names, when generating a locale name, e.g. en_GB displays as 'British English'.

UDISPCTX_LENGTH_FULL


DISPLAY_LENGTH can be set to one of UDISPCTX_LENGTH_FULL or UDISPCTX_LENGTH_SHORT.

Use UDisplayContextType UDISPCTX_TYPE_DISPLAY_LENGTH to get the value. A possible setting for DISPLAY_LENGTH: use full names when generating a locale name, e.g. "United States" for US.

UDISPCTX_LENGTH_SHORT

A possible setting for DISPLAY_LENGTH: use short names when generating a locale name, e.g. "U.S." for US.

UDISPCTX_NO_SUBSTITUTE

A possible setting for SUBSTITUTE_HANDLING: Returns a null value with error code set to U_ILLEGAL_ARGUMENT_ERROR when no data is available.

UDISPCTX_STANDARD_NAMES


DIALECT_HANDLING can be set to one of UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES.

Use UDisplayContextType UDISPCTX_TYPE_DIALECT_HANDLING to get the value. A possible setting for DIALECT_HANDLING: use standard names when generating a locale name, e.g. en_GB displays as 'English (United Kingdom)'.

UDISPCTX_SUBSTITUTE


SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or UDISPCTX_NO_SUBSTITUTE.

Use UDisplayContextType UDISPCTX_TYPE_SUBSTITUTE_HANDLING to get the value. A possible setting for SUBSTITUTE_HANDLING: Returns a fallback value (e.g., the input code) when no data is available. This is the default value.

UDisplayContextType

 UDisplayContextType

Display context types, for getting values of a particular setting.

Note, the specific numeric values are internal and may change.

Properties
UDISPCTX_TYPE_CAPITALIZATION

Type to retrieve the capitalization context setting, e.g. UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, etc.

UDISPCTX_TYPE_DIALECT_HANDLING

Type to retrieve the dialect handling setting, e.g. UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES.

UDISPCTX_TYPE_DISPLAY_LENGTH

Type to retrieve the display length setting, e.g. UDISPCTX_LENGTH_FULL, UDISPCTX_LENGTH_SHORT.

UDISPCTX_TYPE_SUBSTITUTE_HANDLING

Type to retrieve the substitute handling setting, e.g. UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE.

UEastAsianWidth

 UEastAsianWidth

East Asian Width constants.

See also: UCHAR_EAST_ASIAN_WIDTH See also: u_getIntPropertyValue

Properties
U_EA_AMBIGUOUS
U_EA_COUNT

One more than the highest normal UEastAsianWidth value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_EA_FULLWIDTH
U_EA_HALFWIDTH
U_EA_NARROW
U_EA_NEUTRAL
U_EA_WIDE

UErrorCode

 UErrorCode

Standard ICU4C error code type, a substitute for exceptions.

Initialize the UErrorCode with U_ZERO_ERROR, and check for success or failure using U_SUCCESS() or U_FAILURE():

UErrorCode errorCode = U_ZERO_ERROR;
// call ICU API that needs an error code parameter.
if (U_FAILURE(errorCode)) {
    // An error occurred. Handle it here.
}

C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a suitable subclass.

For more information, see: https://unicode-org.github.io/icu/userguide/dev/codingguidelines#details-about-icu-error-codes

Note: By convention, ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode first test:

if (U_FAILURE(errorCode)) { return immediately; }

so that in a chain of such functions the first one that sets an error code causes the following ones to not perform any operations.

Properties
U_AMBIGUOUS_ALIAS_WARNING

This converter alias can go to different converter implementations.

U_ARGUMENT_TYPE_MISMATCH

Argument name and argument index mismatch in MessageFormat functions.

U_BAD_VARIABLE_DEFINITION

Missing '$' or duplicate variable name.

U_BRK_ASSIGN_ERROR

Syntax error in RBBI rule assignment statement.

U_BRK_ERROR_LIMIT

One more than the highest normal BreakIterator error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_BRK_ERROR_START

Start of codes indicating Break Iterator failures.

U_BRK_HEX_DIGITS_EXPECTED

Hex digits expected as part of an escaped char in a rule.

U_BRK_INIT_ERROR

Initialization failure.

Probable missing ICU Data.

U_BRK_INTERNAL_ERROR

An internal error (bug) was detected.

U_BRK_MALFORMED_RULE_TAG

The {nnn} tag on a rule is malformed.

U_BRK_MISMATCHED_PAREN

Mis-matched parentheses in an RBBI rule.

U_BRK_NEW_LINE_IN_QUOTED_STRING

Missing closing quote in an RBBI rule.

U_BRK_RULE_EMPTY_SET

Rule contains an empty Unicode Set.

U_BRK_RULE_SYNTAX

Syntax error in RBBI rule.

U_BRK_SEMICOLON_EXPECTED

Missing ';' at the end of a RBBI rule.

U_BRK_UNCLOSED_SET

UnicodeSet within an RBBI rule is missing a closing ']'.

U_BRK_UNDEFINED_VARIABLE

Use of an undefined $Variable in an RBBI rule.

U_BRK_UNRECOGNIZED_OPTION

!!option in RBBI rules not recognized.

U_BRK_VARIABLE_REDFINITION

RBBI rule $Variable redefined.

U_BUFFER_OVERFLOW_ERROR

A result would not fit in the supplied buffer.

U_CE_NOT_FOUND_ERROR

Currently used only while setting variable top, but can be used generally.

U_COLLATOR_VERSION_MISMATCH

Collator version is not compatible with the base version.

U_DECIMAL_NUMBER_SYNTAX_ERROR

Decimal number syntax error.

U_DEFAULT_KEYWORD_MISSING

Missing DEFAULT rule in plural rules.

U_DIFFERENT_UCA_VERSION

ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules.

No impact to further function

U_DUPLICATE_KEYWORD

Duplicate keyword in PluralFormat.

U_ENUM_OUT_OF_SYNC_ERROR

UEnumeration out of sync with underlying collection.

U_ERROR_LIMIT

One more than the highest normal error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_ERROR_WARNING_LIMIT

One more than the highest normal UErrorCode warning value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_ERROR_WARNING_START

Start of information results (semantically successful)

U_FILE_ACCESS_ERROR

The requested file cannot be found.

U_FMT_PARSE_ERROR_LIMIT

One more than the highest normal formatting API error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_FMT_PARSE_ERROR_START

Start of format library errors.

U_FORMAT_INEXACT_ERROR

Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY.

U_IDNA_ACE_PREFIX_ERROR
U_IDNA_CHECK_BIDI_ERROR
U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR
U_IDNA_ERROR_LIMIT

One more than the highest normal IDNA error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_IDNA_ERROR_START
U_IDNA_LABEL_TOO_LONG_ERROR
U_IDNA_PROHIBITED_ERROR
U_IDNA_STD3_ASCII_RULES_ERROR
U_IDNA_UNASSIGNED_ERROR
U_IDNA_VERIFICATION_ERROR
U_IDNA_ZERO_LENGTH_LABEL_ERROR
U_ILLEGAL_ARGUMENT_ERROR

Start of codes indicating failure.

U_ILLEGAL_CHARACTER

A special character is outside its allowed context.

U_ILLEGAL_CHAR_FOUND

Character conversion: Illegal input sequence/combination of input units.

U_ILLEGAL_CHAR_IN_SEGMENT

UNUSED as of ICU 2.4.

U_ILLEGAL_ESCAPE_SEQUENCE

ISO-2022 illegal escape sequence.

U_ILLEGAL_PAD_POSITION

Pad symbol misplaced in number pattern.

U_INDEX_OUTOFBOUNDS_ERROR

Trying to access the index that is out of bounds.

U_INPUT_TOO_LONG_ERROR

The input is impractically long for an operation.

It is rejected because it may lead to problems such as excessive processing time, stack depth, or heap memory requirements.

U_INTERNAL_PROGRAM_ERROR

Indicates a bug in the library code.

U_INTERNAL_TRANSLITERATOR_ERROR

Internal transliterator system error.

U_INVALID_CHAR_FOUND

Character conversion: Unmappable input sequence.

In other APIs: Invalid character.

U_INVALID_FORMAT_ERROR

Data format is not what is expected.

U_INVALID_FUNCTION

A "&fn()" rule specifies an unknown transliterator.

U_INVALID_ID

A "::id" rule specifies an unknown transliterator.

U_INVALID_PROPERTY_PATTERN

UNUSED as of ICU 2.4.

U_INVALID_RBT_SYNTAX

A "::id" rule was passed to the RuleBasedTransliterator parser.

U_INVALID_STATE_ERROR

Requested operation can not be completed with ICU in its current state.

U_INVALID_TABLE_FILE

Conversion table file not found.

U_INVALID_TABLE_FORMAT

Conversion table file found, but corrupted.

U_INVARIANT_CONVERSION_ERROR

Unable to convert a UChar* string to char* with the invariant converter.

U_MALFORMED_EXPONENTIAL_PATTERN

Grouping symbol in exponent pattern.

U_MALFORMED_PRAGMA

A 'use' pragma is invalid.

U_MALFORMED_RULE

Elements of a rule are misplaced.

U_MALFORMED_SET

A UnicodeSet pattern is invalid.

U_MALFORMED_SYMBOL_REFERENCE

UNUSED as of ICU 2.4.

U_MALFORMED_UNICODE_ESCAPE

A Unicode escape pattern is invalid.

U_MALFORMED_VARIABLE_DEFINITION

A variable definition is invalid.

U_MALFORMED_VARIABLE_REFERENCE

A variable reference is invalid.

U_MEMORY_ALLOCATION_ERROR

Memory allocation error.

U_MESSAGE_PARSE_ERROR

Unable to parse a message (message format)

U_MISMATCHED_SEGMENT_DELIMITERS

UNUSED as of ICU 2.4.

U_MISPLACED_ANCHOR_START

A start anchor appears at an illegal position.

U_MISPLACED_COMPOUND_FILTER

A compound filter is in an invalid location.

U_MISPLACED_CURSOR_OFFSET

A cursor offset occurs at an illegal position.

U_MISPLACED_QUANTIFIER

A quantifier appears after a segment close delimiter.

U_MISSING_OPERATOR

A rule contains no operator.

U_MISSING_RESOURCE_ERROR

The requested resource cannot be found.

U_MISSING_SEGMENT_CLOSE

UNUSED as of ICU 2.4.

U_MULTIPLE_ANTE_CONTEXTS

More than one ante context.

U_MULTIPLE_COMPOUND_FILTERS

More than one compound filter.

U_MULTIPLE_CURSORS

More than one cursor.

U_MULTIPLE_DECIMAL_SEPARATORS

More than one decimal separator in number pattern.

U_MULTIPLE_DECIMAL_SEPERATORS

Typo: kept for backward compatibility.

Use U_MULTIPLE_DECIMAL_SEPARATORS

U_MULTIPLE_EXPONENTIAL_SYMBOLS

More than one exponent symbol in number pattern.

U_MULTIPLE_PAD_SPECIFIERS

More than one pad symbol in number pattern.

U_MULTIPLE_PERCENT_SYMBOLS

More than one percent symbol in number pattern.

U_MULTIPLE_PERMILL_SYMBOLS

More than one permill symbol in number pattern.

U_MULTIPLE_POST_CONTEXTS

More than one post context.

U_NO_SPACE_AVAILABLE

No space available for in-buffer expansion for Arabic shaping.

U_NO_WRITE_PERMISSION

Attempt to modify read-only or constant data.

U_NUMBER_ARG_OUTOFBOUNDS_ERROR

The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999.

U_NUMBER_SKELETON_SYNTAX_ERROR

The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error.

U_PARSE_ERROR

Equivalent to Java ParseException.

U_PARSE_ERROR_LIMIT

One more than the highest normal Transliterator error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_PARSE_ERROR_START

Start of Transliterator errors.

U_PATTERN_SYNTAX_ERROR

Syntax error in format pattern.

U_PLUGIN_CHANGED_LEVEL_WARNING

A plugin caused a level change.

May not be an error, but later plugins may not load.

U_PLUGIN_DIDNT_SET_LEVEL

The plugin didn't call uplug_setPlugLevel in response to a QUERY.

U_PLUGIN_ERROR_LIMIT

One more than the highest normal plug-in error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_PLUGIN_ERROR_START

Start of codes indicating plugin failures.

U_PLUGIN_TOO_HIGH

The plugin's level is too high to be loaded right now.

U_PRIMARY_TOO_LONG_ERROR

User tried to set variable top to a primary that is longer than two bytes.

U_REGEX_BAD_ESCAPE_SEQUENCE

Unrecognized backslash escape sequence in pattern.

U_REGEX_BAD_INTERVAL

Error in {min,max} interval.

U_REGEX_ERROR_LIMIT

One more than the highest normal regular expression error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_REGEX_ERROR_START

Start of codes indicating Regexp failures.

U_REGEX_INTERNAL_ERROR

An internal error (bug) was detected.

U_REGEX_INVALID_BACK_REF

Back-reference to a non-existent capture group.

U_REGEX_INVALID_CAPTURE_GROUP_NAME

Invalid capture group name.

U_REGEX_INVALID_FLAG

Invalid value for match mode flags.

U_REGEX_INVALID_RANGE

In a character range [x-y], x is greater than y.

U_REGEX_INVALID_STATE

RegexMatcher in invalid state for requested operation.

U_REGEX_LOOK_BEHIND_LIMIT

Look-Behind pattern matches must have a bounded maximum length.

U_REGEX_MAX_LT_MIN

In {min,max}, max is less than min.

U_REGEX_MISMATCHED_PAREN

Incorrectly nested parentheses in regexp pattern.

U_REGEX_MISSING_CLOSE_BRACKET

Missing closing bracket on a bracket expression.

U_REGEX_NUMBER_TOO_BIG

Decimal number is too large.

U_REGEX_OCTAL_TOO_BIG

Octal character constants must be <= 0377.

Deprecated. ICU 54. This error cannot occur.

U_REGEX_PATTERN_TOO_BIG

Pattern exceeds limits on size or complexity.

U_REGEX_PROPERTY_SYNTAX

Incorrect Unicode property.

U_REGEX_RULE_SYNTAX

Syntax error in regexp pattern.

U_REGEX_SET_CONTAINS_STRING

Regexps cannot have UnicodeSets containing strings.

U_REGEX_STACK_OVERFLOW

Regular expression backtrack stack overflow.

U_REGEX_STOPPED_BY_CALLER

Matching operation aborted by user callback function.

U_REGEX_TIME_OUT

Maximum allowed match time exceeded.

U_REGEX_UNIMPLEMENTED

Use of regexp feature that is not yet implemented.

U_RESOURCE_TYPE_MISMATCH

An operation is requested over a resource that does not support it.

U_RULE_MASK_ERROR

A rule is hidden by an earlier more general rule.

U_SAFECLONE_ALLOCATED_WARNING

A SafeClone operation required allocating memory (informational only)

U_SORT_KEY_TOO_SHORT_WARNING

Number of levels requested in getBound is higher than the number of levels in the sort key.

U_STANDARD_ERROR_LIMIT

One more than the highest standard error code.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_STATE_OLD_WARNING

ICU has to use compatibility layer to construct the service.

Expect performance/memory usage degradation. Consider upgrading.

U_STATE_TOO_OLD_ERROR

ICU cannot construct a service from this state, as it is no longer supported.

U_STRINGPREP_CHECK_BIDI_ERROR
U_STRINGPREP_PROHIBITED_ERROR
U_STRINGPREP_UNASSIGNED_ERROR
U_STRING_NOT_TERMINATED_WARNING

An output string could not be NUL-terminated because output length==destCapacity.

U_TOO_MANY_ALIASES_ERROR

There are too many aliases in the path to the requested resource.

It is very possible that a circular alias definition has occurred

U_TRAILING_BACKSLASH

A dangling backslash.

U_TRUNCATED_CHAR_FOUND

Character conversion: Incomplete input sequence.

U_UNCLOSED_SEGMENT

A closing ')' is missing.

U_UNDEFINED_KEYWORD

Undefined Plural keyword.

U_UNDEFINED_SEGMENT_REFERENCE

A segment reference does not correspond to a defined segment.

U_UNDEFINED_VARIABLE

A variable reference does not correspond to a defined variable.

U_UNEXPECTED_TOKEN

Syntax error in format pattern.

U_UNMATCHED_BRACES

Braces do not match in message pattern.

U_UNQUOTED_SPECIAL

A special character was not quoted or escaped.

U_UNSUPPORTED_ATTRIBUTE

UNUSED as of ICU 2.4.

U_UNSUPPORTED_ERROR

Requested operation not supported in current context.

U_UNSUPPORTED_ESCAPE_SEQUENCE

ISO-2022 unsupported escape sequence.

U_UNSUPPORTED_PROPERTY

UNUSED as of ICU 2.4.

U_UNTERMINATED_QUOTE

A closing single quote is missing.

U_USELESS_COLLATOR_ERROR

Collator is options only and no base is specified.

U_USING_DEFAULT_WARNING

A resource bundle lookup returned a result from the root locale (not an error)

U_USING_FALLBACK_WARNING

A resource bundle lookup returned a fallback result (not an error)

U_VARIABLE_RANGE_EXHAUSTED

Too many stand-ins generated for the given variable range.

U_VARIABLE_RANGE_OVERLAP

The variable range overlaps characters used in rules.

U_ZERO_ERROR

No error, no warning.

UGraphemeClusterBreak

 UGraphemeClusterBreak

Grapheme Cluster Break constants.

See also: UCHAR_GRAPHEME_CLUSTER_BREAK

Properties
U_GCB_CONTROL
U_GCB_COUNT

One more than the highest normal UGraphemeClusterBreak value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_GCB_CR
U_GCB_EXTEND
U_GCB_E_BASE

U_GCB_E_BASE_GAZ

U_GCB_E_MODIFIER

U_GCB_GLUE_AFTER_ZWJ

U_GCB_L
U_GCB_LF
U_GCB_LV
U_GCB_LVT
U_GCB_OTHER
U_GCB_PREPEND

U_GCB_REGIONAL_INDICATOR

U_GCB_SPACING_MARK

U_GCB_T
U_GCB_V
U_GCB_ZWJ

UHangulSyllableType

 UHangulSyllableType

Hangul Syllable Type constants.

See also: UCHAR_HANGUL_SYLLABLE_TYPE

Properties
U_HST_COUNT

One more than the highest normal UHangulSyllableType value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_HST_LEADING_JAMO
U_HST_LVT_SYLLABLE
U_HST_LV_SYLLABLE
U_HST_NOT_APPLICABLE
U_HST_TRAILING_JAMO
U_HST_VOWEL_JAMO

UJoiningGroup

 UJoiningGroup

Joining Group constants.

See also: UCHAR_JOINING_GROUP

Properties
U_JG_AFRICAN_FEH

U_JG_AFRICAN_NOON

U_JG_AFRICAN_QAF

U_JG_AIN
U_JG_ALAPH
U_JG_ALEF
U_JG_BEH
U_JG_BETH
U_JG_BURUSHASKI_YEH_BARREE

U_JG_COUNT

One more than the highest normal UJoiningGroup value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_JG_DAL
U_JG_DALATH_RISH
U_JG_E
U_JG_FARSI_YEH

U_JG_FE

U_JG_FEH
U_JG_FINAL_SEMKATH
U_JG_GAF
U_JG_GAMAL
U_JG_HAH
U_JG_HAMZA_ON_HEH_GOAL
U_JG_HANIFI_ROHINGYA_KINNA_YA

U_JG_HANIFI_ROHINGYA_PA

U_JG_HE
U_JG_HEH
U_JG_HEH_GOAL
U_JG_HETH
U_JG_KAF
U_JG_KAPH
U_JG_KHAPH

U_JG_KNOTTED_HEH
U_JG_LAM
U_JG_LAMADH
U_JG_MALAYALAM_BHA

U_JG_MALAYALAM_JA

U_JG_MALAYALAM_LLA

U_JG_MALAYALAM_LLLA

U_JG_MALAYALAM_NGA

U_JG_MALAYALAM_NNA

U_JG_MALAYALAM_NNNA

U_JG_MALAYALAM_NYA

U_JG_MALAYALAM_RA

U_JG_MALAYALAM_SSA

U_JG_MALAYALAM_TTA

U_JG_MANICHAEAN_ALEPH

U_JG_MANICHAEAN_AYIN

U_JG_MANICHAEAN_BETH

U_JG_MANICHAEAN_DALETH

U_JG_MANICHAEAN_DHAMEDH

U_JG_MANICHAEAN_FIVE

U_JG_MANICHAEAN_GIMEL

U_JG_MANICHAEAN_HETH

U_JG_MANICHAEAN_HUNDRED

U_JG_MANICHAEAN_KAPH

U_JG_MANICHAEAN_LAMEDH

U_JG_MANICHAEAN_MEM

U_JG_MANICHAEAN_NUN

U_JG_MANICHAEAN_ONE

U_JG_MANICHAEAN_PE

U_JG_MANICHAEAN_QOPH

U_JG_MANICHAEAN_RESH

U_JG_MANICHAEAN_SADHE

U_JG_MANICHAEAN_SAMEKH

U_JG_MANICHAEAN_TAW

U_JG_MANICHAEAN_TEN

U_JG_MANICHAEAN_TETH

U_JG_MANICHAEAN_THAMEDH

U_JG_MANICHAEAN_TWENTY

U_JG_MANICHAEAN_WAW

U_JG_MANICHAEAN_YODH

U_JG_MANICHAEAN_ZAYIN

U_JG_MEEM
U_JG_MIM
U_JG_NOON
U_JG_NO_JOINING_GROUP
U_JG_NUN
U_JG_NYA

U_JG_PE
U_JG_QAF
U_JG_QAPH
U_JG_REH
U_JG_REVERSED_PE
U_JG_ROHINGYA_YEH

U_JG_SAD
U_JG_SADHE
U_JG_SEEN
U_JG_SEMKATH
U_JG_SHIN
U_JG_STRAIGHT_WAW

U_JG_SWASH_KAF
U_JG_SYRIAC_WAW
U_JG_TAH
U_JG_TAW
U_JG_TEH_MARBUTA
U_JG_TEH_MARBUTA_GOAL

U_JG_TETH
U_JG_THIN_YEH

U_JG_VERTICAL_TAIL

U_JG_WAW
U_JG_YEH
U_JG_YEH_BARREE
U_JG_YEH_WITH_TAIL
U_JG_YUDH
U_JG_YUDH_HE
U_JG_ZAIN
U_JG_ZHAIN

UJoiningType

 UJoiningType

Joining Type constants.

See also: UCHAR_JOINING_TYPE

Properties
U_JT_COUNT

One more than the highest normal UJoiningType value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_JT_DUAL_JOINING
U_JT_JOIN_CAUSING
U_JT_LEFT_JOINING
U_JT_NON_JOINING
U_JT_RIGHT_JOINING
U_JT_TRANSPARENT

ULayoutType

 ULayoutType

Enums for the return values of the character and line orientation functions.

Properties
ULOC_LAYOUT_BTT
ULOC_LAYOUT_LTR
ULOC_LAYOUT_RTL
ULOC_LAYOUT_TTB
ULOC_LAYOUT_UNKNOWN

ULineBreakTag

 ULineBreakTag

Enum constants for the line break tags returned by getRuleStatus().

A range of values is defined for each category of line break, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.

The numeric values of all of these constants are stable (will not change).

Properties
UBRK_LINE_HARD

Tag value for a hard, or mandatory line break.

UBRK_LINE_HARD_LIMIT

Upper bound for hard line breaks.

UBRK_LINE_SOFT

Tag value for soft line breaks, positions at which a line break is acceptable but not required.

UBRK_LINE_SOFT_LIMIT

Upper bound for soft line breaks.

ULocAvailableType

 ULocAvailableType

Types for uloc_getAvailableByType and uloc_countAvailableByType.

Properties
ULOC_AVAILABLE_COUNT

ULOC_AVAILABLE_DEFAULT

Locales that return data when passed to ICU APIs, but not including legacy or alias locales.

ULOC_AVAILABLE_ONLY_LEGACY_ALIASES

Legacy or alias locales that return data when passed to ICU APIs.

Examples of supported legacy or alias locales:

  • iw (alias to he)
  • mo (alias to ro)
  • zh_CN (alias to zh_Hans_CN)
  • sr_BA (alias to sr_Cyrl_BA)
  • ars (alias to ar_SA)

The locales in this set are disjoint from the ones in ULOC_AVAILABLE_DEFAULT. To get both sets at the same time, use ULOC_AVAILABLE_WITH_LEGACY_ALIASES.

ULOC_AVAILABLE_WITH_LEGACY_ALIASES

The union of the locales in ULOC_AVAILABLE_DEFAULT and ULOC_AVAILABLE_ONLY_LEGACY_ALIASES.

ULocDataLocaleType

 ULocDataLocaleType

Constants for *_getLocale(). They allow the user to select whether they want information on the requested, valid, or actual locale.

For example, a collator for "en_US_CALIFORNIA" was requested. In the current state of ICU (2.0), the requested locale is "en_US_CALIFORNIA", the valid locale is "en_US" (the most specific locale supported by ICU), and the actual locale is "root" (the collation data comes unmodified from the UCA). The locale is considered supported by ICU if there is a core ICU bundle for that locale (although it may be empty).

Properties
ULOC_ACTUAL_LOCALE

This is the locale the data actually comes from.

ULOC_DATA_LOCALE_TYPE_LIMIT

One more than the highest normal ULocDataLocaleType value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

ULOC_REQUESTED_LOCALE

This is the requested locale.

Deprecated. ICU 2.8

ULOC_VALID_LOCALE

This is the most specific locale supported by ICU.

UNormalization2Mode

 UNormalization2Mode

Constants for normalization modes.

For details about standard Unicode normalization forms and about the algorithms which are also used with custom mapping tables see http://www.unicode.org/unicode/reports/tr15/

Properties
UNORM2_COMPOSE

Decomposition followed by composition.

Same as standard NFC when using an "nfc" instance. Same as standard NFKC when using an "nfkc" instance. For details about standard Unicode normalization forms see http://www.unicode.org/unicode/reports/tr15/

UNORM2_COMPOSE_CONTIGUOUS

Compose only contiguously.

Also known as "FCC" or "Fast C Contiguous". The result will often but not always be in NFC. The result will conform to FCD which is useful for processing. Not a standard Unicode normalization form. For details see http://www.unicode.org/notes/tn5/#FCC

UNORM2_DECOMPOSE

Map, and reorder canonically.

Same as standard NFD when using an "nfc" instance. Same as standard NFKD when using an "nfkc" instance. For details about standard Unicode normalization forms see http://www.unicode.org/unicode/reports/tr15/

UNORM2_FCD

"Fast C or D" form.

If a string is in this form, then further decomposition without reordering would yield the same form as DECOMPOSE. Text in "Fast C or D" form can be processed efficiently with data tables that are "canonically closed", that is, that provide equivalent data for equivalent text, without having to be fully normalized. Not a standard Unicode normalization form. Not a unique form: Different FCD strings can be canonically equivalent. For details see http://www.unicode.org/notes/tn5/#FCD

UNormalizationCheckResult

 UNormalizationCheckResult

Result values for normalization quick check functions.

For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms

Properties
UNORM_MAYBE

The input string may or may not be in the normalization form.

This value is only returned for composition forms like NFC and FCC, when a backward-combining character is found for which the surrounding text would have to be analyzed further.

UNORM_NO

The input string is not in the normalization form.

UNORM_YES

The input string is in the normalization form.

UNumericType

 UNumericType

Numeric Type constants.

See also: UCHAR_NUMERIC_TYPE

Properties
U_NT_COUNT

One more than the highest normal UNumericType value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_NT_DECIMAL
U_NT_DIGIT
U_NT_NONE
U_NT_NUMERIC

UProperty

 UProperty

Selection constants for Unicode properties.

These constants are used in functions like u_hasBinaryProperty to select one of the Unicode properties.

The properties APIs are intended to reflect Unicode properties as defined in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).

For details about the properties see UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).

Important: If ICU is built with UCD files from a Unicode version earlier than, e.g., 3.2, then properties marked with "new in Unicode 3.2" are unavailable or only partially available. Check u_getUnicodeVersion to be sure.

See also: u_hasBinaryProperty, u_getIntPropertyValue, u_getUnicodeVersion

Properties
UCHAR_AGE

String property Age.

Corresponds to u_charAge.

UCHAR_ALPHABETIC

Binary property Alphabetic.

Same as u_isUAlphabetic, different from u_isalpha. Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic

UCHAR_ASCII_HEX_DIGIT

Binary property ASCII_Hex_Digit.

0-9 A-F a-f

UCHAR_BASIC_EMOJI

Binary property of strings Basic_Emoji.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_BIDI_CLASS

Enumerated property Bidi_Class.

Same as u_charDirection, returns UCharDirection values.

UCHAR_BIDI_CONTROL

Binary property Bidi_Control.

Format controls which have specific functions in the Bidi Algorithm.

UCHAR_BIDI_MIRRORED

Binary property Bidi_Mirrored.

Characters that may change display in RTL text. Same as u_isMirrored. See Bidi Algorithm, UTR 9.

UCHAR_BIDI_MIRRORING_GLYPH

String property Bidi_Mirroring_Glyph.

Corresponds to u_charMirror.

UCHAR_BIDI_PAIRED_BRACKET

String property Bidi_Paired_Bracket (new in Unicode 6.3).

Corresponds to u_getBidiPairedBracket.

UCHAR_BIDI_PAIRED_BRACKET_TYPE

Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).

Used in UAX #9: Unicode Bidirectional Algorithm (http://www.unicode.org/reports/tr9/). Returns UBidiPairedBracketType values.

UCHAR_BINARY_LIMIT

One more than the last constant for binary Unicode properties.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_BINARY_START

First constant for binary Unicode properties.

UCHAR_BLOCK

Enumerated property Block.

Same as ublock_getCode, returns UBlockCode values.

UCHAR_CANONICAL_COMBINING_CLASS

Enumerated property Canonical_Combining_Class.

Same as u_getCombiningClass, returns 8-bit numeric values.

UCHAR_CASED

Binary property Cased.

For Lowercase, Uppercase and Titlecase characters.

UCHAR_CASE_FOLDING

String property Case_Folding.

Corresponds to u_strFoldCase in ustring.h.

UCHAR_CASE_IGNORABLE

Binary property Case_Ignorable.

Used in context-sensitive case mappings.

UCHAR_CASE_SENSITIVE

Binary property Case_Sensitive.

Either the source of a case mapping or in the target of a case mapping. Not the same as the general category Cased_Letter.

UCHAR_CHANGES_WHEN_CASEFOLDED

Binary property Changes_When_Casefolded.

UCHAR_CHANGES_WHEN_CASEMAPPED

Binary property Changes_When_Casemapped.

UCHAR_CHANGES_WHEN_LOWERCASED

Binary property Changes_When_Lowercased.

UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED

Binary property Changes_When_NFKC_Casefolded.

UCHAR_CHANGES_WHEN_TITLECASED

Binary property Changes_When_Titlecased.

UCHAR_CHANGES_WHEN_UPPERCASED

Binary property Changes_When_Uppercased.

UCHAR_DASH

Binary property Dash.

Variations of dashes.

UCHAR_DECOMPOSITION_TYPE

Enumerated property Decomposition_Type.

Returns UDecompositionType values.

UCHAR_DEFAULT_IGNORABLE_CODE_POINT

Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).

Ignorable in most processing. <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space)

UCHAR_DEPRECATED

Binary property Deprecated (new in Unicode 3.2).

The usage of deprecated characters is strongly discouraged.

UCHAR_DIACRITIC

Binary property Diacritic.

Characters that linguistically modify the meaning of another character to which they apply.

UCHAR_DOUBLE_LIMIT

One more than the last constant for double Unicode properties.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_DOUBLE_START

First constant for double Unicode properties.

UCHAR_EAST_ASIAN_WIDTH

Enumerated property East_Asian_Width.

See http://www.unicode.org/reports/tr11/ for details. Returns UEastAsianWidth values.

UCHAR_EMOJI

Binary property Emoji.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EMOJI_COMPONENT

Binary property Emoji_Component.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EMOJI_KEYCAP_SEQUENCE

Binary property of strings Emoji_Keycap_Sequence.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_EMOJI_MODIFIER

Binary property Emoji_Modifier.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EMOJI_MODIFIER_BASE

Binary property Emoji_Modifier_Base.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EMOJI_PRESENTATION

Binary property Emoji_Presentation.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EXTENDED_PICTOGRAPHIC

Binary property Extended_Pictographic.

See http://www.unicode.org/reports/tr51/#Emoji_Properties

UCHAR_EXTENDER

Binary property Extender.

Extend the value or shape of a preceding alphabetic character, e.g., length and iteration marks.

UCHAR_FULL_COMPOSITION_EXCLUSION

Binary property Full_Composition_Exclusion.

CompositionExclusions.txt+Singleton Decompositions+ Non-Starter Decompositions.

UCHAR_GENERAL_CATEGORY

Enumerated property General_Category.

Same as u_charType, returns UCharCategory values.

UCHAR_GENERAL_CATEGORY_MASK

Bitmask property General_Category_Mask.

This is the General_Category property returned as a bit mask. When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), returns bit masks for UCharCategory values where exactly one bit is set. When used with u_getPropertyValueName() and u_getPropertyValueEnum(), a multi-bit mask is used for sets of categories like "Letters". Mask values should be cast to uint32_t.

UCHAR_GRAPHEME_BASE

Binary property Grapheme_Base (new in Unicode 3.2).

For programmatic determination of grapheme cluster boundaries. [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ

UCHAR_GRAPHEME_CLUSTER_BREAK

Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).

Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/). Returns UGraphemeClusterBreak values.

UCHAR_GRAPHEME_EXTEND

Binary property Grapheme_Extend (new in Unicode 3.2).

For programmatic determination of grapheme cluster boundaries. Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ

UCHAR_GRAPHEME_LINK

Binary property Grapheme_Link (new in Unicode 3.2).

For programmatic determination of grapheme cluster boundaries.

UCHAR_HANGUL_SYLLABLE_TYPE

Enumerated property Hangul_Syllable_Type, new in Unicode 4.

Returns UHangulSyllableType values.

UCHAR_HEX_DIGIT

Binary property Hex_Digit.

Characters commonly used for hexadecimal numbers.

UCHAR_HYPHEN

Binary property Hyphen.

Dashes used to mark connections between pieces of words, plus the Katakana middle dot.

UCHAR_IDEOGRAPHIC

Binary property Ideographic.

CJKV ideographs.

UCHAR_IDS_BINARY_OPERATOR

Binary property IDS_Binary_Operator (new in Unicode 3.2).

For programmatic determination of Ideographic Description Sequences.

UCHAR_IDS_TRINARY_OPERATOR

Binary property IDS_Trinary_Operator (new in Unicode 3.2).

For programmatic determination of Ideographic Description Sequences.

UCHAR_ID_CONTINUE

Binary property ID_Continue.

Characters that can continue an identifier. DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." ID_Start+Mn+Mc+Nd+Pc

UCHAR_ID_START

Binary property ID_Start.

Characters that can start an identifier. Lu+Ll+Lt+Lm+Lo+Nl

UCHAR_INDIC_POSITIONAL_CATEGORY

Enumerated property Indic_Positional_Category.

New in Unicode 6.0 as provisional property Indic_Matra_Category; renamed and changed to informative in Unicode 8.0. See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt

UCHAR_INDIC_SYLLABIC_CATEGORY

Enumerated property Indic_Syllabic_Category.

New in Unicode 6.0 as provisional; informative since Unicode 8.0. See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt

UCHAR_INT_LIMIT

One more than the last constant for enumerated/integer Unicode properties.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_INT_START

First constant for enumerated/integer Unicode properties.

UCHAR_INVALID_CODE

Represents a nonexistent or invalid property or property value.

UCHAR_ISO_COMMENT

Deprecated string property ISO_Comment.

Corresponds to u_getISOComment. Deprecated. ICU 49

UCHAR_JOINING_GROUP

Enumerated property Joining_Group.

Returns UJoiningGroup values.

UCHAR_JOINING_TYPE

Enumerated property Joining_Type.

Returns UJoiningType values.

UCHAR_JOIN_CONTROL

Binary property Join_Control.

Format controls for cursive joining and ligation.

UCHAR_LEAD_CANONICAL_COMBINING_CLASS

Enumerated property Lead_Canonical_Combining_Class.

ICU-specific property for the ccc of the first code point of the decomposition, or lccc(c)=ccc(NFD(c)[0]). Useful for checking for canonically ordered text; see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS.

UCHAR_LINE_BREAK

Enumerated property Line_Break.

Returns ULineBreak values.

UCHAR_LOGICAL_ORDER_EXCEPTION

Binary property Logical_Order_Exception (new in Unicode 3.2).

Characters that do not use logical order and require special handling in most processing.

UCHAR_LOWERCASE

Binary property Lowercase.

Same as u_isULowercase, different from u_islower. Ll+Other_Lowercase

UCHAR_LOWERCASE_MAPPING

String property Lowercase_Mapping.

Corresponds to u_strToLower in ustring.h.

UCHAR_MASK_LIMIT

One more than the last constant for bit-mask Unicode properties.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_MASK_START

First constant for bit-mask Unicode properties.

UCHAR_MATH

Binary property Math.

Sm+Other_Math

UCHAR_NAME

String property Name.

Corresponds to u_charName.

UCHAR_NFC_INERT

Binary property NFC_Inert.

ICU-specific property for characters that are inert under NFC, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.

UCHAR_NFC_QUICK_CHECK

Enumerated property NFC_Quick_Check.

Returns UNormalizationCheckResult values.

UCHAR_NFD_INERT

Binary property NFD_Inert.

ICU-specific property for characters that are inert under NFD, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.

UCHAR_NFD_QUICK_CHECK

Enumerated property NFD_Quick_Check.

Returns UNormalizationCheckResult values.

UCHAR_NFKC_INERT

Binary property NFKC_Inert.

ICU-specific property for characters that are inert under NFKC, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.

UCHAR_NFKC_QUICK_CHECK

Enumerated property NFKC_Quick_Check.

Returns UNormalizationCheckResult values.

UCHAR_NFKD_INERT

Binary property NFKD_Inert.

ICU-specific property for characters that are inert under NFKD, i.e., they do not interact with adjacent characters. See the documentation for the Normalizer2 class and the Normalizer2::isInert() method.

UCHAR_NFKD_QUICK_CHECK

Enumerated property NFKD_Quick_Check.

Returns UNormalizationCheckResult values.

UCHAR_NONCHARACTER_CODE_POINT

Binary property Noncharacter_Code_Point.

Code points that are explicitly defined as illegal for the encoding of characters.

UCHAR_NUMERIC_TYPE

Enumerated property Numeric_Type.

Returns UNumericType values.

UCHAR_NUMERIC_VALUE

Double property Numeric_Value.

Corresponds to u_getNumericValue.

UCHAR_OTHER_PROPERTY_LIMIT

One more than the last constant for Unicode properties with unusual value types.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_OTHER_PROPERTY_START

First constant for Unicode properties with unusual value types.

UCHAR_PATTERN_SYNTAX

Binary property Pattern_Syntax (new in Unicode 4.1).

See UAX #31 Identifier and Pattern Syntax (http://www.unicode.org/reports/tr31/)

UCHAR_PATTERN_WHITE_SPACE

Binary property Pattern_White_Space (new in Unicode 4.1).

See UAX #31 Identifier and Pattern Syntax (http://www.unicode.org/reports/tr31/)

UCHAR_POSIX_ALNUM

Binary property alnum (a C/POSIX character class).

Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.

UCHAR_POSIX_BLANK

Binary property blank (a C/POSIX character class).

Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.

UCHAR_POSIX_GRAPH

Binary property graph (a C/POSIX character class).

Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.

UCHAR_POSIX_PRINT

Binary property print (a C/POSIX character class).

Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.

UCHAR_POSIX_XDIGIT

Binary property xdigit (a C/POSIX character class).

Implemented according to the UTS #18 Annex C Standard Recommendation. See the uchar.h file documentation.

UCHAR_PREPENDED_CONCATENATION_MARK

Binary property Prepended_Concatenation_Mark.

UCHAR_QUOTATION_MARK

Binary property Quotation_Mark.

UCHAR_RADICAL

Binary property Radical (new in Unicode 3.2).

For programmatic determination of Ideographic Description Sequences.

UCHAR_REGIONAL_INDICATOR

Binary property Regional_Indicator.

UCHAR_RGI_EMOJI

Binary property of strings RGI_Emoji.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_RGI_EMOJI_FLAG_SEQUENCE

Binary property of strings RGI_Emoji_Flag_Sequence.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE

Binary property of strings RGI_Emoji_Modifier_Sequence.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_RGI_EMOJI_TAG_SEQUENCE

Binary property of strings RGI_Emoji_Tag_Sequence.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_RGI_EMOJI_ZWJ_SEQUENCE

Binary property of strings RGI_Emoji_ZWJ_Sequence.

See https://www.unicode.org/reports/tr51/#Emoji_Sets

UCHAR_SCRIPT

Enumerated property Script.

Same as uscript_getScript, returns UScriptCode values.

UCHAR_SCRIPT_EXTENSIONS

Miscellaneous property Script_Extensions (new in Unicode 6.0).

Some characters are commonly used in multiple scripts. For more information, see UAX #24: http://www.unicode.org/reports/tr24/. Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.

UCHAR_SEGMENT_STARTER

Binary property Segment_Starter.

ICU-specific property for characters that are starters in terms of Unicode normalization and combining character sequences. They have ccc=0 and do not occur in non-initial position of the canonical decomposition of any character (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). ICU uses this property for segmenting a string for generating a set of canonically equivalent strings, e.g. for canonical closure while processing collation tailoring rules.

UCHAR_SENTENCE_BREAK

Enumerated property Sentence_Break (new in Unicode 4.1).

Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/). Returns USentenceBreak values.

UCHAR_SIMPLE_CASE_FOLDING

String property Simple_Case_Folding.

Corresponds to u_foldCase.

UCHAR_SIMPLE_LOWERCASE_MAPPING

String property Simple_Lowercase_Mapping.

Corresponds to u_tolower.

UCHAR_SIMPLE_TITLECASE_MAPPING

String property Simple_Titlecase_Mapping.

Corresponds to u_totitle.

UCHAR_SIMPLE_UPPERCASE_MAPPING

String property Simple_Uppercase_Mapping.

Corresponds to u_toupper.

UCHAR_SOFT_DOTTED

Binary property Soft_Dotted (new in Unicode 3.2).

Characters with a "soft dot", like i or j. An accent placed on these characters causes the dot to disappear.

UCHAR_STRING_LIMIT

One more than the last constant for string Unicode properties.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

UCHAR_STRING_START

First constant for string Unicode properties.

UCHAR_S_TERM

Binary property STerm (new in Unicode 4.0.1).

Sentence Terminal. Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/)

UCHAR_TERMINAL_PUNCTUATION

Binary property Terminal_Punctuation.

Punctuation characters that generally mark the end of textual units.

UCHAR_TITLECASE_MAPPING

String property Titlecase_Mapping.

Corresponds to u_strToTitle in ustring.h.

UCHAR_TRAIL_CANONICAL_COMBINING_CLASS

Enumerated property Trail_Canonical_Combining_Class.

ICU-specific property for the ccc of the last code point of the decomposition, or tccc(c)=ccc(NFD(c)[last]). Useful for checking for canonically ordered text; see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS.

UCHAR_UNICODE_1_NAME

String property Unicode_1_Name.

This property is of little practical value. Beginning with ICU 49, ICU APIs return an empty string for this property. Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). Deprecated. ICU 49

UCHAR_UNIFIED_IDEOGRAPH

Binary property Unified_Ideograph (new in Unicode 3.2).

For programmatic determination of Ideographic Description Sequences.

UCHAR_UPPERCASE

Binary property Uppercase.

Same as u_isUUppercase, different from u_isupper. Lu+Other_Uppercase

UCHAR_UPPERCASE_MAPPING

String property Uppercase_Mapping.

Corresponds to u_strToUpper in ustring.h.

UCHAR_VARIATION_SELECTOR

Binary property Variation_Selector (new in Unicode 4.0.1).

Indicates all those characters that qualify as Variation Selectors. For details on the behavior of these characters, see StandardizedVariants.html and 15.6 Variation Selectors.

UCHAR_VERTICAL_ORIENTATION

Enumerated property Vertical_Orientation.

Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/). New as a UCD property in Unicode 10.0.

UCHAR_WHITE_SPACE

Binary property White_Space.

Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. Space characters+TAB+CR+LF-ZWSP-ZWNBSP

UCHAR_WORD_BREAK

Enumerated property Word_Break (new in Unicode 4.1).

Used in UAX #29: Text Boundaries (http://www.unicode.org/reports/tr29/) Returns UWordBreakValues values.

UCHAR_XID_CONTINUE

Binary property XID_Continue.

ID_Continue modified to allow closure under normalization forms NFKC and NFKD.

UCHAR_XID_START

Binary property XID_Start.

ID_Start modified to allow closure under normalization forms NFKC and NFKD.

UPropertyNameChoice

 UPropertyNameChoice

Selector constants for u_getPropertyName() and u_getPropertyValueName().

These selectors are used to choose which name is returned for a given property or value. All properties and values have a long name. Most have a short name, but some do not. Unicode allows for additional names, beyond the long and short name, which would be indicated by U_LONG_PROPERTY_NAME + i, where i=1, 2,...

See also: u_getPropertyName() See also: u_getPropertyValueName()

Properties
U_LONG_PROPERTY_NAME
U_PROPERTY_NAME_CHOICE_COUNT

One more than the highest normal UPropertyNameChoice value.

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_SHORT_PROPERTY_NAME

UScriptCode

 UScriptCode

Constants for ISO 15924 script codes.

The current set of script code constants supports at least all scripts that are encoded in the version of Unicode which ICU currently supports. The names of the constants are usually derived from the Unicode script property value aliases. See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .

In addition, constants for many ISO 15924 script codes are included, for use with language tags, CLDR data, and similar. Some of those codes are not used in the Unicode Character Database (UCD). For example, there are no characters that have a UCD script property value of Hans or Hant. All Han ideographs have the Hani script property value in Unicode.

Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.

Starting with ICU 55, script codes are only added when their scripts have been or will certainly be encoded in Unicode, and have been assigned Unicode script property value aliases, to ensure that their script names are stable and match the names of the constants. Script codes like Latf and Aran that are not subject to separate encoding may be added at any time.

Properties
USCRIPT_ADLAM

USCRIPT_AFAKA

USCRIPT_AHOM

USCRIPT_ANATOLIAN_HIEROGLYPHS

USCRIPT_ARABIC

USCRIPT_ARMENIAN

USCRIPT_AVESTAN

USCRIPT_BALINESE

USCRIPT_BAMUM

USCRIPT_BASSA_VAH

USCRIPT_BATAK

USCRIPT_BENGALI

USCRIPT_BHAIKSUKI

USCRIPT_BLISSYMBOLS

USCRIPT_BOOK_PAHLAVI

USCRIPT_BOPOMOFO

USCRIPT_BRAHMI

USCRIPT_BRAILLE

USCRIPT_BUGINESE

USCRIPT_BUHID

USCRIPT_CANADIAN_ABORIGINAL

Canadian_Aboriginal script.

USCRIPT_CARIAN

USCRIPT_CAUCASIAN_ALBANIAN

USCRIPT_CHAKMA

USCRIPT_CHAM

USCRIPT_CHEROKEE

USCRIPT_CHORASMIAN

USCRIPT_CIRTH

USCRIPT_CODE_LIMIT

One more than the highest normal UScriptCode value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

USCRIPT_COMMON

USCRIPT_COPTIC

USCRIPT_CUNEIFORM

USCRIPT_CYPRIOT

USCRIPT_CYPRO_MINOAN

USCRIPT_CYRILLIC

USCRIPT_DEMOTIC_EGYPTIAN

USCRIPT_DESERET

USCRIPT_DEVANAGARI

USCRIPT_DIVES_AKURU

USCRIPT_DOGRA

USCRIPT_DUPLOYAN

USCRIPT_DUPLOYAN_SHORTAND

Deprecated. ICU 54 Typo, use USCRIPT_DUPLOYAN

USCRIPT_EASTERN_SYRIAC

USCRIPT_EGYPTIAN_HIEROGLYPHS

USCRIPT_ELBASAN

USCRIPT_ELYMAIC

USCRIPT_ESTRANGELO_SYRIAC

USCRIPT_ETHIOPIC

USCRIPT_GEORGIAN

USCRIPT_GLAGOLITIC

USCRIPT_GOTHIC

USCRIPT_GRANTHA

USCRIPT_GREEK

USCRIPT_GUJARATI

USCRIPT_GUNJALA_GONDI

USCRIPT_GURMUKHI

USCRIPT_HAN

USCRIPT_HANGUL

USCRIPT_HANIFI_ROHINGYA

USCRIPT_HANUNOO

USCRIPT_HAN_WITH_BOPOMOFO

USCRIPT_HARAPPAN_INDUS

USCRIPT_HATRAN

USCRIPT_HEBREW

USCRIPT_HIERATIC_EGYPTIAN

USCRIPT_HIRAGANA

USCRIPT_IMPERIAL_ARAMAIC

USCRIPT_INHERITED

USCRIPT_INSCRIPTIONAL_PAHLAVI

USCRIPT_INSCRIPTIONAL_PARTHIAN

USCRIPT_INVALID_CODE

USCRIPT_JAMO

USCRIPT_JAPANESE

USCRIPT_JAVANESE

USCRIPT_JURCHEN

USCRIPT_KAITHI

USCRIPT_KANNADA

USCRIPT_KATAKANA

USCRIPT_KATAKANA_OR_HIRAGANA

New script code in Unicode 4.0.1.

USCRIPT_KAWI

USCRIPT_KAYAH_LI

USCRIPT_KHAROSHTHI

USCRIPT_KHITAN_SMALL_SCRIPT

USCRIPT_KHMER

USCRIPT_KHOJKI

USCRIPT_KHUDAWADI

USCRIPT_KHUTSURI

USCRIPT_KOREAN

USCRIPT_KPELLE

USCRIPT_LANNA

USCRIPT_LAO

USCRIPT_LATIN

USCRIPT_LATIN_FRAKTUR

USCRIPT_LATIN_GAELIC

USCRIPT_LEPCHA

USCRIPT_LIMBU

USCRIPT_LINEAR_A

USCRIPT_LINEAR_B

USCRIPT_LISU

USCRIPT_LOMA

USCRIPT_LYCIAN

USCRIPT_LYDIAN

USCRIPT_MAHAJANI

USCRIPT_MAKASAR

USCRIPT_MALAYALAM

USCRIPT_MANDAEAN

USCRIPT_MANDAIC

USCRIPT_MANICHAEAN

USCRIPT_MARCHEN

USCRIPT_MASARAM_GONDI

USCRIPT_MATHEMATICAL_NOTATION

USCRIPT_MAYAN_HIEROGLYPHS

USCRIPT_MEDEFAIDRIN

USCRIPT_MEITEI_MAYEK

USCRIPT_MENDE

Mende Kikakui.

USCRIPT_MEROITIC

USCRIPT_MEROITIC_CURSIVE

USCRIPT_MEROITIC_HIEROGLYPHS

USCRIPT_MIAO

USCRIPT_MODI

USCRIPT_MONGOLIAN

USCRIPT_MOON

USCRIPT_MRO

USCRIPT_MULTANI

USCRIPT_MYANMAR

USCRIPT_NABATAEAN

USCRIPT_NAG_MUNDARI

USCRIPT_NAKHI_GEBA

USCRIPT_NANDINAGARI

USCRIPT_NEWA

USCRIPT_NEW_TAI_LUE

USCRIPT_NKO

USCRIPT_NUSHU

USCRIPT_NYIAKENG_PUACHUE_HMONG

USCRIPT_OGHAM

USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC

USCRIPT_OLD_HUNGARIAN

USCRIPT_OLD_ITALIC

USCRIPT_OLD_NORTH_ARABIAN

USCRIPT_OLD_PERMIC

USCRIPT_OLD_PERSIAN

USCRIPT_OLD_SOGDIAN

USCRIPT_OLD_SOUTH_ARABIAN

USCRIPT_OLD_UYGHUR

USCRIPT_OL_CHIKI

USCRIPT_ORIYA

USCRIPT_ORKHON

USCRIPT_OSAGE

USCRIPT_OSMANYA

USCRIPT_PAHAWH_HMONG

USCRIPT_PALMYRENE

USCRIPT_PAU_CIN_HAU

USCRIPT_PHAGS_PA

USCRIPT_PHOENICIAN

USCRIPT_PHONETIC_POLLARD

USCRIPT_PSALTER_PAHLAVI

USCRIPT_REJANG

USCRIPT_RONGORONGO

USCRIPT_RUNIC

USCRIPT_SAMARITAN

USCRIPT_SARATI

USCRIPT_SAURASHTRA

USCRIPT_SHARADA

USCRIPT_SHAVIAN

USCRIPT_SIDDHAM

USCRIPT_SIGN_WRITING

Sutton SignWriting.

USCRIPT_SIMPLIFIED_HAN

USCRIPT_SINDHI

USCRIPT_SINHALA

USCRIPT_SOGDIAN

USCRIPT_SORA_SOMPENG

USCRIPT_SOYOMBO

USCRIPT_SUNDANESE

USCRIPT_SYLOTI_NAGRI

USCRIPT_SYMBOLS

USCRIPT_SYMBOLS_EMOJI

USCRIPT_SYRIAC

USCRIPT_TAGALOG

USCRIPT_TAGBANWA

USCRIPT_TAI_LE

USCRIPT_TAI_VIET

USCRIPT_TAKRI

USCRIPT_TAMIL

USCRIPT_TANGSA

USCRIPT_TANGUT

USCRIPT_TELUGU

USCRIPT_TENGWAR

USCRIPT_THAANA

USCRIPT_THAI

USCRIPT_TIBETAN

USCRIPT_TIFINAGH

USCRIPT_TIRHUTA

USCRIPT_TOTO

USCRIPT_TRADITIONAL_HAN

USCRIPT_UCAS

Canadian_Aboriginal script (alias).

USCRIPT_UGARITIC

USCRIPT_UNKNOWN

USCRIPT_UNWRITTEN_LANGUAGES

USCRIPT_VAI

USCRIPT_VISIBLE_SPEECH

USCRIPT_VITHKUQI

USCRIPT_WANCHO

USCRIPT_WARANG_CITI

USCRIPT_WESTERN_SYRIAC

USCRIPT_WOLEAI

USCRIPT_YEZIDI

USCRIPT_YI

USCRIPT_ZANABAZAR_SQUARE

UScriptUsage

 UScriptUsage

Script usage constants.

See UAX #31 Unicode Identifier and Pattern Syntax. http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers

Properties
USCRIPT_USAGE_ASPIRATIONAL

Aspirational Use script.

USCRIPT_USAGE_EXCLUDED

Candidate for Exclusion from Identifiers.

USCRIPT_USAGE_LIMITED_USE

Limited Use script.

USCRIPT_USAGE_NOT_ENCODED

Not encoded in Unicode.

USCRIPT_USAGE_RECOMMENDED

Recommended script.

USCRIPT_USAGE_UNKNOWN

Unknown script usage.

USentenceBreak

 USentenceBreak

Sentence Break constants.

See also: UCHAR_SENTENCE_BREAK

Properties
U_SB_ATERM
U_SB_CLOSE
U_SB_COUNT

One more than the highest normal USentenceBreak value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_SB_CR
U_SB_EXTEND
U_SB_FORMAT
U_SB_LF
U_SB_LOWER
U_SB_NUMERIC
U_SB_OLETTER
U_SB_OTHER
U_SB_SCONTINUE
U_SB_SEP
U_SB_SP
U_SB_STERM
U_SB_UPPER

USentenceBreakTag

 USentenceBreakTag

Enum constants for the sentence break tags returned by getRuleStatus().

A range of values is defined for each category of sentence, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.

The numeric values of all of these constants are stable (will not change).

Properties
UBRK_SENTENCE_SEP

Tag value for sentences that do not contain an ending sentence terminator ('.', '?', '!', etc.) character, but are ended only by a hard separator (CR, LF, PS, etc.) or end of input.

UBRK_SENTENCE_SEP_LIMIT

Upper bound for tags for sentences ended by a separator.

UBRK_SENTENCE_TERM

Tag value for sentences ending with a sentence terminator ('.', '?', '!', etc.) character, possibly followed by a hard separator (CR, LF, PS, etc.).

UBRK_SENTENCE_TERM_LIMIT

Upper bound for tags for sentences ended by sentence terminators.

UTransDirection

 UTransDirection

Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules of a RuleBasedTransliterator.

Specified when a transliterator is opened. An "A-B" transliterator transliterates A to B when operating in the forward direction, and B to A when operating in the reverse direction.

Properties
UTRANS_FORWARD

UTRANS_FORWARD means from <source> to <target> for a transliterator with ID <source>-<target>.

For a transliterator opened using a rule, it means forward direction rules, e.g., "A > B".

UTRANS_REVERSE

UTRANS_REVERSE means from <target> to <source> for a transliterator with ID <source>-<target>.

For a transliterator opened using a rule, it means reverse direction rules, e.g., "A < B".

UVerticalOrientation

 UVerticalOrientation

UWordBreak

 UWordBreak

Enum constants for the word break tags returned by getRuleStatus().

A range of values is defined for each category of word, to allow for further subdivisions of a category in future releases. Applications should check for tag values falling within the range, rather than for single individual values.

The numeric values of all of these constants are stable (will not change).

Properties
UBRK_WORD_IDEO

Tag value for words containing ideographic characters, lower limit.

UBRK_WORD_IDEO_LIMIT

Tag value for words containing ideographic characters, upper limit.

UBRK_WORD_KANA

Tag value for words containing kana characters, lower limit.

UBRK_WORD_KANA_LIMIT

Tag value for words containing kana characters, upper limit.

UBRK_WORD_LETTER

Tag value for words that contain letters, excluding hiragana, katakana or ideographic characters, lower limit.

UBRK_WORD_LETTER_LIMIT

Tag value for words containing letters, upper limit.

UBRK_WORD_NONE

Tag value for "words" that do not fit into any of the other categories.

Includes spaces and most punctuation.

UBRK_WORD_NONE_LIMIT

Upper bound for tags for uncategorized words.

UBRK_WORD_NUMBER

Tag value for words that appear to be numbers, lower limit.

UBRK_WORD_NUMBER_LIMIT

Tag value for words that appear to be numbers, upper limit.

UWordBreakValues

 UWordBreakValues

Word Break constants.

(UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)

See also: UCHAR_WORD_BREAK

Properties
U_WB_ALETTER
U_WB_COUNT

One more than the highest normal UWordBreakValues value.

The highest value is available via u_getIntPropertyMaxValue(UCHAR_WORD_BREAK).

Deprecated. ICU 58 The numeric value may change over time, see ICU ticket #12420.

U_WB_CR

U_WB_DOUBLE_QUOTE

U_WB_EXTEND

U_WB_EXTENDNUMLET
U_WB_E_BASE

U_WB_E_BASE_GAZ

U_WB_E_MODIFIER

U_WB_FORMAT
U_WB_GLUE_AFTER_ZWJ

U_WB_HEBREW_LETTER

U_WB_KATAKANA
U_WB_LF

U_WB_MIDLETTER
U_WB_MIDNUM
U_WB_MIDNUMLET

U_WB_NEWLINE

U_WB_NUMERIC
U_WB_OTHER
U_WB_REGIONAL_INDICATOR

U_WB_SINGLE_QUOTE

U_WB_WSEGSPACE

U_WB_ZWJ

Typedefs

OldUChar

uint16_t OldUChar

Default ICU 58 definition of UChar.

A base type for UTF-16 code units and pointers. Unsigned 16-bit integer.

Define OldUChar to be wchar_t if that is 16 bits wide. If wchar_t is not 16 bits wide, then define OldUChar to be uint16_t.

This makes the definition of OldUChar platform-dependent but allows direct string type compatibility with platforms with 16-bit wchar_t types.

This is how UChar was defined in ICU 58, for transition convenience. Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. The current UChar responds to UCHAR_TYPE but OldUChar does not.

UBidiPairedBracketType

enum UBidiPairedBracketType UBidiPairedBracketType

Bidi Paired Bracket Type constants.

See also: UCHAR_BIDI_PAIRED_BRACKET_TYPE

UBlockCode

enum UBlockCode UBlockCode

UBool

int8_t UBool

The ICU boolean type, a signed-byte integer.

ICU-specific for historical reasons: The C and C++ standards used to not define type bool. Also provides a fixed type definition, as opposed to type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.

UBreakIteratorType

enum UBreakIteratorType UBreakIteratorType

The possible types of text boundaries.

UCPMap

struct UCPMap UCPMap

Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.

See also: UCPTrie See also: UMutableCPTrie

UCPMapValueFilter

uint32_t U_CALLCONV UCPMapValueFilter(const void *context, uint32_t value)

Callback function type: Modifies a map value.

Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange(). The modified value will be returned by the getRange function.

Can be used to ignore some of the value bits, make a filter for one of several values, return a value index computed from the map value, etc.

Details
Parameters
context
an opaque pointer, as passed into the getRange function
value
a value from the map
Returns
the modified value

UChar

uint16_t UChar

The base type for UTF-16 code units and pointers.

Unsigned 16-bit integer. Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.

UChar is configurable by defining the macro UCHAR_TYPE on the preprocessor or compiler command line: -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. (The UCHAR_TYPE can also be #defined earlier in this file, for outside the ICU library code.) This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.

The default is UChar=char16_t.

C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.

In C, char16_t is a simple typedef of uint_least16_t. ICU requires uint_least16_t=uint16_t for data memory mapping. On macOS, char16_t is not available because the uchar.h standard header is missing.

UChar32

int32_t UChar32

Define UChar32 as a type for single Unicode code points.

UChar32 is a signed 32-bit integer (same as int32_t).

The Unicode code point range is 0..0x10ffff. All other values (negative or >=0x110000) are illegal as Unicode code points. They may be used as sentinel values to indicate "done", "error" or similar non-code point conditions.

Before ICU 2.4 (Jitterbug 2146), UChar32 was defined to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) or else to be uint32_t. That is, the definition of UChar32 was platform-dependent.

See also: U_SENTINEL

UCharCategory

enum UCharCategory UCharCategory

Data for enumerated Unicode general category types.

See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .

UCharDirection

enum UCharDirection UCharDirection

This specifies the language directional property of a character set.

UCharEnumTypeRange

UBool U_CALLCONV UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)

Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c<limit) with the same Unicode general category ("character type").

The callback function can stop the enumeration by returning false.

Details
Parameters
context
an opaque pointer, as passed into utrie_enum()
start
the first code point in a contiguous range with value
limit
one past the last code point in a contiguous range with value
type
the general category for all code points in [start..limit[
Returns
false to stop the enumeration

UCharNameChoice

enum UCharNameChoice UCharNameChoice

Selector constants for u_charName().

u_charName() returns the "modern" name of a Unicode character; or the name that was defined in Unicode version 1.0, before the Unicode standard merged with ISO-10646; or an "extended" name that gives each Unicode code point a unique name.

See also: u_charName

UColAttribute

enum UColAttribute UColAttribute

Attributes that collation service understands.

All the attributes can take UCOL_DEFAULT value, as well as the values specific to each one.

UColAttributeValue

enum UColAttributeValue UColAttributeValue

Enum containing attribute values for controlling collation behavior.

Here are all the allowable values. Not every attribute can take every value. The only universal value is UCOL_DEFAULT, which resets the attribute value to the predefined value for that locale.

UColBoundMode

enum UColBoundMode UColBoundMode

Enum that is taken by the ucol_getBound API. See below for explanation. Do not change the values assigned to the members of this enum.

Underlying code depends on them having these numbers

UColReorderCode

enum UColReorderCode UColReorderCode

Enum containing the codes for reordering segments of the collation table that are not script codes.

These reordering codes are to be used in conjunction with the script codes. See also: ucol_getReorderCodes See also: ucol_setReorderCodes See also: ucol_getEquivalentReorderCodes See also: UScriptCode

UCollationResult

enum UCollationResult UCollationResult

UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll() method.

UCOL_EQUAL is returned if source string is compared to be equal to target string in the ucol_strcoll() method. UCOL_GREATER is returned if source string is compared to be greater than target string in the ucol_strcoll() method. Possible values for a comparison result. See also: ucol_strcoll()

UCollationStrength

UColAttributeValue UCollationStrength

Base letter represents a primary difference.

Set comparison level to UCOL_PRIMARY to ignore secondary and tertiary differences. Use this to set the strength of a Collator object. Example of primary difference, "abc" < "abd"

Diacritical differences on the same base letter represent a secondary difference. Set comparison level to UCOL_SECONDARY to ignore tertiary differences. Use this to set the strength of a Collator object. Example of secondary difference, "ä" >> "a".

Uppercase and lowercase versions of the same character represents a tertiary difference. Set comparison level to UCOL_TERTIARY to include all comparison differences. Use this to set the strength of a Collator object. Example of tertiary difference, "abc" <<< "ABC".

Two characters are considered "identical" when they have the same unicode spellings. UCOL_IDENTICAL. For example, "ä" == "ä".

UCollationStrength is also used to determine the strength of sort keys generated from UCollator objects. These values can now be found in the UColAttributeValue enum.

UCollator

struct UCollator UCollator

structure representing a collator object instance

UDate

double UDate

Date and Time data type.

This is a primitive data type that holds the date and time as the number of milliseconds since 1970-jan-01, 00:00 UTC. UTC leap seconds are ignored.

UDecompositionType

enum UDecompositionType UDecompositionType

Decomposition Type constants.

See also: UCHAR_DECOMPOSITION_TYPE

UDisplayContext

enum UDisplayContext UDisplayContext

UDisplayContextType

enum UDisplayContextType UDisplayContextType

UEastAsianWidth

enum UEastAsianWidth UEastAsianWidth

East Asian Width constants.

See also: UCHAR_EAST_ASIAN_WIDTH See also: u_getIntPropertyValue

UEnumCharNamesFn

UBool U_CALLCONV UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)

Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name.

If such a function returns false, then the enumeration is stopped.

See also: UCharNameChoice See also: u_enumCharNames

Details
Parameters
context
The context pointer that was passed to u_enumCharNames().
code
The Unicode code point for the character with this name.
nameChoice
Selector for which kind of names is enumerated.
name
The character's name, zero-terminated.
length
The length of the name.
Returns
true if the enumeration should continue, false to stop it.

UEnumeration

struct UEnumeration UEnumeration

structure representing an enumeration object instance

UErrorCode

enum UErrorCode UErrorCode

Standard ICU4C error code type, a substitute for exceptions.

Initialize the UErrorCode with U_ZERO_ERROR, and check for success or failure using U_SUCCESS() or U_FAILURE():

UErrorCode errorCode = U_ZERO_ERROR;
// call ICU API that needs an error code parameter.
if (U_FAILURE(errorCode)) {
    // An error occurred. Handle it here.
}

C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a suitable subclass.

For more information, see: https://unicode-org.github.io/icu/userguide/dev/codingguidelines#details-about-icu-error-codes

Note: By convention, ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode first test:

if (U_FAILURE(errorCode)) { return immediately; }

so that in a chain of such functions the first one that sets an error code causes the following ones to not perform any operations.

UGraphemeClusterBreak

enum UGraphemeClusterBreak UGraphemeClusterBreak

Grapheme Cluster Break constants.

See also: UCHAR_GRAPHEME_CLUSTER_BREAK

UHangulSyllableType

enum UHangulSyllableType UHangulSyllableType

Hangul Syllable Type constants.

See also: UCHAR_HANGUL_SYLLABLE_TYPE

UIndicPositionalCategory

enum UIndicPositionalCategory UIndicPositionalCategory

Indic Positional Category constants.

See also: UCHAR_INDIC_POSITIONAL_CATEGORY

UIndicSyllabicCategory

enum UIndicSyllabicCategory UIndicSyllabicCategory

Indic Syllabic Category constants.

See also: UCHAR_INDIC_S