ICU 59.1
uchar.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File UCHAR.H
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 04/02/97 aliu Creation.
15 * 03/29/99 helena Updated for C APIs.
16 * 4/15/99 Madhu Updated for C Implementation and Javadoc
17 * 5/20/99 Madhu Added the function u_getVersion()
18 * 8/19/1999 srl Upgraded scripts to Unicode 3.0
19 * 8/27/1999 schererm UCharDirection constants: U_...
20 * 11/11/1999 weiv added u_isalnum(), cleaned comments
21 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
22 ******************************************************************************
23 */
24 
25 #ifndef UCHAR_H
26 #define UCHAR_H
27 
28 #include "unicode/utypes.h"
29 
31 
32 /*==========================================================================*/
33 /* Unicode version number */
34 /*==========================================================================*/
44 #define U_UNICODE_VERSION "9.0"
45 
126 #define UCHAR_MIN_VALUE 0
127 
136 #define UCHAR_MAX_VALUE 0x10ffff
137 
142 #define U_MASK(x) ((uint32_t)1<<(x))
143 
163 typedef enum UProperty {
164  /*
165  * Note: UProperty constants are parsed by preparseucd.py.
166  * It matches lines like
167  * UCHAR_<Unicode property name>=<integer>,
168  */
169 
170  /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
171  debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
172  rather than UCHAR_BINARY_START. Likewise for other *_START
173  identifiers. */
174 
430 #ifndef U_HIDE_DEPRECATED_API
431 
436 #endif // U_HIDE_DEPRECATED_API
437 
445  UCHAR_BLOCK=0x1001,
473  UCHAR_SCRIPT=0x100A,
523 #ifndef U_HIDE_DEPRECATED_API
524 
529 #endif // U_HIDE_DEPRECATED_API
530 
542 #ifndef U_HIDE_DEPRECATED_API
543 
548 #endif // U_HIDE_DEPRECATED_API
549 
555 #ifndef U_HIDE_DEPRECATED_API
556 
561 #endif // U_HIDE_DEPRECATED_API
562 
565  UCHAR_AGE=0x4000,
574 #ifndef U_HIDE_DEPRECATED_API
575 
578 #endif /* U_HIDE_DEPRECATED_API */
579 
584  UCHAR_NAME=0x4005,
600 #ifndef U_HIDE_DEPRECATED_API
601 
606 #endif /* U_HIDE_DEPRECATED_API */
607 
613 #ifndef U_HIDE_DEPRECATED_API
614 
619 #endif // U_HIDE_DEPRECATED_API
620 
629 #ifndef U_HIDE_DEPRECATED_API
630 
635 #endif // U_HIDE_DEPRECATED_API
636 
639 } UProperty;
640 
646 typedef enum UCharCategory
647 {
648  /*
649  * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
650  * It matches pairs of lines like
651  * / ** <Unicode 2-letter General_Category value> comment... * /
652  * U_<[A-Z_]+> = <integer>,
653  */
654 
725 } UCharCategory;
726 
741 #define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)
742 
744 #define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
745 
746 #define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
747 
748 #define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)
749 
750 #define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)
751 
752 #define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)
753 
755 #define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)
756 
757 #define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)
758 
759 #define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)
760 
762 #define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)
763 
764 #define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)
765 
766 #define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)
767 
769 #define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)
770 
771 #define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)
772 
773 #define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)
774 
776 #define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)
777 
778 #define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)
779 
780 #define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)
781 
782 #define U_GC_CS_MASK U_MASK(U_SURROGATE)
783 
785 #define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)
786 
787 #define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)
788 
789 #define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)
790 
791 #define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)
792 
793 #define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)
794 
796 #define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)
797 
798 #define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)
799 
800 #define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)
801 
802 #define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)
803 
805 #define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)
806 
807 #define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)
808 
809 
811 #define U_GC_L_MASK \
812  (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
813 
815 #define U_GC_LC_MASK \
816  (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
817 
819 #define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
820 
822 #define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
823 
825 #define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
826 
828 #define U_GC_C_MASK \
829  (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
830 
832 #define U_GC_P_MASK \
833  (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
834  U_GC_PI_MASK|U_GC_PF_MASK)
835 
837 #define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
838 
843 typedef enum UCharDirection {
844  /*
845  * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
846  * It matches pairs of lines like
847  * / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
848  * U_<[A-Z_]+> = <integer>,
849  */
850 
897 #ifndef U_HIDE_DEPRECATED_API
898 
905 #endif // U_HIDE_DEPRECATED_API
907 
915  /*
916  * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
917  * It matches lines like
918  * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
919  */
920 
927 #ifndef U_HIDE_DEPRECATED_API
928 
934  U_BPT_COUNT /* 3 */
935 #endif // U_HIDE_DEPRECATED_API
937 
943  /*
944  * Note: UBlockCode constants are parsed by preparseucd.py.
945  * It matches lines like
946  * UBLOCK_<Unicode Block value name> = <integer>,
947  */
948 
950  UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
951 
953  UBLOCK_BASIC_LATIN = 1, /*[0000]*/
954 
957 
959  UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
960 
962  UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
963 
965  UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
966 
969 
972 
977  UBLOCK_GREEK =8, /*[0370]*/
978 
980  UBLOCK_CYRILLIC =9, /*[0400]*/
981 
983  UBLOCK_ARMENIAN =10, /*[0530]*/
984 
986  UBLOCK_HEBREW =11, /*[0590]*/
987 
989  UBLOCK_ARABIC =12, /*[0600]*/
990 
992  UBLOCK_SYRIAC =13, /*[0700]*/
993 
995  UBLOCK_THAANA =14, /*[0780]*/
996 
998  UBLOCK_DEVANAGARI =15, /*[0900]*/
999 
1001  UBLOCK_BENGALI =16, /*[0980]*/
1002 
1004  UBLOCK_GURMUKHI =17, /*[0A00]*/
1005 
1007  UBLOCK_GUJARATI =18, /*[0A80]*/
1008 
1010  UBLOCK_ORIYA =19, /*[0B00]*/
1011 
1013  UBLOCK_TAMIL =20, /*[0B80]*/
1014 
1016  UBLOCK_TELUGU =21, /*[0C00]*/
1017 
1019  UBLOCK_KANNADA =22, /*[0C80]*/
1020 
1022  UBLOCK_MALAYALAM =23, /*[0D00]*/
1023 
1025  UBLOCK_SINHALA =24, /*[0D80]*/
1026 
1028  UBLOCK_THAI =25, /*[0E00]*/
1029 
1031  UBLOCK_LAO =26, /*[0E80]*/
1032 
1034  UBLOCK_TIBETAN =27, /*[0F00]*/
1035 
1037  UBLOCK_MYANMAR =28, /*[1000]*/
1038 
1040  UBLOCK_GEORGIAN =29, /*[10A0]*/
1041 
1043  UBLOCK_HANGUL_JAMO =30, /*[1100]*/
1044 
1046  UBLOCK_ETHIOPIC =31, /*[1200]*/
1047 
1049  UBLOCK_CHEROKEE =32, /*[13A0]*/
1050 
1053 
1055  UBLOCK_OGHAM =34, /*[1680]*/
1056 
1058  UBLOCK_RUNIC =35, /*[16A0]*/
1059 
1061  UBLOCK_KHMER =36, /*[1780]*/
1062 
1064  UBLOCK_MONGOLIAN =37, /*[1800]*/
1065 
1068 
1070  UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
1071 
1074 
1077 
1079  UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
1080 
1086 
1089 
1091  UBLOCK_NUMBER_FORMS =45, /*[2150]*/
1092 
1094  UBLOCK_ARROWS =46, /*[2190]*/
1095 
1098 
1101 
1103  UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
1104 
1107 
1110 
1112  UBLOCK_BOX_DRAWING =52, /*[2500]*/
1113 
1115  UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
1116 
1118  UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
1119 
1122 
1124  UBLOCK_DINGBATS =56, /*[2700]*/
1125 
1127  UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
1128 
1131 
1133  UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
1134 
1137 
1140 
1142  UBLOCK_HIRAGANA =62, /*[3040]*/
1143 
1145  UBLOCK_KATAKANA =63, /*[30A0]*/
1146 
1148  UBLOCK_BOPOMOFO =64, /*[3100]*/
1149 
1152 
1154  UBLOCK_KANBUN =66, /*[3190]*/
1155 
1158 
1161 
1164 
1167 
1170 
1172  UBLOCK_YI_SYLLABLES =72, /*[A000]*/
1173 
1175  UBLOCK_YI_RADICALS =73, /*[A490]*/
1176 
1178  UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
1179 
1181  UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
1182 
1185 
1187  UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
1188 
1198  UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/
1209 
1212 
1215 
1218 
1221 
1224 
1227 
1230 
1232  UBLOCK_SPECIALS =86, /*[FFF0]*/
1233 
1236 
1237  /* New blocks in Unicode 3.1 */
1238 
1240  UBLOCK_OLD_ITALIC = 88, /*[10300]*/
1242  UBLOCK_GOTHIC = 89, /*[10330]*/
1244  UBLOCK_DESERET = 90, /*[10400]*/
1248  UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/
1256  UBLOCK_TAGS = 96, /*[E0000]*/
1257 
1258  /* New blocks in Unicode 3.2 */
1259 
1268  UBLOCK_TAGALOG = 98, /*[1700]*/
1270  UBLOCK_HANUNOO = 99, /*[1720]*/
1272  UBLOCK_BUHID = 100, /*[1740]*/
1274  UBLOCK_TAGBANWA = 101, /*[1760]*/
1288  UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
1293 
1294  /* New blocks in Unicode 4 */
1295 
1297  UBLOCK_LIMBU = 111, /*[1900]*/
1299  UBLOCK_TAI_LE = 112, /*[1950]*/
1301  UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
1303  UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
1309  UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
1311  UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
1313  UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
1315  UBLOCK_UGARITIC = 120, /*[10380]*/
1317  UBLOCK_SHAVIAN = 121, /*[10450]*/
1319  UBLOCK_OSMANYA = 122, /*[10480]*/
1321  UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
1323  UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
1326 
1327  /* New blocks in Unicode 4.1 */
1328 
1332  UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
1334  UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
1336  UBLOCK_BUGINESE = 129, /*[1A00]*/
1338  UBLOCK_CJK_STROKES = 130, /*[31C0]*/
1342  UBLOCK_COPTIC = 132, /*[2C80]*/
1344  UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
1346  UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
1348  UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
1350  UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
1352  UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
1356  UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
1358  UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
1364  UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
1366  UBLOCK_TIFINAGH = 144, /*[2D30]*/
1368  UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
1369 
1370  /* New blocks in Unicode 5.0 */
1371 
1373  UBLOCK_NKO = 146, /*[07C0]*/
1375  UBLOCK_BALINESE = 147, /*[1B00]*/
1377  UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
1379  UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
1381  UBLOCK_PHAGS_PA = 150, /*[A840]*/
1383  UBLOCK_PHOENICIAN = 151, /*[10900]*/
1385  UBLOCK_CUNEIFORM = 152, /*[12000]*/
1389  UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/
1390 
1391  /* New blocks in Unicode 5.1 */
1392 
1394  UBLOCK_SUNDANESE = 155, /*[1B80]*/
1396  UBLOCK_LEPCHA = 156, /*[1C00]*/
1398  UBLOCK_OL_CHIKI = 157, /*[1C50]*/
1400  UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
1402  UBLOCK_VAI = 159, /*[A500]*/
1404  UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
1406  UBLOCK_SAURASHTRA = 161, /*[A880]*/
1408  UBLOCK_KAYAH_LI = 162, /*[A900]*/
1410  UBLOCK_REJANG = 163, /*[A930]*/
1412  UBLOCK_CHAM = 164, /*[AA00]*/
1414  UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
1416  UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
1418  UBLOCK_LYCIAN = 167, /*[10280]*/
1420  UBLOCK_CARIAN = 168, /*[102A0]*/
1422  UBLOCK_LYDIAN = 169, /*[10920]*/
1424  UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
1426  UBLOCK_DOMINO_TILES = 171, /*[1F030]*/
1427 
1428  /* New blocks in Unicode 5.2 */
1429 
1431  UBLOCK_SAMARITAN = 172, /*[0800]*/
1435  UBLOCK_TAI_THAM = 174, /*[1A20]*/
1437  UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
1439  UBLOCK_LISU = 176, /*[A4D0]*/
1441  UBLOCK_BAMUM = 177, /*[A6A0]*/
1445  UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
1449  UBLOCK_JAVANESE = 181, /*[A980]*/
1451  UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
1453  UBLOCK_TAI_VIET = 183, /*[AA80]*/
1455  UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
1459  UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
1461  UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
1463  UBLOCK_AVESTAN = 188, /*[10B00]*/
1467  UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
1469  UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
1471  UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
1473  UBLOCK_KAITHI = 193, /*[11080]*/
1475  UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
1482 
1483  /* New blocks in Unicode 6.0 */
1484 
1486  UBLOCK_MANDAIC = 198, /*[0840]*/
1488  UBLOCK_BATAK = 199, /*[1BC0]*/
1490  UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/
1492  UBLOCK_BRAHMI = 201, /*[11000]*/
1494  UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/
1496  UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/
1498  UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/
1502  UBLOCK_EMOTICONS = 206, /*[1F600]*/
1506  UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/
1509 
1510  /* New blocks in Unicode 6.1 */
1511 
1513  UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/
1517  UBLOCK_CHAKMA = 212, /*[11100]*/
1521  UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/
1523  UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/
1525  UBLOCK_MIAO = 216, /*[16F00]*/
1527  UBLOCK_SHARADA = 217, /*[11180]*/
1529  UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/
1533  UBLOCK_TAKRI = 220, /*[11680]*/
1534 
1535  /* New blocks in Unicode 7.0 */
1536 
1538  UBLOCK_BASSA_VAH = 221, /*[16AD0]*/
1540  UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/
1542  UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/
1546  UBLOCK_DUPLOYAN = 225, /*[1BC00]*/
1548  UBLOCK_ELBASAN = 226, /*[10500]*/
1552  UBLOCK_GRANTHA = 228, /*[11300]*/
1554  UBLOCK_KHOJKI = 229, /*[11200]*/
1556  UBLOCK_KHUDAWADI = 230, /*[112B0]*/
1558  UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/
1560  UBLOCK_LINEAR_A = 232, /*[10600]*/
1562  UBLOCK_MAHAJANI = 233, /*[11150]*/
1564  UBLOCK_MANICHAEAN = 234, /*[10AC0]*/
1566  UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/
1568  UBLOCK_MODI = 236, /*[11600]*/
1570  UBLOCK_MRO = 237, /*[16A40]*/
1572  UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/
1574  UBLOCK_NABATAEAN = 239, /*[10880]*/
1576  UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/
1578  UBLOCK_OLD_PERMIC = 241, /*[10350]*/
1580  UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/
1582  UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/
1584  UBLOCK_PALMYRENE = 244, /*[10860]*/
1586  UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/
1588  UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/
1592  UBLOCK_SIDDHAM = 248, /*[11580]*/
1596  UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/
1598  UBLOCK_TIRHUTA = 251, /*[11480]*/
1600  UBLOCK_WARANG_CITI = 252, /*[118A0]*/
1601 
1602  /* New blocks in Unicode 8.0 */
1603 
1605  UBLOCK_AHOM = 253, /*[11700]*/
1607  UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/
1609  UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/
1615  UBLOCK_HATRAN = 258, /*[108E0]*/
1617  UBLOCK_MULTANI = 259, /*[11280]*/
1619  UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/
1623  UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/
1624 
1625  /* New blocks in Unicode 9.0 */
1626 
1628  UBLOCK_ADLAM = 263, /*[1E900]*/
1630  UBLOCK_BHAIKSUKI = 264, /*[11C00]*/
1632  UBLOCK_CYRILLIC_EXTENDED_C = 265, /*[1C80]*/
1634  UBLOCK_GLAGOLITIC_SUPPLEMENT = 266, /*[1E000]*/
1638  UBLOCK_MARCHEN = 268, /*[11C70]*/
1640  UBLOCK_MONGOLIAN_SUPPLEMENT = 269, /*[11660]*/
1642  UBLOCK_NEWA = 270, /*[11400]*/
1644  UBLOCK_OSAGE = 271, /*[104B0]*/
1646  UBLOCK_TANGUT = 272, /*[17000]*/
1648  UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/
1649 
1650 #ifndef U_HIDE_DEPRECATED_API
1651 
1658 #endif // U_HIDE_DEPRECATED_API
1659 
1662 };
1663 
1665 typedef enum UBlockCode UBlockCode;
1666 
1674 typedef enum UEastAsianWidth {
1675  /*
1676  * Note: UEastAsianWidth constants are parsed by preparseucd.py.
1677  * It matches lines like
1678  * U_EA_<Unicode East_Asian_Width value name>
1679  */
1680 
1681  U_EA_NEUTRAL, /*[N]*/
1682  U_EA_AMBIGUOUS, /*[A]*/
1683  U_EA_HALFWIDTH, /*[H]*/
1684  U_EA_FULLWIDTH, /*[F]*/
1685  U_EA_NARROW, /*[Na]*/
1686  U_EA_WIDE, /*[W]*/
1687 #ifndef U_HIDE_DEPRECATED_API
1688 
1695 #endif // U_HIDE_DEPRECATED_API
1696 } UEastAsianWidth;
1697 
1709 typedef enum UCharNameChoice {
1712 #ifndef U_HIDE_DEPRECATED_API
1713 
1719 #endif /* U_HIDE_DEPRECATED_API */
1720 
1724 #ifndef U_HIDE_DEPRECATED_API
1725 
1730 #endif // U_HIDE_DEPRECATED_API
1731 } UCharNameChoice;
1732 
1746 typedef enum UPropertyNameChoice {
1747  U_SHORT_PROPERTY_NAME,
1748  U_LONG_PROPERTY_NAME,
1749 #ifndef U_HIDE_DEPRECATED_API
1750 
1755 #endif // U_HIDE_DEPRECATED_API
1757 
1764 typedef enum UDecompositionType {
1765  /*
1766  * Note: UDecompositionType constants are parsed by preparseucd.py.
1767  * It matches lines like
1768  * U_DT_<Unicode Decomposition_Type value name>
1769  */
1770 
1771  U_DT_NONE, /*[none]*/
1772  U_DT_CANONICAL, /*[can]*/
1773  U_DT_COMPAT, /*[com]*/
1774  U_DT_CIRCLE, /*[enc]*/
1775  U_DT_FINAL, /*[fin]*/
1776  U_DT_FONT, /*[font]*/
1777  U_DT_FRACTION, /*[fra]*/
1778  U_DT_INITIAL, /*[init]*/
1779  U_DT_ISOLATED, /*[iso]*/
1780  U_DT_MEDIAL, /*[med]*/
1781  U_DT_NARROW, /*[nar]*/
1782  U_DT_NOBREAK, /*[nb]*/
1783  U_DT_SMALL, /*[sml]*/
1784  U_DT_SQUARE, /*[sqr]*/
1785  U_DT_SUB, /*[sub]*/
1786  U_DT_SUPER, /*[sup]*/
1787  U_DT_VERTICAL, /*[vert]*/
1788  U_DT_WIDE, /*[wide]*/
1789 #ifndef U_HIDE_DEPRECATED_API
1790 
1796  U_DT_COUNT /* 18 */
1797 #endif // U_HIDE_DEPRECATED_API
1799 
1806 typedef enum UJoiningType {
1807  /*
1808  * Note: UJoiningType constants are parsed by preparseucd.py.
1809  * It matches lines like
1810  * U_JT_<Unicode Joining_Type value name>
1811  */
1812 
1813  U_JT_NON_JOINING, /*[U]*/
1814  U_JT_JOIN_CAUSING, /*[C]*/
1815  U_JT_DUAL_JOINING, /*[D]*/
1816  U_JT_LEFT_JOINING, /*[L]*/
1817  U_JT_RIGHT_JOINING, /*[R]*/
1818  U_JT_TRANSPARENT, /*[T]*/
1819 #ifndef U_HIDE_DEPRECATED_API
1820 
1826  U_JT_COUNT /* 6 */
1827 #endif // U_HIDE_DEPRECATED_API
1828 } UJoiningType;
1829 
1836 typedef enum UJoiningGroup {
1837  /*
1838  * Note: UJoiningGroup constants are parsed by preparseucd.py.
1839  * It matches lines like
1840  * U_JG_<Unicode Joining_Group value name>
1841  */
1842 
1843  U_JG_NO_JOINING_GROUP,
1844  U_JG_AIN,
1845  U_JG_ALAPH,
1846  U_JG_ALEF,
1847  U_JG_BEH,
1848  U_JG_BETH,
1849  U_JG_DAL,
1850  U_JG_DALATH_RISH,
1851  U_JG_E,
1852  U_JG_FEH,
1853  U_JG_FINAL_SEMKATH,
1854  U_JG_GAF,
1855  U_JG_GAMAL,
1856  U_JG_HAH,
1858  U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
1859  U_JG_HE,
1860  U_JG_HEH,
1861  U_JG_HEH_GOAL,
1862  U_JG_HETH,
1863  U_JG_KAF,
1864  U_JG_KAPH,
1865  U_JG_KNOTTED_HEH,
1866  U_JG_LAM,
1867  U_JG_LAMADH,
1868  U_JG_MEEM,
1869  U_JG_MIM,
1870  U_JG_NOON,
1871  U_JG_NUN,
1872  U_JG_PE,
1873  U_JG_QAF,
1874  U_JG_QAPH,
1875  U_JG_REH,
1876  U_JG_REVERSED_PE,
1877  U_JG_SAD,
1878  U_JG_SADHE,
1879  U_JG_SEEN,
1880  U_JG_SEMKATH,
1881  U_JG_SHIN,
1882  U_JG_SWASH_KAF,
1883  U_JG_SYRIAC_WAW,
1884  U_JG_TAH,
1885  U_JG_TAW,
1886  U_JG_TEH_MARBUTA,
1887  U_JG_TETH,
1888  U_JG_WAW,
1889  U_JG_YEH,
1890  U_JG_YEH_BARREE,
1891  U_JG_YEH_WITH_TAIL,
1892  U_JG_YUDH,
1893  U_JG_YUDH_HE,
1894  U_JG_ZAIN,
1933 #ifndef U_HIDE_DEPRECATED_API
1934 
1941 #endif // U_HIDE_DEPRECATED_API
1942 } UJoiningGroup;
1943 
1951  /*
1952  * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
1953  * It matches lines like
1954  * U_GCB_<Unicode Grapheme_Cluster_Break value name>
1955  */
1956 
1957  U_GCB_OTHER = 0, /*[XX]*/
1958  U_GCB_CONTROL = 1, /*[CN]*/
1959  U_GCB_CR = 2, /*[CR]*/
1960  U_GCB_EXTEND = 3, /*[EX]*/
1961  U_GCB_L = 4, /*[L]*/
1962  U_GCB_LF = 5, /*[LF]*/
1963  U_GCB_LV = 6, /*[LV]*/
1964  U_GCB_LVT = 7, /*[LVT]*/
1965  U_GCB_T = 8, /*[T]*/
1966  U_GCB_V = 9, /*[V]*/
1968  U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
1970  U_GCB_PREPEND = 11, /*[PP]*/
1972  U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
1974  U_GCB_E_BASE = 13, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
1976  U_GCB_E_BASE_GAZ = 14, /*[EBG]*/
1978  U_GCB_E_MODIFIER = 15, /*[EM]*/
1980  U_GCB_GLUE_AFTER_ZWJ = 16, /*[GAZ]*/
1982  U_GCB_ZWJ = 17, /*[ZWJ]*/
1983 #ifndef U_HIDE_DEPRECATED_API
1984 
1991 #endif // U_HIDE_DEPRECATED_API
1993 
2001 typedef enum UWordBreakValues {
2002  /*
2003  * Note: UWordBreakValues constants are parsed by preparseucd.py.
2004  * It matches lines like
2005  * U_WB_<Unicode Word_Break value name>
2006  */
2007 
2008  U_WB_OTHER = 0, /*[XX]*/
2009  U_WB_ALETTER = 1, /*[LE]*/
2010  U_WB_FORMAT = 2, /*[FO]*/
2011  U_WB_KATAKANA = 3, /*[KA]*/
2012  U_WB_MIDLETTER = 4, /*[ML]*/
2013  U_WB_MIDNUM = 5, /*[MN]*/
2014  U_WB_NUMERIC = 6, /*[NU]*/
2015  U_WB_EXTENDNUMLET = 7, /*[EX]*/
2017  U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
2019  U_WB_EXTEND = 9, /*[Extend]*/
2021  U_WB_LF = 10, /*[LF]*/
2023  U_WB_MIDNUMLET =11, /*[MB]*/
2025  U_WB_NEWLINE =12, /*[NL]*/
2027  U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2029  U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
2031  U_WB_SINGLE_QUOTE = 15, /*[SQ]*/
2033  U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/
2035  U_WB_E_BASE = 17, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
2037  U_WB_E_BASE_GAZ = 18, /*[EBG]*/
2039  U_WB_E_MODIFIER = 19, /*[EM]*/
2041  U_WB_GLUE_AFTER_ZWJ = 20, /*[GAZ]*/
2043  U_WB_ZWJ = 21, /*[ZWJ]*/
2044 #ifndef U_HIDE_DEPRECATED_API
2045 
2052 #endif // U_HIDE_DEPRECATED_API
2054 
2061 typedef enum USentenceBreak {
2062  /*
2063  * Note: USentenceBreak constants are parsed by preparseucd.py.
2064  * It matches lines like
2065  * U_SB_<Unicode Sentence_Break value name>
2066  */
2067 
2068  U_SB_OTHER = 0, /*[XX]*/
2069  U_SB_ATERM = 1, /*[AT]*/
2070  U_SB_CLOSE = 2, /*[CL]*/
2071  U_SB_FORMAT = 3, /*[FO]*/
2072  U_SB_LOWER = 4, /*[LO]*/
2073  U_SB_NUMERIC = 5, /*[NU]*/
2074  U_SB_OLETTER = 6, /*[LE]*/
2075  U_SB_SEP = 7, /*[SE]*/
2076  U_SB_SP = 8, /*[SP]*/
2077  U_SB_STERM = 9, /*[ST]*/
2078  U_SB_UPPER = 10, /*[UP]*/
2079  U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
2080  U_SB_EXTEND = 12, /*[EX]*/
2081  U_SB_LF = 13, /*[LF]*/
2082  U_SB_SCONTINUE = 14, /*[SC]*/
2083 #ifndef U_HIDE_DEPRECATED_API
2084 
2091 #endif // U_HIDE_DEPRECATED_API
2092 } USentenceBreak;
2093 
2100 typedef enum ULineBreak {
2101  /*
2102  * Note: ULineBreak constants are parsed by preparseucd.py.
2103  * It matches lines like
2104  * U_LB_<Unicode Line_Break value name>
2105  */
2106 
2107  U_LB_UNKNOWN = 0, /*[XX]*/
2108  U_LB_AMBIGUOUS = 1, /*[AI]*/
2109  U_LB_ALPHABETIC = 2, /*[AL]*/
2110  U_LB_BREAK_BOTH = 3, /*[B2]*/
2111  U_LB_BREAK_AFTER = 4, /*[BA]*/
2112  U_LB_BREAK_BEFORE = 5, /*[BB]*/
2113  U_LB_MANDATORY_BREAK = 6, /*[BK]*/
2114  U_LB_CONTINGENT_BREAK = 7, /*[CB]*/
2115  U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
2116  U_LB_COMBINING_MARK = 9, /*[CM]*/
2117  U_LB_CARRIAGE_RETURN = 10, /*[CR]*/
2118  U_LB_EXCLAMATION = 11, /*[EX]*/
2119  U_LB_GLUE = 12, /*[GL]*/
2120  U_LB_HYPHEN = 13, /*[HY]*/
2121  U_LB_IDEOGRAPHIC = 14, /*[ID]*/
2123  U_LB_INSEPARABLE = 15, /*[IN]*/
2124  U_LB_INSEPERABLE = U_LB_INSEPARABLE,
2125  U_LB_INFIX_NUMERIC = 16, /*[IS]*/
2126  U_LB_LINE_FEED = 17, /*[LF]*/
2127  U_LB_NONSTARTER = 18, /*[NS]*/
2128  U_LB_NUMERIC = 19, /*[NU]*/
2129  U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/
2130  U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/
2131  U_LB_PREFIX_NUMERIC = 22, /*[PR]*/
2132  U_LB_QUOTATION = 23, /*[QU]*/
2133  U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/
2134  U_LB_SURROGATE = 25, /*[SG]*/
2135  U_LB_SPACE = 26, /*[SP]*/
2136  U_LB_BREAK_SYMBOLS = 27, /*[SY]*/
2137  U_LB_ZWSPACE = 28, /*[ZW]*/
2139  U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
2141  U_LB_WORD_JOINER = 30, /*[WJ]*/
2143  U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
2145  U_LB_H3 = 32, /*[H3]*/
2147  U_LB_JL = 33, /*[JL]*/
2149  U_LB_JT = 34, /*[JT]*/
2151  U_LB_JV = 35, /*[JV]*/
2153  U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
2155  U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
2157  U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
2159  U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2161  U_LB_E_BASE = 40, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
2163  U_LB_E_MODIFIER = 41, /*[EM]*/
2165  U_LB_ZWJ = 42, /*[ZWJ]*/
2166 #ifndef U_HIDE_DEPRECATED_API
2167 
2174 #endif // U_HIDE_DEPRECATED_API
2175 } ULineBreak;
2176 
2183 typedef enum UNumericType {
2184  /*
2185  * Note: UNumericType constants are parsed by preparseucd.py.
2186  * It matches lines like
2187  * U_NT_<Unicode Numeric_Type value name>
2188  */
2189 
2190  U_NT_NONE, /*[None]*/
2191  U_NT_DECIMAL, /*[de]*/
2192  U_NT_DIGIT, /*[di]*/
2193  U_NT_NUMERIC, /*[nu]*/
2194 #ifndef U_HIDE_DEPRECATED_API
2195 
2202 #endif // U_HIDE_DEPRECATED_API
2203 } UNumericType;
2204 
2211 typedef enum UHangulSyllableType {
2212  /*
2213  * Note: UHangulSyllableType constants are parsed by preparseucd.py.
2214  * It matches lines like
2215  * U_HST_<Unicode Hangul_Syllable_Type value name>
2216  */
2217 
2218  U_HST_NOT_APPLICABLE, /*[NA]*/
2219  U_HST_LEADING_JAMO, /*[L]*/
2220  U_HST_VOWEL_JAMO, /*[V]*/
2221  U_HST_TRAILING_JAMO, /*[T]*/
2222  U_HST_LV_SYLLABLE, /*[LV]*/
2223  U_HST_LVT_SYLLABLE, /*[LVT]*/
2224 #ifndef U_HIDE_DEPRECATED_API
2225 
2232 #endif // U_HIDE_DEPRECATED_API
2234 
2261 U_STABLE UBool U_EXPORT2
2263 
2276 U_STABLE UBool U_EXPORT2
2278 
2291 U_STABLE UBool U_EXPORT2
2293 
2306 U_STABLE UBool U_EXPORT2
2308 
2327 U_STABLE UBool U_EXPORT2
2329 
2367 U_STABLE int32_t U_EXPORT2
2369 
2388 U_STABLE int32_t U_EXPORT2
2390 
2417 U_STABLE int32_t U_EXPORT2
2419 
2442 U_STABLE double U_EXPORT2
2444 
2452 #define U_NO_NUMERIC_VALUE ((double)-123456789.)
2453 
2477 U_STABLE UBool U_EXPORT2
2478 u_islower(UChar32 c);
2479 
2504 U_STABLE UBool U_EXPORT2
2505 u_isupper(UChar32 c);
2506 
2521 U_STABLE UBool U_EXPORT2
2522 u_istitle(UChar32 c);
2523 
2542 U_STABLE UBool U_EXPORT2
2543 u_isdigit(UChar32 c);
2544 
2563 U_STABLE UBool U_EXPORT2
2564 u_isalpha(UChar32 c);
2565 
2584 U_STABLE UBool U_EXPORT2
2585 u_isalnum(UChar32 c);
2586 
2607 U_STABLE UBool U_EXPORT2
2608 u_isxdigit(UChar32 c);
2609 
2623 U_STABLE UBool U_EXPORT2
2624 u_ispunct(UChar32 c);
2625 
2642 U_STABLE UBool U_EXPORT2
2643 u_isgraph(UChar32 c);
2644 
2671 U_STABLE UBool U_EXPORT2
2672 u_isblank(UChar32 c);
2673 
2696 U_STABLE UBool U_EXPORT2
2697 u_isdefined(UChar32 c);
2698 
2717 U_STABLE UBool U_EXPORT2
2718 u_isspace(UChar32 c);
2719 
2738 U_STABLE UBool U_EXPORT2
2740 
2778 U_STABLE UBool U_EXPORT2
2780 
2802 U_STABLE UBool U_EXPORT2
2803 u_iscntrl(UChar32 c);
2804 
2817 U_STABLE UBool U_EXPORT2
2819 
2835 U_STABLE UBool U_EXPORT2
2836 u_isprint(UChar32 c);
2837 
2856 U_STABLE UBool U_EXPORT2
2857 u_isbase(UChar32 c);
2858 
2875 U_STABLE UCharDirection U_EXPORT2
2877 
2893 U_STABLE UBool U_EXPORT2
2895 
2915 U_STABLE UChar32 U_EXPORT2
2917 
2934 U_STABLE UChar32 U_EXPORT2
2936 
2948 U_STABLE int8_t U_EXPORT2
2949 u_charType(UChar32 c);
2950 
2964 #define U_GET_GC_MASK(c) U_MASK(u_charType(c))
2965 
2983 typedef UBool U_CALLCONV
2984 UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
2985 
3005 U_STABLE void U_EXPORT2
3006 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
3007 
3008 #if !UCONFIG_NO_NORMALIZATION
3009 
3017 U_STABLE uint8_t U_EXPORT2
3019 
3020 #endif
3021 
3045 U_STABLE int32_t U_EXPORT2
3047 
3057 U_STABLE UBlockCode U_EXPORT2
3059 
3092 U_STABLE int32_t U_EXPORT2
3093 u_charName(UChar32 code, UCharNameChoice nameChoice,
3094  char *buffer, int32_t bufferLength,
3095  UErrorCode *pErrorCode);
3096 
3097 #ifndef U_HIDE_DEPRECATED_API
3098 
3116 U_DEPRECATED int32_t U_EXPORT2
3118  char *dest, int32_t destCapacity,
3119  UErrorCode *pErrorCode);
3120 #endif /* U_HIDE_DEPRECATED_API */
3121 
3142 U_STABLE UChar32 U_EXPORT2
3143 u_charFromName(UCharNameChoice nameChoice,
3144  const char *name,
3145  UErrorCode *pErrorCode);
3146 
3164 typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
3165  UChar32 code,
3166  UCharNameChoice nameChoice,
3167  const char *name,
3168  int32_t length);
3169 
3191 U_STABLE void U_EXPORT2
3192 u_enumCharNames(UChar32 start, UChar32 limit,
3193  UEnumCharNamesFn *fn,
3194  void *context,
3195  UCharNameChoice nameChoice,
3196  UErrorCode *pErrorCode);
3197 
3229 U_STABLE const char* U_EXPORT2
3230 u_getPropertyName(UProperty property,
3231  UPropertyNameChoice nameChoice);
3232 
3252 U_STABLE UProperty U_EXPORT2
3253 u_getPropertyEnum(const char* alias);
3254 
3302 U_STABLE const char* U_EXPORT2
3304  int32_t value,
3305  UPropertyNameChoice nameChoice);
3306 
3338 U_STABLE int32_t U_EXPORT2
3340  const char* alias);
3341 
3359 U_STABLE UBool U_EXPORT2
3360 u_isIDStart(UChar32 c);
3361 
3383 U_STABLE UBool U_EXPORT2
3384 u_isIDPart(UChar32 c);
3385 
3406 U_STABLE UBool U_EXPORT2
3408 
3425 U_STABLE UBool U_EXPORT2
3427 
3446 U_STABLE UBool U_EXPORT2
3448 
3471 U_STABLE UChar32 U_EXPORT2
3472 u_tolower(UChar32 c);
3473 
3496 U_STABLE UChar32 U_EXPORT2
3497 u_toupper(UChar32 c);
3498 
3521 U_STABLE UChar32 U_EXPORT2
3522 u_totitle(UChar32 c);
3523 
3525 #define U_FOLD_CASE_DEFAULT 0
3526 
3543 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
3544 
3567 U_STABLE UChar32 U_EXPORT2
3568 u_foldCase(UChar32 c, uint32_t options);
3569 
3608 U_STABLE int32_t U_EXPORT2
3609 u_digit(UChar32 ch, int8_t radix);
3610 
3639 U_STABLE UChar32 U_EXPORT2
3640 u_forDigit(int32_t digit, int8_t radix);
3641 
3656 U_STABLE void U_EXPORT2
3657 u_charAge(UChar32 c, UVersionInfo versionArray);
3658 
3670 U_STABLE void U_EXPORT2
3671 u_getUnicodeVersion(UVersionInfo versionArray);
3672 
3673 #if !UCONFIG_NO_NORMALIZATION
3674 
3695 U_STABLE int32_t U_EXPORT2
3696 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
3697 
3698 #endif
3699 
3700 
3702 
3703 #endif /* UCHAR_H */
3704 /*eof*/
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:59
Binary property Ideographic.
Definition: uchar.h:239
Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
Definition: uchar.h:1266
Binary property Changes_When_Lowercased.
Definition: uchar.h:391
Binary property IDS_Binary_Operator (new in Unicode 3.2).
Definition: uchar.h:243
Binary property Case_Ignorable.
Definition: uchar.h:389
UBool u_isUWhiteSpace(UChar32 c)
Check if a code point has the White_Space Unicode property.
UBool u_istitle(UChar32 c)
Determines whether the specified code point is a titlecase letter.
Enumerated property NFC_Quick_Check.
Definition: uchar.h:485
UChar32 u_totitle(UChar32 c)
The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.
One more than the highest normal UJoiningGroup value.
Definition: uchar.h:1940
Miscellaneous property Script_Extensions (new in Unicode 6.0).
Definition: uchar.h:626
const char * u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases...
Same as UBLOCK_PRIVATE_USE_AREA.
Definition: uchar.h:1208
UChar32 u_foldCase(UChar32 c, uint32_t options)
The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.
First constant for enumerated/integer Unicode properties.
Definition: uchar.h:442
UChar32 u_getBidiPairedBracket(UChar32 c)
Maps the specified character to its paired bracket character.
Binary property XID_Start.
Definition: uchar.h:296
Binary property Join_Control.
Definition: uchar.h:250
Binary property Logical_Order_Exception (new in Unicode 3.2).
Definition: uchar.h:254
Binary property White_Space.
Definition: uchar.h:289
String property Titlecase_Mapping.
Definition: uchar.h:599
One more than the highest normal UPropertyNameChoice value.
Definition: uchar.h:1754
One more than the last constant for enumerated/integer Unicode properties.
Definition: uchar.h:528
Enumerated property Numeric_Type.
Definition: uchar.h:470
Binary property xdigit (a C/POSIX character class).
Definition: uchar.h:385
UBlockCode ublock_getCode(UChar32 c)
Returns the Unicode allocation block that contains the character.
Binary property Alphabetic.
Definition: uchar.h:177
First constant for double Unicode properties.
Definition: uchar.h:554
UBool UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
Type of a callback function for u_enumCharNames() that gets called for each Unicode character with th...
Definition: uchar.h:3164
One more than the highest normal USentenceBreak value.
Definition: uchar.h:2090
UBool u_isgraph(UChar32 c)
Determines whether the specified code point is a "graphic" character (printable, excluding spaces)...
Binary property Emoji.
Definition: uchar.h:408
Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNE...
Definition: uchar.h:658
String property Simple_Case_Folding.
Definition: uchar.h:587
Binary property NFC_Inert.
Definition: uchar.h:332
Binary property graph (a C/POSIX character class).
Definition: uchar.h:375
String property Bidi_Mirroring_Glyph.
Definition: uchar.h:570
One more than the last constant for bit-mask Unicode properties.
Definition: uchar.h:547
UBool u_isdefined(UChar32 c)
Determines whether the specified code point is "defined", which usually means that it is assigned a c...
Enumerated property Block.
Definition: uchar.h:445
Represents a nonexistent or invalid property or property value.
Definition: uchar.h:638
Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0.
Definition: uchar.h:2123
Binary property Radical (new in Unicode 3.2).
Definition: uchar.h:269
UCharDirection
This specifies the language directional property of a character set.
Definition: uchar.h:843
Binary property IDS_Trinary_Operator (new in Unicode 3.2).
Definition: uchar.h:247
Binary property Grapheme_Link (new in Unicode 3.2).
Definition: uchar.h:221
Enumerated property Decomposition_Type.
Definition: uchar.h:451
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:871
String property Case_Folding.
Definition: uchar.h:573
String property Name.
Definition: uchar.h:584
String property Simple_Uppercase_Mapping.
Definition: uchar.h:596
UChar32 u_forDigit(int32_t digit, int8_t radix)
Determines the character representation for a specific digit in the specified radix.
Enumerated property Bidi_Class.
Definition: uchar.h:440
int32_t u_charDigitValue(UChar32 c)
Returns the decimal digit value of a decimal digit character.
Enumerated property General_Category.
Definition: uchar.h:458
Sm.
Definition: uchar.h:706
String property Unicode_1_Name.
Definition: uchar.h:605
int32_t u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Get the FC_NFKC_Closure property string for a character.
UNumericType
Numeric Type constants.
Definition: uchar.h:2183
One more than the highest normal UBidiPairedBracketType value.
Definition: uchar.h:934
Binary property Pattern_White_Space (new in Unicode 4.1).
Definition: uchar.h:360
Close paired bracket.
Definition: uchar.h:926
UBool u_iscntrl(UChar32 c)
Determines whether the specified code point is a control character (as defined by this function)...
UBool UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c...
Definition: uchar.h:2984
Binary property Changes_When_Casefolded.
Definition: uchar.h:397
Binary property NFD_Inert.
Definition: uchar.h:318
Binary property Diacritic.
Definition: uchar.h:202
Binary property Terminal_Punctuation.
Definition: uchar.h:278
UChar32 u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
Find a Unicode character by its name and return its code point value.
One more than the highest normal UNumericType value.
Definition: uchar.h:2201
UBool u_isUAlphabetic(UChar32 c)
Check if a code point has the Alphabetic Unicode property.
Enumerated property NFD_Quick_Check.
Definition: uchar.h:479
void u_charAge(UChar32 c, UVersionInfo versionArray)
Get the "age" of the code point.
int32_t u_getPropertyValueEnum(UProperty property, const char *alias)
Return the property value integer for a given value name, as specified in the Unicode database file P...
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:84
Binary property STerm (new in Unicode 4.0.1).
Definition: uchar.h:305
Enumerated property Joining_Group.
Definition: uchar.h:461
Binary property ID_Continue.
Definition: uchar.h:232
Binary property Emoji_Presentation.
Definition: uchar.h:415
Open paired bracket.
Definition: uchar.h:924
Binary property blank (a C/POSIX character class).
Definition: uchar.h:370
Binary property Quotation_Mark.
Definition: uchar.h:265
#define U_DEPRECATED
This is used to declare a function as a deprecated public ICU C API.
Definition: umachine.h:115
Binary property Changes_When_NFKC_Casefolded.
Definition: uchar.h:401
First constant for binary Unicode properties.
Definition: uchar.h:179
Binary property Noncharacter_Code_Point.
Definition: uchar.h:263
Binary property Hyphen.
Definition: uchar.h:227
Enumerated property East_Asian_Width.
Definition: uchar.h:455
ULineBreak
Line Break constants.
Definition: uchar.h:2100
One more than the highest UCharDirection value.
Definition: uchar.h:904
Binary property Full_Composition_Exclusion.
Definition: uchar.h:210
Bitmask property General_Category_Mask.
Definition: uchar.h:539
String property Simple_Titlecase_Mapping.
Definition: uchar.h:593
One more than the highest normal UGraphemeClusterBreak value.
Definition: uchar.h:1990
Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
Definition: uchar.h:1085
int32_t u_digit(UChar32 ch, int8_t radix)
Returns the decimal digit value of the code point in the specified radix.
UDecompositionType
Decomposition Type constants.
Definition: uchar.h:1764
UBool u_isprint(UChar32 c)
Determines whether the specified code point is a printable character.
UBool u_isxdigit(UChar32 c)
Determines whether the specified code point is a hexadecimal digit.
One more than the highest normal ULineBreak value.
Definition: uchar.h:2173
UHangulSyllableType
Hangul Syllable Type constants.
Definition: uchar.h:2211
String property Simple_Lowercase_Mapping.
Definition: uchar.h:590
Binary property print (a C/POSIX character class).
Definition: uchar.h:380
Binary property Case_Sensitive.
Definition: uchar.h:300
Standard or synthetic character name.
Definition: uchar.h:1721
Binary property Bidi_Mirrored.
Definition: uchar.h:190
Binary property NFKC_Inert.
Definition: uchar.h:339
int32_t u_getIntPropertyValue(UChar32 c, UProperty which)
Get the property value for an enumerated or integer Unicode property for a code point.
Binary property Changes_When_Casemapped.
Definition: uchar.h:399
First constant for string Unicode properties.
Definition: uchar.h:567
Binary property Grapheme_Extend (new in Unicode 3.2).
Definition: uchar.h:218
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:396
UGraphemeClusterBreak
Grapheme Cluster Break constants.
Definition: uchar.h:1950
New No_Block value in Unicode 4.
Definition: uchar.h:950
Binary property Extender.
Definition: uchar.h:206
Double property Numeric_Value.
Definition: uchar.h:552
Binary property Math.
Definition: uchar.h:259
Unicode character name (Name property).
Definition: uchar.h:1711
Binary property Grapheme_Base (new in Unicode 3.2).
Definition: uchar.h:214
Binary property NFKD_Inert.
Definition: uchar.h:325
uint8_t u_getCombiningClass(UChar32 c)
Returns the combining class of the code point as specified in UnicodeData.txt.
UBool u_isbase(UChar32 c)
Determines whether the specified code point is a base character.
UCharCategory
Data for enumerated Unicode general category types.
Definition: uchar.h:646
const char * u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property value, as given in the Unicode database file PropertyVal...
Enumerated property Sentence_Break (new in Unicode 4.1).
Definition: uchar.h:512
double u_getNumericValue(UChar32 c)
Get the numeric value for a Unicode code point as defined in the Unicode Character Database...
Binary property Lowercase.
Definition: uchar.h:257
UBool u_isJavaIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in a Java identifier...
First constant for bit-mask Unicode properties.
Definition: uchar.h:541
Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
Definition: uchar.h:522
UBool u_isspace(UChar32 c)
Determines if the specified character is a space character or not.
USentenceBreak
Sentence Break constants.
Definition: uchar.h:2061
Binary property Unified_Ideograph (new in Unicode 3.2).
Definition: uchar.h:282
Enumerated property Canonical_Combining_Class.
Definition: uchar.h:448
UCharNameChoice
Selector constants for u_charName().
Definition: uchar.h:1709
One more than the last constant for binary Unicode properties.
Definition: uchar.h:435
UBool u_isJavaIDPart(UChar32 c)
Determines if the specified character is permissible in a Java identifier.
Enumerated property Script.
Definition: uchar.h:473
Unicode 3.2 renames this block to "Greek and Coptic".
Definition: uchar.h:977
Binary property Hex_Digit.
Definition: uchar.h:224
String property Uppercase_Mapping.
Definition: uchar.h:609
UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName().
Definition: uchar.h:1746
String property Lowercase_Mapping.
Definition: uchar.h:581
UCharDirection u_charDirection(UChar32 c)
Returns the bidirectional category value for the code point, which is used in the Unicode bidirection...
UBool u_islower(UChar32 c)
Determines whether the specified code point has the general category "Ll" (lowercase letter)...
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:349
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:85
Enumerated property NFKC_Quick_Check.
Definition: uchar.h:488
Not a paired bracket.
Definition: uchar.h:922
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:163
void u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)
Enumerate efficiently all code points with their Unicode general categories.
Enumerated property Hangul_Syllable_Type, new in Unicode 4.
Definition: uchar.h:476
Binary property Dash.
Definition: uchar.h:192
Binary property alnum (a C/POSIX character class).
Definition: uchar.h:365
Cf.
Definition: uchar.h:690
Binary property Variation_Selector (new in Unicode 4.0.1).
Definition: uchar.h:311
One more than the highest normal UHangulSyllableType value.
Definition: uchar.h:2231
UBool u_isUUppercase(UChar32 c)
Check if a code point has the Uppercase Unicode property.
UBlockCode
Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
Definition: uchar.h:942
Enumerated property Word_Break (new in Unicode 4.1).
Definition: uchar.h:517
Binary property Deprecated (new in Unicode 3.2).
Definition: uchar.h:199
Binary property Bidi_Control.
Definition: uchar.h:185
Binary property XID_Continue.
Definition: uchar.h:293
Same as UBLOCK_PRIVATE_USE.
Definition: uchar.h:1198
UBool u_hasBinaryProperty(UChar32 c, UProperty which)
Check a binary Unicode property for a code point.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:396
Binary property Uppercase.
Definition: uchar.h:285
void u_getUnicodeVersion(UVersionInfo versionArray)
Gets the Unicode version information.
Binary property Changes_When_Uppercased.
Definition: uchar.h:393
UJoiningGroup
Joining Group constants.
Definition: uchar.h:1836
Binary property Cased.
Definition: uchar.h:387
One more than the highest normal UEastAsianWidth value.
Definition: uchar.h:1694
One more than the highest normal UDecompositionType value.
Definition: uchar.h:1796
Cs.
Definition: uchar.h:694
UEastAsianWidth
East Asian Width constants.
Definition: uchar.h:1674
UBool u_isupper(UChar32 c)
Determines whether the specified code point has the general category "Lu" (uppercase letter)...
Enumerated property Trail_Canonical_Combining_Class.
Definition: uchar.h:502
UBool u_isULowercase(UChar32 c)
Check if a code point has the Lowercase Unicode property.
Non-category for unassigned and non-character code points.
Definition: uchar.h:656
UBool u_ispunct(UChar32 c)
Determines whether the specified code point is a punctuation character.
First constant for Unicode properties with unusual value types.
Definition: uchar.h:628
UWordBreakValues
Word Break constants.
Definition: uchar.h:2001
void u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive...
int32_t u_getIntPropertyMaxValue(UProperty which)
Get the maximum value for an enumerated/integer/binary Unicode property.
Enumerated property Joining_Type.
Definition: uchar.h:464
One more than the last constant for double Unicode properties.
Definition: uchar.h:560
Binary property Emoji_Modifier.
Definition: uchar.h:422
Basic definitions for ICU, for both C and C++ APIs.
UBool u_isIDPart(UChar32 c)
Determines if the specified character is permissible in an identifier according to Java...
String property Bidi_Paired_Bracket (new in Unicode 6.3).
Definition: uchar.h:612
Enumerated property Lead_Canonical_Combining_Class.
Definition: uchar.h:495
Binary property ASCII_Hex_Digit.
Definition: uchar.h:181
UJoiningType
Joining Type constants.
Definition: uchar.h:1806
UBool u_isIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in an identifier accordin...
Binary property Soft_Dotted (new in Unicode 3.2).
Definition: uchar.h:274
One more than the last constant for string Unicode properties.
Definition: uchar.h:618
Binary property Emoji_Modifier_Base.
Definition: uchar.h:429
Binary Property Segment_Starter.
Definition: uchar.h:350
UChar32 u_toupper(UChar32 c)
The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.
Binary property ID_Start.
Definition: uchar.h:236
int32_t u_getIntPropertyMinValue(UProperty which)
Get the minimum value for an enumerated/integer/binary Unicode property.
UBool u_isJavaSpaceChar(UChar32 c)
Determine if the specified code point is a space character according to Java.
UBool u_isMirrored(UChar32 c)
Determines whether the code point has the Bidi_Mirrored property.
Corrected name from NameAliases.txt.
Definition: uchar.h:1723
Binary property Changes_When_Titlecased.
Definition: uchar.h:395
UChar32 u_tolower(UChar32 c)
The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.
Enumerated property Line_Break.
Definition: uchar.h:467
One more than the highest normal UJoiningType value.
Definition: uchar.h:1826
UBool u_isIDIgnorable(UChar32 c)
Determines if the specified character should be regarded as an ignorable character in an identifier...
Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
Definition: uchar.h:507
UBool u_isdigit(UChar32 c)
Determines whether the specified code point is a digit character according to Java.
UProperty u_getPropertyEnum(const char *alias)
Return the UProperty enum for a given property name, as specified in the Unicode database file Proper...
UBool u_isWhitespace(UChar32 c)
Determines if the specified code point is a whitespace character according to Java/ICU.
Deprecated string property ISO_Comment.
Definition: uchar.h:577
UBool u_isblank(UChar32 c)
Determines whether the specified code point is a "blank" or "horizontal space", a character that visi...
UBool u_isalpha(UChar32 c)
Determines whether the specified code point is a letter character.
Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
Definition: uchar.h:196
One more than the highest normal UBlockCode value.
Definition: uchar.h:1657
The Unicode_1_Name property value which is of little practical value.
Definition: uchar.h:1718
int32_t u_getISOComment(UChar32 c, char *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Returns an empty string.
One higher than the last enum UCharCategory constant.
Definition: uchar.h:724
One more than the highest normal UWordBreakValues value.
Definition: uchar.h:2051
UBool u_isalnum(UChar32 c)
Determines whether the specified code point is an alphanumeric character (letter or digit) according ...
Binary property Pattern_Syntax (new in Unicode 4.1).
Definition: uchar.h:355
One more than the last constant for Unicode properties with unusual value types.
Definition: uchar.h:634
UBidiPairedBracketType
Bidi Paired Bracket Type constants.
Definition: uchar.h:914
One more than the highest normal UCharNameChoice value.
Definition: uchar.h:1729
UChar32 u_charMirror(UChar32 c)
Maps the specified character to a "mirror-image" character.
UBool u_isISOControl(UChar32 c)
Determines whether the specified code point is an ISO control code.
int8_t u_charType(UChar32 c)
Returns the general category value for the code point.
int32_t u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
Retrieve the name of a Unicode character.
String property Age.
Definition: uchar.h:565
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
int8_t UBool
The ICU boolean type.
Definition: umachine.h:236
Enumerated property NFKD_Quick_Check.
Definition: uchar.h:482