ICU 78.3  78.3
uchar.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File UCHAR.H
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 04/02/97 aliu Creation.
15 * 03/29/99 helena Updated for C APIs.
16 * 4/15/99 Madhu Updated for C Implementation and Javadoc
17 * 5/20/99 Madhu Added the function u_getVersion()
18 * 8/19/1999 srl Upgraded scripts to Unicode 3.0
19 * 8/27/1999 schererm UCharDirection constants: U_...
20 * 11/11/1999 weiv added u_isalnum(), cleaned comments
21 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
22 ******************************************************************************
23 */
24 
25 #ifndef UCHAR_H
26 #define UCHAR_H
27 
28 #include <stdbool.h>
29 #include "unicode/utypes.h"
30 #include "unicode/stringoptions.h"
31 #include "unicode/ucpmap.h"
32 
/*
 * Introduce the type name USet exactly once.
 * The typedef is emitted only if USET_DEFINED is not yet defined, and the
 * guard macro is defined here so that any other header testing the same
 * guard will not repeat it. struct USet gets no members in this block, so
 * within this view it is an incomplete type usable only through pointers
 * (e.g. the `const USet *` returned by u_getBinaryPropertySet below).
 * The whole block is also skipped when U_IN_DOXYGEN is defined.
 */
33 #if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
34 
35 #define USET_DEFINED
36 
45 typedef struct USet USet;
46 
47 #endif
48 
49 
51 
52 /*==========================================================================*/
53 /* Unicode version number */
54 /*==========================================================================*/
/* Unicode version number as a string literal; expands to "17.0". */
64 #define U_UNICODE_VERSION "17.0"
65 
/*
 * Named minimum (0) and maximum (0x10ffff) values.
 * 0x10ffff is the highest Unicode code point.
 * NOTE(review): presumably these bound the UChar32 code point values taken
 * by the functions declared later in this header - confirm in the API docs.
 */
158 #define UCHAR_MIN_VALUE 0
159 
168 #define UCHAR_MAX_VALUE 0x10ffff
169 
/*
 * Expands to a uint32_t value with only bit number x set (1 shifted left
 * by x). The argument is parenthesized and appears once in the expansion.
 * x must be in the range 0..31: in C, shifting a 32-bit value by a negative
 * count or by 32 or more is undefined behavior.
 */
174 #define U_MASK(x) ((uint32_t)1<<(x))
175 
/*
 * Enumeration of property selectors. Values of this type are taken by
 * functions declared later in this header as `UProperty which` /
 * `UProperty property` (e.g. u_stringHasBinaryProperty, u_getPropertyName)
 * and returned by u_getPropertyEnum.
 *
 * Only four enumerators are present in this listing; the jumps in the
 * listing line numbers (e.g. 207 -> 561) mark lines that are not shown.
 * Every U_HIDE_DEPRECATED_API section below is empty here for that reason.
 *
 * NOTE(review): 0x1001/0x100A and 0x4000/0x4005 look like members of two
 * separately numbered ranges whose *_START constants (mentioned in the
 * second note below) are not visible here - confirm against the full header.
 */
196 typedef enum UProperty {
197  /*
198  * Note: UProperty constants are parsed by preparseucd.py.
199  * It matches lines like
200  * UCHAR_<Unicode property name>=<integer>,
201  */
202 
203  /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
204  debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
205  rather than UCHAR_BINARY_START. Likewise for other *_START
206  identifiers. */
207 
561 #ifndef U_HIDE_DEPRECATED_API
562 
567 #endif // U_HIDE_DEPRECATED_API
568 
576  UCHAR_BLOCK=0x1001,
604  UCHAR_SCRIPT=0x100A,
689 #ifndef U_HIDE_DEPRECATED_API
690 
695 #endif // U_HIDE_DEPRECATED_API
696 
708 #ifndef U_HIDE_DEPRECATED_API
709 
714 #endif // U_HIDE_DEPRECATED_API
715 
721 #ifndef U_HIDE_DEPRECATED_API
722 
727 #endif // U_HIDE_DEPRECATED_API
728 
731  UCHAR_AGE=0x4000,
740 #ifndef U_HIDE_DEPRECATED_API
741 
744 #endif /* U_HIDE_DEPRECATED_API */
745 
750  UCHAR_NAME=0x4005,
766 #ifndef U_HIDE_DEPRECATED_API
767 
772 #endif /* U_HIDE_DEPRECATED_API */
773 
779 #ifndef U_HIDE_DEPRECATED_API
780 
785 #endif // U_HIDE_DEPRECATED_API
786 
809 #ifndef U_HIDE_DEPRECATED_API
810 
815 #endif // U_HIDE_DEPRECATED_API
816 
819 } UProperty;
820 
/*
 * Enumeration of character category constants. Per the note inside, each
 * constant is paired with a comment naming a Unicode 2-letter
 * General_Category value. This type is the `type` parameter of the
 * UCharEnumTypeRange callback declared later in this header.
 *
 * No enumerators are present in this listing (listing lines 835-904 are
 * not shown).
 * NOTE(review): the U_* constants passed to U_MASK() by the U_GC_*_MASK
 * macros are presumably declared in the missing lines - confirm.
 */
826 typedef enum UCharCategory
827 {
828  /*
829  * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
830  * It matches pairs of lines like
831  * / ** <Unicode 2-letter General_Category value> comment... * /
832  * U_<[A-Z_]+> = <integer>,
833  */
834 
905 } UCharCategory;
906 
/*
 * Single-bit masks, one per category constant. Each macro expands to
 * U_MASK(<constant>), i.e. a uint32_t with only bit number <constant> set,
 * so every constant used here must have a value in 0..31.
 * These are the same kind of value that U_GET_GC_MASK(c) produces, so the
 * two can be combined with bitwise AND.
 * The two letters in each macro name are, going by the argument names, the
 * abbreviation of the category (LU = uppercase letter, ND = decimal digit
 * number, and so on).
 */
921 #define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)
922 
/* Masks built from the *_LETTER constants. */
924 #define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
925 
926 #define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
927 
928 #define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)
929 
930 #define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)
931 
932 #define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)
933 
/* Masks built from the *_MARK constants. */
935 #define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)
936 
937 #define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)
938 
939 #define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)
940 
/* Masks built from the *_NUMBER constants. */
942 #define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)
943 
944 #define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)
945 
946 #define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)
947 
/* Masks built from the *_SEPARATOR constants. */
949 #define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)
950 
951 #define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)
952 
953 #define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)
954 
/* Masks for control, format, private-use and surrogate constants. */
956 #define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)
957 
958 #define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)
959 
960 #define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)
961 
962 #define U_GC_CS_MASK U_MASK(U_SURROGATE)
963 
/* Masks built from the *_PUNCTUATION constants (PI and PF follow further down). */
965 #define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)
966 
967 #define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)
968 
969 #define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)
970 
971 #define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)
972 
973 #define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)
974 
/* Masks built from the *_SYMBOL constants. */
976 #define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)
977 
978 #define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)
979 
980 #define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)
981 
982 #define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)
983 
/* Remaining two *_PUNCTUATION masks. */
985 #define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)
986 
987 #define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)
988 
989 
/*
 * Multi-bit masks: each one is the bitwise OR of several single-category
 * U_GC_xx_MASK macros, and each expansion is fully parenthesized.
 */
/* L: all five letter masks (LU, LL, LT, LM, LO). */
991 #define U_GC_L_MASK \
992  (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
993 
/* LC: the subset of L without LM and LO, i.e. LU, LL and LT only. */
995 #define U_GC_LC_MASK \
996  (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
997 
/* M: the three mark masks (MN, ME, MC). */
999 #define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
1000 
/* N: the three number masks (ND, NL, NO). */
1002 #define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
1003 
/* Z: the three separator masks (ZS, ZL, ZP). */
1005 #define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
1006 
/* C: CN, CC, CF, CO and CS. */
1008 #define U_GC_C_MASK \
1009  (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
1010 
/* P: all seven punctuation masks (PD, PS, PE, PC, PO, PI, PF). */
1012 #define U_GC_P_MASK \
1013  (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
1014  U_GC_PI_MASK|U_GC_PF_MASK)
1015 
/* S: the four symbol masks (SM, SC, SK, SO). */
1017 #define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
1018 
/*
 * Enumeration of character direction constants. Per the note inside, each
 * constant is paired with a comment naming a Unicode 1..3-letter Bidi_Class
 * value.
 * No enumerators are present in this listing (listing lines 1031-1076 and
 * the body of the deprecated section are not shown).
 */
1023 typedef enum UCharDirection {
1024  /*
1025  * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
1026  * It matches pairs of lines like
1027  * / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
1028  * U_<[A-Z_]+> = <integer>,
1029  */
1030 
1077 #ifndef U_HIDE_DEPRECATED_API
1078 
1085 #endif // U_HIDE_DEPRECATED_API
1086 } UCharDirection;
1087 
1095  /*
1096  * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
1097  * It matches lines like
1098  * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
1099  */
1100 
1107 #ifndef U_HIDE_DEPRECATED_API
1108 
1114  U_BPT_COUNT /* 3 */
1115 #endif // U_HIDE_DEPRECATED_API
1117 
1123  /*
1124  * Note: UBlockCode constants are parsed by preparseucd.py.
1125  * It matches lines like
1126  * UBLOCK_<Unicode Block value name> = <integer>,
1127  */
1128 
1130  UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
1131 
1133  UBLOCK_BASIC_LATIN = 1, /*[0000]*/
1134 
1137 
1140 
1143 
1145  UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
1146 
1149 
1152 
1157  UBLOCK_GREEK =8, /*[0370]*/
1158 
1160  UBLOCK_CYRILLIC =9, /*[0400]*/
1161 
1163  UBLOCK_ARMENIAN =10, /*[0530]*/
1164 
1166  UBLOCK_HEBREW =11, /*[0590]*/
1167 
1169  UBLOCK_ARABIC =12, /*[0600]*/
1170 
1172  UBLOCK_SYRIAC =13, /*[0700]*/
1173 
1175  UBLOCK_THAANA =14, /*[0780]*/
1176 
1178  UBLOCK_DEVANAGARI =15, /*[0900]*/
1179 
1181  UBLOCK_BENGALI =16, /*[0980]*/
1182 
1184  UBLOCK_GURMUKHI =17, /*[0A00]*/
1185 
1187  UBLOCK_GUJARATI =18, /*[0A80]*/
1188 
1190  UBLOCK_ORIYA =19, /*[0B00]*/
1191 
1193  UBLOCK_TAMIL =20, /*[0B80]*/
1194 
1196  UBLOCK_TELUGU =21, /*[0C00]*/
1197 
1199  UBLOCK_KANNADA =22, /*[0C80]*/
1200 
1202  UBLOCK_MALAYALAM =23, /*[0D00]*/
1203 
1205  UBLOCK_SINHALA =24, /*[0D80]*/
1206 
1208  UBLOCK_THAI =25, /*[0E00]*/
1209 
1211  UBLOCK_LAO =26, /*[0E80]*/
1212 
1214  UBLOCK_TIBETAN =27, /*[0F00]*/
1215 
1217  UBLOCK_MYANMAR =28, /*[1000]*/
1218 
1220  UBLOCK_GEORGIAN =29, /*[10A0]*/
1221 
1223  UBLOCK_HANGUL_JAMO =30, /*[1100]*/
1224 
1226  UBLOCK_ETHIOPIC =31, /*[1200]*/
1227 
1229  UBLOCK_CHEROKEE =32, /*[13A0]*/
1230 
1233 
1235  UBLOCK_OGHAM =34, /*[1680]*/
1236 
1238  UBLOCK_RUNIC =35, /*[16A0]*/
1239 
1241  UBLOCK_KHMER =36, /*[1780]*/
1242 
1244  UBLOCK_MONGOLIAN =37, /*[1800]*/
1245 
1248 
1250  UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
1251 
1254 
1257 
1259  UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
1260 
1266 
1269 
1271  UBLOCK_NUMBER_FORMS =45, /*[2150]*/
1272 
1274  UBLOCK_ARROWS =46, /*[2190]*/
1275 
1278 
1281 
1283  UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
1284 
1287 
1290 
1292  UBLOCK_BOX_DRAWING =52, /*[2500]*/
1293 
1295  UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
1296 
1298  UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
1299 
1302 
1304  UBLOCK_DINGBATS =56, /*[2700]*/
1305 
1307  UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
1308 
1311 
1313  UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
1314 
1317 
1320 
1322  UBLOCK_HIRAGANA =62, /*[3040]*/
1323 
1325  UBLOCK_KATAKANA =63, /*[30A0]*/
1326 
1328  UBLOCK_BOPOMOFO =64, /*[3100]*/
1329 
1332 
1334  UBLOCK_KANBUN =66, /*[3190]*/
1335 
1338 
1341 
1344 
1347 
1350 
1352  UBLOCK_YI_SYLLABLES =72, /*[A000]*/
1353 
1355  UBLOCK_YI_RADICALS =73, /*[A490]*/
1356 
1358  UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
1359 
1361  UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
1362 
1365 
1367  UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
1368 
1378  UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/
1389 
1392 
1395 
1398 
1401 
1404 
1407 
1410 
1412  UBLOCK_SPECIALS =86, /*[FFF0]*/
1413 
1416 
1417  /* New blocks in Unicode 3.1 */
1418 
1420  UBLOCK_OLD_ITALIC = 88, /*[10300]*/
1422  UBLOCK_GOTHIC = 89, /*[10330]*/
1424  UBLOCK_DESERET = 90, /*[10400]*/
1428  UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/
1436  UBLOCK_TAGS = 96, /*[E0000]*/
1437 
1438  /* New blocks in Unicode 3.2 */
1439 
1448  UBLOCK_TAGALOG = 98, /*[1700]*/
1450  UBLOCK_HANUNOO = 99, /*[1720]*/
1452  UBLOCK_BUHID = 100, /*[1740]*/
1454  UBLOCK_TAGBANWA = 101, /*[1760]*/
1468  UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
1473 
1474  /* New blocks in Unicode 4 */
1475 
1477  UBLOCK_LIMBU = 111, /*[1900]*/
1479  UBLOCK_TAI_LE = 112, /*[1950]*/
1481  UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
1483  UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
1489  UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
1491  UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
1493  UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
1495  UBLOCK_UGARITIC = 120, /*[10380]*/
1497  UBLOCK_SHAVIAN = 121, /*[10450]*/
1499  UBLOCK_OSMANYA = 122, /*[10480]*/
1501  UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
1503  UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
1506 
1507  /* New blocks in Unicode 4.1 */
1508 
1512  UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
1514  UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
1516  UBLOCK_BUGINESE = 129, /*[1A00]*/
1518  UBLOCK_CJK_STROKES = 130, /*[31C0]*/
1522  UBLOCK_COPTIC = 132, /*[2C80]*/
1524  UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
1526  UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
1528  UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
1530  UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
1532  UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
1536  UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
1538  UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
1544  UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
1546  UBLOCK_TIFINAGH = 144, /*[2D30]*/
1548  UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
1549 
1550  /* New blocks in Unicode 5.0 */
1551 
1553  UBLOCK_NKO = 146, /*[07C0]*/
1555  UBLOCK_BALINESE = 147, /*[1B00]*/
1557  UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
1559  UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
1561  UBLOCK_PHAGS_PA = 150, /*[A840]*/
1563  UBLOCK_PHOENICIAN = 151, /*[10900]*/
1565  UBLOCK_CUNEIFORM = 152, /*[12000]*/
1569  UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/
1570 
1571  /* New blocks in Unicode 5.1 */
1572 
1574  UBLOCK_SUNDANESE = 155, /*[1B80]*/
1576  UBLOCK_LEPCHA = 156, /*[1C00]*/
1578  UBLOCK_OL_CHIKI = 157, /*[1C50]*/
1580  UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
1582  UBLOCK_VAI = 159, /*[A500]*/
1584  UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
1586  UBLOCK_SAURASHTRA = 161, /*[A880]*/
1588  UBLOCK_KAYAH_LI = 162, /*[A900]*/
1590  UBLOCK_REJANG = 163, /*[A930]*/
1592  UBLOCK_CHAM = 164, /*[AA00]*/
1594  UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
1596  UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
1598  UBLOCK_LYCIAN = 167, /*[10280]*/
1600  UBLOCK_CARIAN = 168, /*[102A0]*/
1602  UBLOCK_LYDIAN = 169, /*[10920]*/
1604  UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
1606  UBLOCK_DOMINO_TILES = 171, /*[1F030]*/
1607 
1608  /* New blocks in Unicode 5.2 */
1609 
1611  UBLOCK_SAMARITAN = 172, /*[0800]*/
1615  UBLOCK_TAI_THAM = 174, /*[1A20]*/
1617  UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
1619  UBLOCK_LISU = 176, /*[A4D0]*/
1621  UBLOCK_BAMUM = 177, /*[A6A0]*/
1625  UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
1629  UBLOCK_JAVANESE = 181, /*[A980]*/
1631  UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
1633  UBLOCK_TAI_VIET = 183, /*[AA80]*/
1635  UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
1639  UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
1641  UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
1643  UBLOCK_AVESTAN = 188, /*[10B00]*/
1647  UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
1649  UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
1651  UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
1653  UBLOCK_KAITHI = 193, /*[11080]*/
1655  UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
1662 
1663  /* New blocks in Unicode 6.0 */
1664 
1666  UBLOCK_MANDAIC = 198, /*[0840]*/
1668  UBLOCK_BATAK = 199, /*[1BC0]*/
1670  UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/
1672  UBLOCK_BRAHMI = 201, /*[11000]*/
1674  UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/
1676  UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/
1678  UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/
1682  UBLOCK_EMOTICONS = 206, /*[1F600]*/
1686  UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/
1689 
1690  /* New blocks in Unicode 6.1 */
1691 
1693  UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/
1697  UBLOCK_CHAKMA = 212, /*[11100]*/
1701  UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/
1703  UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/
1705  UBLOCK_MIAO = 216, /*[16F00]*/
1707  UBLOCK_SHARADA = 217, /*[11180]*/
1709  UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/
1713  UBLOCK_TAKRI = 220, /*[11680]*/
1714 
1715  /* New blocks in Unicode 7.0 */
1716 
1718  UBLOCK_BASSA_VAH = 221, /*[16AD0]*/
1720  UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/
1722  UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/
1726  UBLOCK_DUPLOYAN = 225, /*[1BC00]*/
1728  UBLOCK_ELBASAN = 226, /*[10500]*/
1732  UBLOCK_GRANTHA = 228, /*[11300]*/
1734  UBLOCK_KHOJKI = 229, /*[11200]*/
1736  UBLOCK_KHUDAWADI = 230, /*[112B0]*/
1738  UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/
1740  UBLOCK_LINEAR_A = 232, /*[10600]*/
1742  UBLOCK_MAHAJANI = 233, /*[11150]*/
1744  UBLOCK_MANICHAEAN = 234, /*[10AC0]*/
1746  UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/
1748  UBLOCK_MODI = 236, /*[11600]*/
1750  UBLOCK_MRO = 237, /*[16A40]*/
1752  UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/
1754  UBLOCK_NABATAEAN = 239, /*[10880]*/
1756  UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/
1758  UBLOCK_OLD_PERMIC = 241, /*[10350]*/
1760  UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/
1762  UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/
1764  UBLOCK_PALMYRENE = 244, /*[10860]*/
1766  UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/
1768  UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/
1772  UBLOCK_SIDDHAM = 248, /*[11580]*/
1776  UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/
1778  UBLOCK_TIRHUTA = 251, /*[11480]*/
1780  UBLOCK_WARANG_CITI = 252, /*[118A0]*/
1781 
1782  /* New blocks in Unicode 8.0 */
1783 
1785  UBLOCK_AHOM = 253, /*[11700]*/
1787  UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/
1789  UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/
1795  UBLOCK_HATRAN = 258, /*[108E0]*/
1797  UBLOCK_MULTANI = 259, /*[11280]*/
1799  UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/
1803  UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/
1804 
1805  /* New blocks in Unicode 9.0 */
1806 
1808  UBLOCK_ADLAM = 263, /*[1E900]*/
1810  UBLOCK_BHAIKSUKI = 264, /*[11C00]*/
1812  UBLOCK_CYRILLIC_EXTENDED_C = 265, /*[1C80]*/
1814  UBLOCK_GLAGOLITIC_SUPPLEMENT = 266, /*[1E000]*/
1818  UBLOCK_MARCHEN = 268, /*[11C70]*/
1820  UBLOCK_MONGOLIAN_SUPPLEMENT = 269, /*[11660]*/
1822  UBLOCK_NEWA = 270, /*[11400]*/
1824  UBLOCK_OSAGE = 271, /*[104B0]*/
1826  UBLOCK_TANGUT = 272, /*[17000]*/
1828  UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/
1829 
1830  // New blocks in Unicode 10.0
1831 
1835  UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/
1837  UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/
1839  UBLOCK_NUSHU = 277, /*[1B170]*/
1841  UBLOCK_SOYOMBO = 278, /*[11A50]*/
1843  UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/
1845  UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/
1846 
1847  // New blocks in Unicode 11.0
1848 
1850  UBLOCK_CHESS_SYMBOLS = 281, /*[1FA00]*/
1852  UBLOCK_DOGRA = 282, /*[11800]*/
1854  UBLOCK_GEORGIAN_EXTENDED = 283, /*[1C90]*/
1856  UBLOCK_GUNJALA_GONDI = 284, /*[11D60]*/
1858  UBLOCK_HANIFI_ROHINGYA = 285, /*[10D00]*/
1860  UBLOCK_INDIC_SIYAQ_NUMBERS = 286, /*[1EC70]*/
1862  UBLOCK_MAKASAR = 287, /*[11EE0]*/
1864  UBLOCK_MAYAN_NUMERALS = 288, /*[1D2E0]*/
1866  UBLOCK_MEDEFAIDRIN = 289, /*[16E40]*/
1868  UBLOCK_OLD_SOGDIAN = 290, /*[10F00]*/
1870  UBLOCK_SOGDIAN = 291, /*[10F30]*/
1871 
1872  // New blocks in Unicode 12.0
1873 
1877  UBLOCK_ELYMAIC = 293, /*[10FE0]*/
1879  UBLOCK_NANDINAGARI = 294, /*[119A0]*/
1883  UBLOCK_OTTOMAN_SIYAQ_NUMBERS = 296, /*[1ED00]*/
1885  UBLOCK_SMALL_KANA_EXTENSION = 297, /*[1B130]*/
1889  UBLOCK_TAMIL_SUPPLEMENT = 299, /*[11FC0]*/
1891  UBLOCK_WANCHO = 300, /*[1E2C0]*/
1892 
1893  // New blocks in Unicode 13.0
1894 
1896  UBLOCK_CHORASMIAN = 301, /*[10FB0]*/
1900  UBLOCK_DIVES_AKURU = 303, /*[11900]*/
1902  UBLOCK_KHITAN_SMALL_SCRIPT = 304, /*[18B00]*/
1904  UBLOCK_LISU_SUPPLEMENT = 305, /*[11FB0]*/
1908  UBLOCK_TANGUT_SUPPLEMENT = 307, /*[18D00]*/
1910  UBLOCK_YEZIDI = 308, /*[10E80]*/
1911 
1912  // New blocks in Unicode 14.0
1913 
1915  UBLOCK_ARABIC_EXTENDED_B = 309, /*[0870]*/
1917  UBLOCK_CYPRO_MINOAN = 310, /*[12F90]*/
1919  UBLOCK_ETHIOPIC_EXTENDED_B = 311, /*[1E7E0]*/
1921  UBLOCK_KANA_EXTENDED_B = 312, /*[1AFF0]*/
1923  UBLOCK_LATIN_EXTENDED_F = 313, /*[10780]*/
1925  UBLOCK_LATIN_EXTENDED_G = 314, /*[1DF00]*/
1927  UBLOCK_OLD_UYGHUR = 315, /*[10F70]*/
1929  UBLOCK_TANGSA = 316, /*[16A70]*/
1931  UBLOCK_TOTO = 317, /*[1E290]*/
1935  UBLOCK_VITHKUQI = 319, /*[10570]*/
1938 
1939  // New blocks in Unicode 15.0
1940 
1942  UBLOCK_ARABIC_EXTENDED_C = 321, /*[10EC0]*/
1946  UBLOCK_CYRILLIC_EXTENDED_D = 323, /*[1E030]*/
1948  UBLOCK_DEVANAGARI_EXTENDED_A = 324, /*[11B00]*/
1950  UBLOCK_KAKTOVIK_NUMERALS = 325, /*[1D2C0]*/
1952  UBLOCK_KAWI = 326, /*[11F00]*/
1954  UBLOCK_NAG_MUNDARI = 327, /*[1E4D0]*/
1955 
1956  // New block in Unicode 15.1
1957 
1960 
1961  // New blocks in Unicode 16.0
1962 
1966  UBLOCK_GARAY = 330, /*[10D40]*/
1968  UBLOCK_GURUNG_KHEMA = 331, /*[16100]*/
1970  UBLOCK_KIRAT_RAI = 332, /*[16D40]*/
1972  UBLOCK_MYANMAR_EXTENDED_C = 333, /*[116D0]*/
1974  UBLOCK_OL_ONAL = 334, /*[1E5D0]*/
1976  UBLOCK_SUNUWAR = 335, /*[11BC0]*/
1980  UBLOCK_TODHRI = 337, /*[105C0]*/
1982  UBLOCK_TULU_TIGALARI = 338, /*[11380]*/
1983 
1984  // New blocks in Unicode 17.0.0
1985 
1987  UBLOCK_BERIA_ERFE = 339, /*[16EA0]*/
1993  UBLOCK_SHARADA_SUPPLEMENT = 342, /*[11B60]*/
1995  UBLOCK_SIDETIC = 343, /*[10940]*/
1997  UBLOCK_TAI_YO = 344, /*[1E6C0]*/
2001  UBLOCK_TOLONG_SIKI = 346, /*[11DB0]*/
2002 
2003 #ifndef U_HIDE_DEPRECATED_API
2004 
2011 #endif // U_HIDE_DEPRECATED_API
2012 
2015 };
2016 
/* Gives `enum UBlockCode` the plain type name UBlockCode, so C code can use it without the `enum` keyword. */
2018 typedef enum UBlockCode UBlockCode;
2019 
/*
 * Enumeration of East_Asian_Width values (property name taken from the
 * note inside). The six visible enumerators have no initializers, so they
 * take the values 0..5 in declaration order.
 * NOTE(review): the bracketed trailing comments ([N], [A], ...) look like
 * the short aliases of each property value - confirm.
 * The body of the U_HIDE_DEPRECATED_API section is not shown in this listing.
 */
2027 typedef enum UEastAsianWidth {
2028  /*
2029  * Note: UEastAsianWidth constants are parsed by preparseucd.py.
2030  * It matches lines like
2031  * U_EA_<Unicode East_Asian_Width value name>
2032  */
2033 
2034  U_EA_NEUTRAL, /*[N]*/
2035  U_EA_AMBIGUOUS, /*[A]*/
2036  U_EA_HALFWIDTH, /*[H]*/
2037  U_EA_FULLWIDTH, /*[F]*/
2038  U_EA_NARROW, /*[Na]*/
2039  U_EA_WIDE, /*[W]*/
2040 #ifndef U_HIDE_DEPRECATED_API
2041 
2048 #endif // U_HIDE_DEPRECATED_API
2049 } UEastAsianWidth;
2050 
/*
 * Selector type passed as `nameChoice` to u_charName, u_charFromName,
 * u_enumCharNames and the UEnumCharNamesFn callback declared later in this
 * header.
 * No enumerators are present in this listing: listing lines 2063-2064,
 * 2074-2076 and the bodies of both deprecated sections are not shown.
 */
2062 typedef enum UCharNameChoice {
2065 #ifndef U_HIDE_DEPRECATED_API
2066 
2072 #endif /* U_HIDE_DEPRECATED_API */
2073 
2077 #ifndef U_HIDE_DEPRECATED_API
2078 
2083 #endif // U_HIDE_DEPRECATED_API
2084 } UCharNameChoice;
2085 
2099 typedef enum UPropertyNameChoice {
2100  U_SHORT_PROPERTY_NAME,
2101  U_LONG_PROPERTY_NAME,
2102 #ifndef U_HIDE_DEPRECATED_API
2103 
2108 #endif // U_HIDE_DEPRECATED_API
2110 
2117 typedef enum UDecompositionType {
2118  /*
2119  * Note: UDecompositionType constants are parsed by preparseucd.py.
2120  * It matches lines like
2121  * U_DT_<Unicode Decomposition_Type value name>
2122  */
2123 
2124  U_DT_NONE, /*[none]*/
2125  U_DT_CANONICAL, /*[can]*/
2126  U_DT_COMPAT, /*[com]*/
2127  U_DT_CIRCLE, /*[enc]*/
2128  U_DT_FINAL, /*[fin]*/
2129  U_DT_FONT, /*[font]*/
2130  U_DT_FRACTION, /*[fra]*/
2131  U_DT_INITIAL, /*[init]*/
2132  U_DT_ISOLATED, /*[iso]*/
2133  U_DT_MEDIAL, /*[med]*/
2134  U_DT_NARROW, /*[nar]*/
2135  U_DT_NOBREAK, /*[nb]*/
2136  U_DT_SMALL, /*[sml]*/
2137  U_DT_SQUARE, /*[sqr]*/
2138  U_DT_SUB, /*[sub]*/
2139  U_DT_SUPER, /*[sup]*/
2140  U_DT_VERTICAL, /*[vert]*/
2141  U_DT_WIDE, /*[wide]*/
2142 #ifndef U_HIDE_DEPRECATED_API
2143 
2149  U_DT_COUNT /* 18 */
2150 #endif // U_HIDE_DEPRECATED_API
2152 
/*
 * Enumeration of Joining_Type values (property name taken from the note
 * inside). The six regular enumerators have no initializers and therefore
 * take the values 0..5 in declaration order.
 */
2159 typedef enum UJoiningType {
2160  /*
2161  * Note: UJoiningType constants are parsed by preparseucd.py.
2162  * It matches lines like
2163  * U_JT_<Unicode Joining_Type value name>
2164  */
2165 
2166  U_JT_NON_JOINING, /*[U]*/
2167  U_JT_JOIN_CAUSING, /*[C]*/
2168  U_JT_DUAL_JOINING, /*[D]*/
2169  U_JT_LEFT_JOINING, /*[L]*/
2170  U_JT_RIGHT_JOINING, /*[R]*/
2171  U_JT_TRANSPARENT, /*[T]*/
2172 #ifndef U_HIDE_DEPRECATED_API
2173 
 /* Declared only when U_HIDE_DEPRECATED_API is not defined. As the next
  * enumerator after U_JT_TRANSPARENT (5) it has the value 6, which is the
  * number of enumerators above. */
2179  U_JT_COUNT /* 6 */
2180 #endif // U_HIDE_DEPRECATED_API
2181 } UJoiningType;
2182 
/*
 * Enumeration of Joining_Group values (property name taken from the note
 * inside). Enumerators without an initializer take the previous value plus
 * one, starting from 0 for U_JG_NO_JOINING_GROUP.
 * Listing lines 2248-2308 and the body of the deprecated section are not
 * shown, so the list below is not the complete set.
 */
2189 typedef enum UJoiningGroup {
2190  /*
2191  * Note: UJoiningGroup constants are parsed by preparseucd.py.
2192  * It matches lines like
2193  * U_JG_<Unicode Joining_Group value name>
2194  */
2195 
2196  U_JG_NO_JOINING_GROUP,
2197  U_JG_AIN,
2198  U_JG_ALAPH,
2199  U_JG_ALEF,
2200  U_JG_BEH,
2201  U_JG_BETH,
2202  U_JG_DAL,
2203  U_JG_DALATH_RISH,
2204  U_JG_E,
2205  U_JG_FEH,
2206  U_JG_FINAL_SEMKATH,
2207  U_JG_GAF,
2208  U_JG_GAMAL,
2209  U_JG_HAH,
 /* Alias: takes the value of U_JG_TEH_MARBUTA_GOAL, so U_JG_HE below
  * continues from that value plus one.
  * NOTE(review): the declaration of U_JG_TEH_MARBUTA_GOAL is not in this
  * listing (listing line 2210 is absent) - confirm that it precedes this
  * line in the real header. */
2211  U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
2212  U_JG_HE,
2213  U_JG_HEH,
2214  U_JG_HEH_GOAL,
2215  U_JG_HETH,
2216  U_JG_KAF,
2217  U_JG_KAPH,
2218  U_JG_KNOTTED_HEH,
2219  U_JG_LAM,
2220  U_JG_LAMADH,
2221  U_JG_MEEM,
2222  U_JG_MIM,
2223  U_JG_NOON,
2224  U_JG_NUN,
2225  U_JG_PE,
2226  U_JG_QAF,
2227  U_JG_QAPH,
2228  U_JG_REH,
2229  U_JG_REVERSED_PE,
2230  U_JG_SAD,
2231  U_JG_SADHE,
2232  U_JG_SEEN,
2233  U_JG_SEMKATH,
2234  U_JG_SHIN,
2235  U_JG_SWASH_KAF,
2236  U_JG_SYRIAC_WAW,
2237  U_JG_TAH,
2238  U_JG_TAW,
2239  U_JG_TEH_MARBUTA,
2240  U_JG_TETH,
2241  U_JG_WAW,
2242  U_JG_YEH,
2243  U_JG_YEH_BARREE,
2244  U_JG_YEH_WITH_TAIL,
2245  U_JG_YUDH,
2246  U_JG_YUDH_HE,
2247  U_JG_ZAIN,
2309 #ifndef U_HIDE_DEPRECATED_API
2310 
2317 #endif // U_HIDE_DEPRECATED_API
2318 } UJoiningGroup;
2319 
2327  /*
2328  * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
2329  * It matches lines like
2330  * U_GCB_<Unicode Grapheme_Cluster_Break value name>
2331  */
2332 
2333  U_GCB_OTHER = 0, /*[XX]*/
2334  U_GCB_CONTROL = 1, /*[CN]*/
2335  U_GCB_CR = 2, /*[CR]*/
2336  U_GCB_EXTEND = 3, /*[EX]*/
2337  U_GCB_L = 4, /*[L]*/
2338  U_GCB_LF = 5, /*[LF]*/
2339  U_GCB_LV = 6, /*[LV]*/
2340  U_GCB_LVT = 7, /*[LVT]*/
2341  U_GCB_T = 8, /*[T]*/
2342  U_GCB_V = 9, /*[V]*/
2344  U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
2346  U_GCB_PREPEND = 11, /*[PP]*/
2348  U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2350  U_GCB_E_BASE = 13, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
2352  U_GCB_E_BASE_GAZ = 14, /*[EBG]*/
2354  U_GCB_E_MODIFIER = 15, /*[EM]*/
2356  U_GCB_GLUE_AFTER_ZWJ = 16, /*[GAZ]*/
2358  U_GCB_ZWJ = 17, /*[ZWJ]*/
2359 
2360 #ifndef U_HIDE_DEPRECATED_API
2361 
2368 #endif // U_HIDE_DEPRECATED_API
2370 
2378 typedef enum UWordBreakValues {
2379  /*
2380  * Note: UWordBreakValues constants are parsed by preparseucd.py.
2381  * It matches lines like
2382  * U_WB_<Unicode Word_Break value name>
2383  */
2384 
2385  U_WB_OTHER = 0, /*[XX]*/
2386  U_WB_ALETTER = 1, /*[LE]*/
2387  U_WB_FORMAT = 2, /*[FO]*/
2388  U_WB_KATAKANA = 3, /*[KA]*/
2389  U_WB_MIDLETTER = 4, /*[ML]*/
2390  U_WB_MIDNUM = 5, /*[MN]*/
2391  U_WB_NUMERIC = 6, /*[NU]*/
2392  U_WB_EXTENDNUMLET = 7, /*[EX]*/
2394  U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
2396  U_WB_EXTEND = 9, /*[Extend]*/
2398  U_WB_LF = 10, /*[LF]*/
2400  U_WB_MIDNUMLET =11, /*[MB]*/
2402  U_WB_NEWLINE =12, /*[NL]*/
2404  U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2406  U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
2408  U_WB_SINGLE_QUOTE = 15, /*[SQ]*/
2410  U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/
2412  U_WB_E_BASE = 17, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
2414  U_WB_E_BASE_GAZ = 18, /*[EBG]*/
2416  U_WB_E_MODIFIER = 19, /*[EM]*/
2418  U_WB_GLUE_AFTER_ZWJ = 20, /*[GAZ]*/
2420  U_WB_ZWJ = 21, /*[ZWJ]*/
2422  U_WB_WSEGSPACE = 22, /*[WSEGSPACE]*/
2423 
2424 #ifndef U_HIDE_DEPRECATED_API
2425 
2432 #endif // U_HIDE_DEPRECATED_API
2434 
/*
 * Enumeration of Sentence_Break values (property name taken from the note
 * inside). Every visible enumerator has an explicit value; together they
 * cover 0..14 without gaps.
 * The body of the U_HIDE_DEPRECATED_API section is not shown in this listing.
 */
2441 typedef enum USentenceBreak {
2442  /*
2443  * Note: USentenceBreak constants are parsed by preparseucd.py.
2444  * It matches lines like
2445  * U_SB_<Unicode Sentence_Break value name>
2446  */
2447 
2448  U_SB_OTHER = 0, /*[XX]*/
2449  U_SB_ATERM = 1, /*[AT]*/
2450  U_SB_CLOSE = 2, /*[CL]*/
2451  U_SB_FORMAT = 3, /*[FO]*/
2452  U_SB_LOWER = 4, /*[LO]*/
2453  U_SB_NUMERIC = 5, /*[NU]*/
2454  U_SB_OLETTER = 6, /*[LE]*/
2455  U_SB_SEP = 7, /*[SE]*/
2456  U_SB_SP = 8, /*[SP]*/
2457  U_SB_STERM = 9, /*[ST]*/
2458  U_SB_UPPER = 10, /*[UP]*/
2459  U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
2460  U_SB_EXTEND = 12, /*[EX]*/
2461  U_SB_LF = 13, /*[LF]*/
2462  U_SB_SCONTINUE = 14, /*[SC]*/
2463 #ifndef U_HIDE_DEPRECATED_API
2464 
2471 #endif // U_HIDE_DEPRECATED_API
2472 } USentenceBreak;
2473 
/*
 * Enumeration of Line_Break values (property name taken from the note
 * inside). Every visible enumerator has an explicit value; the distinct
 * values cover 0..47 without gaps, and one value (15) has two names.
 * The body of the U_HIDE_DEPRECATED_API section is not shown in this listing.
 */
2480 typedef enum ULineBreak {
2481  /*
2482  * Note: ULineBreak constants are parsed by preparseucd.py.
2483  * It matches lines like
2484  * U_LB_<Unicode Line_Break value name>
2485  */
2486 
2487  U_LB_UNKNOWN = 0, /*[XX]*/
2488  U_LB_AMBIGUOUS = 1, /*[AI]*/
2489  U_LB_ALPHABETIC = 2, /*[AL]*/
2490  U_LB_BREAK_BOTH = 3, /*[B2]*/
2491  U_LB_BREAK_AFTER = 4, /*[BA]*/
2492  U_LB_BREAK_BEFORE = 5, /*[BB]*/
2493  U_LB_MANDATORY_BREAK = 6, /*[BK]*/
2494  U_LB_CONTINGENT_BREAK = 7, /*[CB]*/
2495  U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
2496  U_LB_COMBINING_MARK = 9, /*[CM]*/
2497  U_LB_CARRIAGE_RETURN = 10, /*[CR]*/
2498  U_LB_EXCLAMATION = 11, /*[EX]*/
2499  U_LB_GLUE = 12, /*[GL]*/
2500  U_LB_HYPHEN = 13, /*[HY]*/
2501  U_LB_IDEOGRAPHIC = 14, /*[ID]*/
2503  U_LB_INSEPARABLE = 15, /*[IN]*/
 /* Second spelling ("INSEPERABLE") of the constant above, with the same
  * value 15. NOTE(review): presumably retained so that code written against
  * the older spelling still compiles - confirm. */
2504  U_LB_INSEPERABLE = U_LB_INSEPARABLE,
2505  U_LB_INFIX_NUMERIC = 16, /*[IS]*/
2506  U_LB_LINE_FEED = 17, /*[LF]*/
2507  U_LB_NONSTARTER = 18, /*[NS]*/
2508  U_LB_NUMERIC = 19, /*[NU]*/
2509  U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/
2510  U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/
2511  U_LB_PREFIX_NUMERIC = 22, /*[PR]*/
2512  U_LB_QUOTATION = 23, /*[QU]*/
2513  U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/
2514  U_LB_SURROGATE = 25, /*[SG]*/
2515  U_LB_SPACE = 26, /*[SP]*/
2516  U_LB_BREAK_SYMBOLS = 27, /*[SY]*/
2517  U_LB_ZWSPACE = 28, /*[ZW]*/
2519  U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
2521  U_LB_WORD_JOINER = 30, /*[WJ]*/
2523  U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
2525  U_LB_H3 = 32, /*[H3]*/
2527  U_LB_JL = 33, /*[JL]*/
2529  U_LB_JT = 34, /*[JT]*/
2531  U_LB_JV = 35, /*[JV]*/
2533  U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
2535  U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
2537  U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
2539  U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2541  U_LB_E_BASE = 40, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
2543  U_LB_E_MODIFIER = 41, /*[EM]*/
2545  U_LB_ZWJ = 42, /*[ZWJ]*/
2547  U_LB_AKSARA = 43, /*[AK]*/
2549  U_LB_AKSARA_PREBASE = 44, /*[AP]*/
2551  U_LB_AKSARA_START = 45, /*[AS]*/
2553  U_LB_VIRAMA_FINAL = 46, /*[VF]*/
2555  U_LB_VIRAMA = 47, /*[VI]*/
2558 #ifndef U_HIDE_DEPRECATED_API
2559 
2566 #endif // U_HIDE_DEPRECATED_API
2567 } ULineBreak;
2568 
/*
 * Enumeration of Numeric_Type values (property name taken from the note
 * inside). The four visible enumerators have no initializers, so they take
 * the values 0..3 in declaration order.
 * The body of the U_HIDE_DEPRECATED_API section is not shown in this listing.
 */
2575 typedef enum UNumericType {
2576  /*
2577  * Note: UNumericType constants are parsed by preparseucd.py.
2578  * It matches lines like
2579  * U_NT_<Unicode Numeric_Type value name>
2580  */
2581 
2582  U_NT_NONE, /*[None]*/
2583  U_NT_DECIMAL, /*[de]*/
2584  U_NT_DIGIT, /*[di]*/
2585  U_NT_NUMERIC, /*[nu]*/
2586 #ifndef U_HIDE_DEPRECATED_API
2587 
2594 #endif // U_HIDE_DEPRECATED_API
2595 } UNumericType;
2596 
2603 typedef enum UHangulSyllableType {
2604  /*
2605  * Note: UHangulSyllableType constants are parsed by preparseucd.py.
2606  * It matches lines like
2607  * U_HST_<Unicode Hangul_Syllable_Type value name>
2608  */
2609 
2610  U_HST_NOT_APPLICABLE, /*[NA]*/
2611  U_HST_LEADING_JAMO, /*[L]*/
2612  U_HST_VOWEL_JAMO, /*[V]*/
2613  U_HST_TRAILING_JAMO, /*[T]*/
2614  U_HST_LV_SYLLABLE, /*[LV]*/
2615  U_HST_LVT_SYLLABLE, /*[LVT]*/
2616 #ifndef U_HIDE_DEPRECATED_API
2617 
2624 #endif // U_HIDE_DEPRECATED_API
2626 
2634  /*
2635  * Note: UIndicPositionalCategory constants are parsed by preparseucd.py.
2636  * It matches lines like
2637  * U_INPC_<Unicode Indic_Positional_Category value name>
2638  */
2639 
2673 
2681  /*
2682  * Note: UIndicSyllabicCategory constants are parsed by preparseucd.py.
2683  * It matches lines like
2684  * U_INSC_<Unicode Indic_Syllabic_Category value name>
2685  */
2686 
2762 
2769 typedef enum UIndicConjunctBreak {
2770  /*
2771  * Note: UIndicConjunctBreak constants are parsed by preparseucd.py.
2772  * It matches lines like
2773  * U_INCB_<Unicode Indic_Conjunct_Break value name>
2774  */
2775 
2785 
2792 typedef enum UVerticalOrientation {
2793  /*
2794  * Note: UVerticalOrientation constants are parsed by preparseucd.py.
2795  * It matches lines like
2796  * U_VO_<Unicode Vertical_Orientation value name>
2797  */
2798 
2808 
2816 typedef enum UIdentifierStatus {
2817  /*
2818  * Note: UIdentifierStatus constants are parsed by preparseucd.py.
2819  * It matches lines like
2820  * U_ID_STATUS_<Unicode Identifier_Status value name>
2821  */
2822 
2828 
/*
 * Enumeration of Identifier_Type values (property name taken from the note
 * inside).
 * No enumerators are present in this listing (listing lines 2843-2866 are
 * not shown).
 */
2836 typedef enum UIdentifierType {
2837  /*
2838  * Note: UIdentifierType constants are parsed by preparseucd.py.
2839  * It matches lines like
2840  * U_ID_TYPE_<Unicode Identifier_Type value name>
2841  */
2842 
2867 } UIdentifierType;
2868 
2896 U_CAPI UBool U_EXPORT2
2898 
/*
 * Prototype: takes a pointer to const UChar (s), an int32_t length and a
 * UProperty selector (which); returns UBool.
 * NOTE(review): by its name this reports whether the string s has the
 * binary property `which`; how `length` is interpreted (e.g. whether a
 * negative value means NUL-terminated) is not visible here - confirm in the
 * ICU API docs.
 */
2923 U_CAPI UBool U_EXPORT2
2924 u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which);
2925 
/*
 * Prototype: takes a UProperty selector and a UErrorCode in/out pointer;
 * returns a pointer to a const USet, which the caller therefore must not
 * modify through this pointer.
 * NOTE(review): ownership/lifetime of the returned set and the error codes
 * written to *pErrorCode are not visible here - confirm in the ICU API docs.
 */
2941 U_CAPI const USet * U_EXPORT2
2942 u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode);
2943 
2956 U_CAPI UBool U_EXPORT2
2958 
2971 U_CAPI UBool U_EXPORT2
2973 
2986 U_CAPI UBool U_EXPORT2
2988 
3007 U_CAPI UBool U_EXPORT2
3009 
3048 U_CAPI int32_t U_EXPORT2
3050 
3069 U_CAPI int32_t U_EXPORT2
3071 
3098 U_CAPI int32_t U_EXPORT2
3100 
/*
 * Prototype: takes a UProperty selector and a UErrorCode in/out pointer;
 * returns a pointer to a const UCPMap (type from "unicode/ucpmap.h",
 * included at the top of this header).
 * NOTE(review): ownership/lifetime of the returned map and the error codes
 * written to *pErrorCode are not visible here - confirm in the ICU API docs.
 */
3116 U_CAPI const UCPMap * U_EXPORT2
3117 u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode);
3118 
3141 U_CAPI double U_EXPORT2
3143 
/*
 * Sentinel constant of type double with the value -123456789.0.
 * NOTE(review): presumably the value returned by the double-returning
 * function declared just above (its name is not in this listing) when a
 * code point has no numeric value - confirm in the ICU API docs.
 */
3151 #define U_NO_NUMERIC_VALUE ((double)-123456789.)
3152 
/*
 * Family of twelve predicates with one shared shape: each takes a single
 * UChar32 code point c and returns UBool.
 * Ten of the names are the C <ctype.h> classification functions with a
 * "u_" prefix (islower, isupper, isdigit, isalpha, isalnum, isxdigit,
 * ispunct, isgraph, isblank, isspace); u_istitle and u_isdefined have no
 * <ctype.h> counterpart.
 * NOTE(review): the exact set of code points each predicate accepts is not
 * visible in this listing - consult the ICU API docs before relying on any
 * of them matching its <ctype.h> namesake.
 */
3176 U_CAPI UBool U_EXPORT2
3177 u_islower(UChar32 c);
3178 
3203 U_CAPI UBool U_EXPORT2
3204 u_isupper(UChar32 c);
3205 
3220 U_CAPI UBool U_EXPORT2
3221 u_istitle(UChar32 c);
3222 
3241 U_CAPI UBool U_EXPORT2
3242 u_isdigit(UChar32 c);
3243 
3262 U_CAPI UBool U_EXPORT2
3263 u_isalpha(UChar32 c);
3264 
3283 U_CAPI UBool U_EXPORT2
3284 u_isalnum(UChar32 c);
3285 
3306 U_CAPI UBool U_EXPORT2
3307 u_isxdigit(UChar32 c);
3308 
3322 U_CAPI UBool U_EXPORT2
3323 u_ispunct(UChar32 c);
3324 
3341 U_CAPI UBool U_EXPORT2
3342 u_isgraph(UChar32 c);
3343 
3369 U_CAPI UBool U_EXPORT2
3370 u_isblank(UChar32 c);
3371 
3394 U_CAPI UBool U_EXPORT2
3395 u_isdefined(UChar32 c);
3396 
3415 U_CAPI UBool U_EXPORT2
3416 u_isspace(UChar32 c);
3417 
3436 U_CAPI UBool U_EXPORT2
3438 
3476 U_CAPI UBool U_EXPORT2
3478 
3500 U_CAPI UBool U_EXPORT2
3501 u_iscntrl(UChar32 c);
3502 
3515 U_CAPI UBool U_EXPORT2
3517 
3533 U_CAPI UBool U_EXPORT2
3534 u_isprint(UChar32 c);
3535 
3553 U_CAPI UBool U_EXPORT2
3554 u_isbase(UChar32 c);
3555 
3572 U_CAPI UCharDirection U_EXPORT2
3573 u_charDirection(UChar32 c);
3574 
3590 U_CAPI UBool U_EXPORT2
3592 
3612 U_CAPI UChar32 U_EXPORT2
3614 
3631 U_CAPI UChar32 U_EXPORT2
3633 
3645 U_CAPI int8_t U_EXPORT2
3646 u_charType(UChar32 c);
3647 
3661 #define U_GET_GC_MASK(c) U_MASK(u_charType(c))
3662 
3680 typedef UBool U_CALLCONV
3681 UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
3682 
3702 U_CAPI void U_EXPORT2
3703 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
3704 
3705 #if !UCONFIG_NO_NORMALIZATION
3706 
3714 U_CAPI uint8_t U_EXPORT2
3715 u_getCombiningClass(UChar32 c);
3716 
3717 #endif
3718 
3742 U_CAPI int32_t U_EXPORT2
3744 
3754 U_CAPI UBlockCode U_EXPORT2
3755 ublock_getCode(UChar32 c);
3756 
3789 U_CAPI int32_t U_EXPORT2
3790 u_charName(UChar32 code, UCharNameChoice nameChoice,
3791  char *buffer, int32_t bufferLength,
3792  UErrorCode *pErrorCode);
3793 
3794 #ifndef U_HIDE_DEPRECATED_API
3795 
3813 U_DEPRECATED int32_t U_EXPORT2
3814 u_getISOComment(UChar32 c,
3815  char *dest, int32_t destCapacity,
3816  UErrorCode *pErrorCode);
3817 #endif /* U_HIDE_DEPRECATED_API */
3818 
3839 U_CAPI UChar32 U_EXPORT2
3840 u_charFromName(UCharNameChoice nameChoice,
3841  const char *name,
3842  UErrorCode *pErrorCode);
3843 
3861 typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
3862  UChar32 code,
3863  UCharNameChoice nameChoice,
3864  const char *name,
3865  int32_t length);
3866 
3888 U_CAPI void U_EXPORT2
3889 u_enumCharNames(UChar32 start, UChar32 limit,
3890  UEnumCharNamesFn *fn,
3891  void *context,
3892  UCharNameChoice nameChoice,
3893  UErrorCode *pErrorCode);
3894 
3926 U_CAPI const char* U_EXPORT2
3927 u_getPropertyName(UProperty property,
3928  UPropertyNameChoice nameChoice);
3929 
3949 U_CAPI UProperty U_EXPORT2
3950 u_getPropertyEnum(const char* alias);
3951 
3999 U_CAPI const char* U_EXPORT2
4000 u_getPropertyValueName(UProperty property,
4001  int32_t value,
4002  UPropertyNameChoice nameChoice);
4003 
4035 U_CAPI int32_t U_EXPORT2
4036 u_getPropertyValueEnum(UProperty property,
4037  const char* alias);
4038 
4053 U_CAPI UBool U_EXPORT2
4054 u_isIDStart(UChar32 c);
4055 
4070 U_CAPI UBool U_EXPORT2
4071 u_isIDPart(UChar32 c);
4072 
4086 U_CAPI bool U_EXPORT2
4087 u_hasIDType(UChar32 c, UIdentifierType type);
4088 
4121 U_CAPI int32_t U_EXPORT2
4122 u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode);
4123 
4144 U_CAPI UBool U_EXPORT2
4146 
4163 U_CAPI UBool U_EXPORT2
4165 
4184 U_CAPI UBool U_EXPORT2
4186 
4209 U_CAPI UChar32 U_EXPORT2
4210 u_tolower(UChar32 c);
4211 
4234 U_CAPI UChar32 U_EXPORT2
4235 u_toupper(UChar32 c);
4236 
4259 U_CAPI UChar32 U_EXPORT2
4260 u_totitle(UChar32 c);
4261 
4284 U_CAPI UChar32 U_EXPORT2
4285 u_foldCase(UChar32 c, uint32_t options);
4286 
4325 U_CAPI int32_t U_EXPORT2
4326 u_digit(UChar32 ch, int8_t radix);
4327 
4356 U_CAPI UChar32 U_EXPORT2
4357 u_forDigit(int32_t digit, int8_t radix);
4358 
4373 U_CAPI void U_EXPORT2
4374 u_charAge(UChar32 c, UVersionInfo versionArray);
4375 
4387 U_CAPI void U_EXPORT2
4388 u_getUnicodeVersion(UVersionInfo versionArray);
4389 
4390 #if !UCONFIG_NO_NORMALIZATION
4391 
4412 U_CAPI int32_t U_EXPORT2
4413 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
4414 
4415 #endif
4416 
4417 
4419 
4420 #endif /*_UCHAR*/
4421 /*eof*/
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:59
Binary property Ideographic.
Definition: uchar.h:272
Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
Definition: uchar.h:1446
Enumerated property Indic_Positional_Category.
Definition: uchar.h:661
Binary property Changes_When_Lowercased.
Definition: uchar.h:424
Binary property IDS_Binary_Operator (new in Unicode 3.2).
Definition: uchar.h:276
Binary property ID_Compat_Math_Continue.
Definition: uchar.h:554
U_CAPI UBool u_isalnum(UChar32 c)
Determines whether the specified code point is an alphanumeric character (letter or digit) according ...
Binary property Case_Ignorable.
Definition: uchar.h:422
Miscellaneous property Identifier_Type.
Definition: uchar.h:808
Enumerated property NFC_Quick_Check.
Definition: uchar.h:616
U_CAPI UBool u_isbase(UChar32 c)
Non-standard: Determines whether the specified code point is a base character.
One more than the highest normal UJoiningGroup value.
Definition: uchar.h:2316
Miscellaneous property Script_Extensions (new in Unicode 6.0).
Definition: uchar.h:792
UIndicConjunctBreak
Indic Conjunct Break constants.
Definition: uchar.h:2769
Same as UBLOCK_PRIVATE_USE_AREA.
Definition: uchar.h:1388
First constant for enumerated/integer Unicode properties.
Definition: uchar.h:573
Binary property Modifier_Combining_Mark.
Definition: uchar.h:560
Binary property XID_Start.
Definition: uchar.h:329
Binary property Join_Control.
Definition: uchar.h:283
Binary property Logical_Order_Exception (new in Unicode 3.2).
Definition: uchar.h:287
struct UCPMap UCPMap
Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
Definition: ucpmap.h:30
Binary property White_Space.
Definition: uchar.h:322
String property Titlecase_Mapping.
Definition: uchar.h:765
U_CAPI UBool u_isISOControl(UChar32 c)
Determines whether the specified code point is an ISO control code.
U_CAPI int32_t u_getIntPropertyValue(UChar32 c, UProperty which)
Get the property value for an enumerated or integer Unicode property for a code point.
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
U_CAPI UBool u_isblank(UChar32 c)
Determines whether the specified code point is a "blank" or "horizontal space", a character that visi...
One more than the highest normal UPropertyNameChoice value.
Definition: uchar.h:2107
U_CAPI UChar32 u_getBidiPairedBracket(UChar32 c)
Maps the specified character to its paired bracket character.
UIdentifierType
Identifier Type constants.
Definition: uchar.h:2836
One more than the last constant for enumerated/integer Unicode properties.
Definition: uchar.h:694
Enumerated property Numeric_Type.
Definition: uchar.h:601
Binary property xdigit (a C/POSIX character class).
Definition: uchar.h:418
Binary property Alphabetic.
Definition: uchar.h:210
First constant for double Unicode properties.
Definition: uchar.h:720
C API: Bit set option bit constants for various string and character processing functions.
U_CAPI UBool u_isdefined(UChar32 c)
Determines whether the specified code point is "defined", which usually means that it is assigned a c...
UBool UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
Type of a callback function for u_enumCharNames() that gets called for each Unicode character with th...
Definition: uchar.h:3861
Binary property of strings RGI_Emoji.
Definition: uchar.h:535
U_CAPI bool u_hasIDType(UChar32 c, UIdentifierType type)
Does the set of Identifier_Type values code point c contain the given type?
U_CAPI UChar32 u_totitle(UChar32 c)
The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.
U_CAPI UBool u_isspace(UChar32 c)
Determines if the specified character is a space character or not.
One more than the highest normal USentenceBreak value.
Definition: uchar.h:2470
Binary property Emoji_Component.
Definition: uchar.h:469
Binary property of strings Emoji_Keycap_Sequence.
Definition: uchar.h:500
Binary property Emoji.
Definition: uchar.h:441
Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED).
Definition: uchar.h:838
U_CAPI UProperty u_getPropertyEnum(const char *alias)
Return the UProperty enum for a given property name, as specified in the Unicode database file Proper...
U_CAPI int32_t u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode)
Writes code point c's Identifier_Type as a list of UIdentifierType values to the output types array a...
String property Simple_Case_Folding.
Definition: uchar.h:753
U_CAPI int32_t u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Get the FC_NFKC_Closure property string for a character.
Binary property NFC_Inert.
Definition: uchar.h:365
Binary property graph (a C/POSIX character class).
Definition: uchar.h:408
String property Bidi_Mirroring_Glyph.
Definition: uchar.h:736
One more than the last constant for bit-mask Unicode properties.
Definition: uchar.h:713
U_CAPI UBool u_isULowercase(UChar32 c)
Check if a code point has the Lowercase Unicode property.
Enumerated property Block.
Definition: uchar.h:576
Represents a nonexistent or invalid property or property value.
Definition: uchar.h:818
Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0.
Definition: uchar.h:2503
Binary property IDS_Unary_Operator.
Definition: uchar.h:542
Binary property Radical (new in Unicode 3.2).
Definition: uchar.h:302
UCharDirection
This specifies the language directional property of a character set.
Definition: uchar.h:1023
Binary property IDS_Trinary_Operator (new in Unicode 3.2).
Definition: uchar.h:280
Binary property Grapheme_Link (new in Unicode 3.2).
Definition: uchar.h:254
Enumerated property Decomposition_Type.
Definition: uchar.h:582
Binary property Regional_Indicator.
Definition: uchar.h:474
U_CAPI UChar32 u_foldCase(UChar32 c, uint32_t options)
The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:837
String property Case_Folding.
Definition: uchar.h:739
String property Name.
Definition: uchar.h:750
C API: This file defines an abstract map from Unicode code points to integer values.
String property Simple_Uppercase_Mapping.
Definition: uchar.h:762
U_CAPI UBool u_isxdigit(UChar32 c)
Determines whether the specified code point is a hexadecimal digit.
U_CAPI UBool u_isgraph(UChar32 c)
Determines whether the specified code point is a "graphic" character (printable, excluding spaces)...
Enumerated property Bidi_Class.
Definition: uchar.h:571
Enumerated property General_Category.
Definition: uchar.h:589
Binary property Prepended_Concatenation_Mark.
Definition: uchar.h:479
U_CAPI UBool u_isJavaIDPart(UChar32 c)
Determines if the specified character is permissible in a Java identifier.
Sm.
Definition: uchar.h:886
U_CAPI void u_charAge(UChar32 c, UVersionInfo versionArray)
Get the "age" of the code point.
String property Unicode_1_Name.
Definition: uchar.h:771
U_CAPI int32_t u_getIntPropertyMinValue(UProperty which)
Get the minimum value for an enumerated/integer/binary Unicode property.
U_CAPI void u_getUnicodeVersion(UVersionInfo versionArray)
Gets the Unicode version information.
UNumericType
Numeric Type constants.
Definition: uchar.h:2575
U_CAPI UChar32 u_charMirror(UChar32 c)
Maps the specified character to a "mirror-image" character.
One more than the highest normal UBidiPairedBracketType value.
Definition: uchar.h:1114
Binary property Pattern_White_Space (new in Unicode 4.1).
Definition: uchar.h:393
Enumerated property Vertical_Orientation.
Definition: uchar.h:675
Close paired bracket.
Definition: uchar.h:1106
UBool UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c...
Definition: uchar.h:3681
Binary property Changes_When_Casefolded.
Definition: uchar.h:430
U_CAPI void u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)
Enumerate efficiently all code points with their Unicode general categories.
Binary property NFD_Inert.
Definition: uchar.h:351
Binary property Diacritic.
Definition: uchar.h:235
Binary property Terminal_Punctuation.
Definition: uchar.h:311
U_CAPI UBool u_isMirrored(UChar32 c)
Determines whether the code point has the Bidi_Mirrored property.
U_CAPI const UCPMap * u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode)
Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
One more than the highest normal UNumericType value.
Definition: uchar.h:2593
U_CAPI UBool u_isUWhiteSpace(UChar32 c)
Check if a code point has the White_Space Unicode property.
Enumerated property NFD_Quick_Check.
Definition: uchar.h:610
U_CAPI int32_t u_digit(UChar32 ch, int8_t radix)
Returns the decimal digit value of the code point in the specified radix.
U_CAPI uint8_t u_getCombiningClass(UChar32 c)
Returns the combining class of the code point as specified in UnicodeData.txt.
Enumerated property Identifier_Status.
Definition: uchar.h:682
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:85
Binary property STerm (new in Unicode 4.0.1).
Definition: uchar.h:338
Enumerated property Joining_Group.
Definition: uchar.h:592
Binary property ID_Continue.
Definition: uchar.h:265
Binary property Emoji_Presentation.
Definition: uchar.h:448
U_CAPI int32_t u_getPropertyValueEnum(UProperty property, const char *alias)
Return the property value integer for a given value name, as specified in the Unicode database file P...
Open paired bracket.
Definition: uchar.h:1104
Binary property blank (a C/POSIX character class).
Definition: uchar.h:403
Binary property Quotation_Mark.
Definition: uchar.h:298
#define U_DEPRECATED
This is used to declare a function as a deprecated public ICU C API.
Definition: umachine.h:116
Binary property Changes_When_NFKC_Casefolded.
Definition: uchar.h:434
U_CAPI UBlockCode ublock_getCode(UChar32 c)
Returns the Unicode allocation block that contains the character.
First constant for binary Unicode properties.
Definition: uchar.h:212
U_CAPI UBool u_isIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in an identifier accordin...
Binary property Noncharacter_Code_Point.
Definition: uchar.h:296
Binary property Hyphen.
Definition: uchar.h:260
U_CAPI UBool u_ispunct(UChar32 c)
Determines whether the specified code point is a punctuation character.
U_CAPI int32_t u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
Retrieve the name of a Unicode character.
Enumerated property East_Asian_Width.
Definition: uchar.h:586
ULineBreak
Line Break constants.
Definition: uchar.h:2480
U_CAPI double u_getNumericValue(UChar32 c)
Get the numeric value for a Unicode code point as defined in the Unicode Character Database...
U_CAPI UBool u_isUUppercase(UChar32 c)
Check if a code point has the Uppercase Unicode property.
One more than the highest UCharDirection value.
Definition: uchar.h:1084
U_CAPI UChar32 u_tolower(UChar32 c)
The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.
Binary property Full_Composition_Exclusion.
Definition: uchar.h:243
Bitmask property General_Category_Mask.
Definition: uchar.h:705
String property Simple_Titlecase_Mapping.
Definition: uchar.h:759
One more than the highest normal UGraphemeClusterBreak value.
Definition: uchar.h:2367
Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
Definition: uchar.h:1265
UDecompositionType
Decomposition Type constants.
Definition: uchar.h:2117
One more than the highest normal ULineBreak value.
Definition: uchar.h:2565
UHangulSyllableType
Hangul Syllable Type constants.
Definition: uchar.h:2603
String property Simple_Lowercase_Mapping.
Definition: uchar.h:756
UIndicPositionalCategory
Indic Positional Category constants.
Definition: uchar.h:2633
Binary property print (a C/POSIX character class).
Definition: uchar.h:413
Binary property Case_Sensitive.
Definition: uchar.h:333
Standard or synthetic character name.
Definition: uchar.h:2074
Binary property Bidi_Mirrored.
Definition: uchar.h:223
UVerticalOrientation
Vertical Orientation constants.
Definition: uchar.h:2792
Binary property NFKC_Inert.
Definition: uchar.h:372
U_CAPI void u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive...
Binary property Changes_When_Casemapped.
Definition: uchar.h:432
First constant for string Unicode properties.
Definition: uchar.h:733
Binary property Grapheme_Extend (new in Unicode 3.2).
Definition: uchar.h:251
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:449
U_CAPI UBool u_isUAlphabetic(UChar32 c)
Check if a code point has the Alphabetic Unicode property.
UGraphemeClusterBreak
Grapheme Cluster Break constants.
Definition: uchar.h:2326
New No_Block value in Unicode 4.
Definition: uchar.h:1130
Binary property Extender.
Definition: uchar.h:239
Double property Numeric_Value.
Definition: uchar.h:718
Binary property Math.
Definition: uchar.h:292
Unicode character name (Name property).
Definition: uchar.h:2064
U_CAPI const char * u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases...
Binary property Grapheme_Base (new in Unicode 3.2).
Definition: uchar.h:247
Binary property NFKD_Inert.
Definition: uchar.h:358
U_CAPI UBool u_isIDIgnorable(UChar32 c)
Determines if the specified character should be regarded as an ignorable character in an identifier...
Enumerated property Indic_Conjunct_Break.
Definition: uchar.h:688
UCharCategory
Data for enumerated Unicode general category types.
Definition: uchar.h:826
Enumerated property Sentence_Break (new in Unicode 4.1).
Definition: uchar.h:643
Binary property Lowercase.
Definition: uchar.h:290
First constant for bit-mask Unicode properties.
Definition: uchar.h:707
Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
Definition: uchar.h:653
Binary property of strings Basic_Emoji.
Definition: uchar.h:493
U_CAPI UChar32 u_toupper(UChar32 c)
The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.
USentenceBreak
Sentence Break constants.
Definition: uchar.h:2441
Binary property Extended_Pictographic.
Definition: uchar.h:486
Enumerated property Indic_Syllabic_Category.
Definition: uchar.h:668
Binary property Unified_Ideograph (new in Unicode 3.2).
Definition: uchar.h:315
Enumerated property Canonical_Combining_Class.
Definition: uchar.h:579
UCharNameChoice
Selector constants for u_charName().
Definition: uchar.h:2062
One more than the last constant for binary Unicode properties.
Definition: uchar.h:566
Enumerated property Script.
Definition: uchar.h:604
Unicode 3.2 renames this block to "Greek and Coptic".
Definition: uchar.h:1157
Binary property Hex_Digit.
Definition: uchar.h:257
String property Uppercase_Mapping.
Definition: uchar.h:775
UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName().
Definition: uchar.h:2099
U_CAPI UBool u_isupper(UChar32 c)
Determines whether the specified code point has the general category "Lu" (uppercase letter)...
String property Lowercase_Mapping.
Definition: uchar.h:747
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:400
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:86
Enumerated property NFKC_Quick_Check.
Definition: uchar.h:619
Not a paired bracket.
Definition: uchar.h:1102
U_CAPI UBool u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which)
Returns true if the property is true for the string.
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:196
U_CAPI const USet * u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode)
Returns a frozen USet for a binary property.
Enumerated property Hangul_Syllable_Type, new in Unicode 4.
Definition: uchar.h:607
Binary property Dash.
Definition: uchar.h:225
Binary property alnum (a C/POSIX character class).
Definition: uchar.h:398
Cf.
Definition: uchar.h:870
Binary property Variation_Selector (new in Unicode 4.0.1).
Definition: uchar.h:344
Binary property of strings RGI_Emoji_ZWJ_Sequence.
Definition: uchar.h:528
One more than the highest normal UHangulSyllableType value.
Definition: uchar.h:2623
UBlockCode
Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
Definition: uchar.h:1122
Enumerated property Word_Break (new in Unicode 4.1).
Definition: uchar.h:648
Binary property Deprecated (new in Unicode 3.2).
Definition: uchar.h:232
Binary property Bidi_Control.
Definition: uchar.h:218
Binary property XID_Continue.
Definition: uchar.h:326
Same as UBLOCK_PRIVATE_USE.
Definition: uchar.h:1378
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:509
Binary property Uppercase.
Definition: uchar.h:318
Binary property Changes_When_Uppercased.
Definition: uchar.h:426
UJoiningGroup
Joining Group constants.
Definition: uchar.h:2189
Binary property Cased.
Definition: uchar.h:420
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:54
One more than the highest normal UEastAsianWidth value.
Definition: uchar.h:2047
U_CAPI UCharDirection u_charDirection(UChar32 c)
Returns the bidirectional category value for the code point, which is used in the Unicode bidirection...
One more than the highest normal UDecompositionType value.
Definition: uchar.h:2149
Cs.
Definition: uchar.h:874
UEastAsianWidth
East Asian Width constants.
Definition: uchar.h:2027
Enumerated property Trail_Canonical_Combining_Class.
Definition: uchar.h:633
U_CAPI int8_t u_charType(UChar32 c)
Returns the general category value for the code point.
U_CAPI UBool u_hasBinaryProperty(UChar32 c, UProperty which)
Check a binary Unicode property for a code point.
Non-category for unassigned and non-character code points.
Definition: uchar.h:836
First constant for Unicode properties with unusual value types.
Definition: uchar.h:794
U_CAPI int32_t u_charDigitValue(UChar32 c)
Returns the decimal digit value of a decimal digit character.
U_CAPI int32_t u_getIntPropertyMaxValue(UProperty which)
Get the maximum value for an enumerated/integer/binary Unicode property.
UWordBreakValues
Word Break constants.
Definition: uchar.h:2378
Binary property of strings RGI_Emoji_Modifier_Sequence.
Definition: uchar.h:507
U_CAPI UBool u_isJavaIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in a Java identifier...
Enumerated property Joining_Type.
Definition: uchar.h:595
One more than the last constant for double Unicode properties.
Definition: uchar.h:726
Binary property Emoji_Modifier.
Definition: uchar.h:455
Basic definitions for ICU, for both C and C++ APIs.
U_CAPI UBool u_isWhitespace(UChar32 c)
Determines if the specified code point is a whitespace character according to Java/ICU.
U_CAPI UBool u_isJavaSpaceChar(UChar32 c)
Determine if the specified code point is a space character according to Java.
String property Bidi_Paired_Bracket (new in Unicode 6.3).
Definition: uchar.h:778
Enumerated property Lead_Canonical_Combining_Class.
Definition: uchar.h:626
Binary property ASCII_Hex_Digit.
Definition: uchar.h:214
UJoiningType
Joining Type constants.
Definition: uchar.h:2159
Binary property Soft_Dotted (new in Unicode 3.2).
Definition: uchar.h:307
One more than the last constant for string Unicode properties.
Definition: uchar.h:784
Binary property Emoji_Modifier_Base.
Definition: uchar.h:462
Binary Property Segment_Starter.
Definition: uchar.h:383
U_CAPI UChar32 u_forDigit(int32_t digit, int8_t radix)
Determines the character representation for a specific digit in the specified radix.
Binary property ID_Start.
Definition: uchar.h:269
Corrected name from NameAliases.txt.
Definition: uchar.h:2076
Binary property Changes_When_Titlecased.
Definition: uchar.h:428
Enumerated property Line_Break.
Definition: uchar.h:598
One more than the highest normal UJoiningType value.
Definition: uchar.h:2179
U_CAPI UBool u_isalpha(UChar32 c)
Determines whether the specified code point is a letter character.
U_CAPI const char * u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property value, as given in the Unicode database file PropertyVal...
UIdentifierStatus
Identifier Status constants.
Definition: uchar.h:2816
Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
Definition: uchar.h:638
Binary property ID_Compat_Math_Start.
Definition: uchar.h:548
U_CAPI UBool u_iscntrl(UChar32 c)
Determines whether the specified code point is a control character (as defined by this function)...
Deprecated string property ISO_Comment.
Definition: uchar.h:743
Binary property of strings RGI_Emoji_Flag_Sequence.
Definition: uchar.h:514
U_CAPI UBool u_isIDPart(UChar32 c)
Determines if the specified character is permissible as a non-initial character of an identifier acco...
U_CAPI UBool u_islower(UChar32 c)
Determines whether the specified code point has the general category "Ll" (lowercase letter)...
Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
Definition: uchar.h:229
One more than the highest normal UBlockCode value.
Definition: uchar.h:2010
The Unicode_1_Name property value which is of little practical value.
Definition: uchar.h:2071
int32_t u_getISOComment(UChar32 c, char *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Returns an empty string.
One higher than the last enum UCharCategory constant.
Definition: uchar.h:904
Binary property of strings RGI_Emoji_Tag_Sequence.
Definition: uchar.h:521
One more than the highest normal UWordBreakValues value.
Definition: uchar.h:2431
Binary property Pattern_Syntax (new in Unicode 4.1).
Definition: uchar.h:388
One more than the last constant for Unicode properties with unusual value types.
Definition: uchar.h:814
U_CAPI UBool u_isprint(UChar32 c)
Determines whether the specified code point is a printable character.
UBidiPairedBracketType
Bidi Paired Bracket Type constants.
Definition: uchar.h:1094
One more than the highest normal UCharNameChoice value.
Definition: uchar.h:2082
UIndicSyllabicCategory
Indic Syllabic Category constants.
Definition: uchar.h:2680
U_CAPI UBool u_isdigit(UChar32 c)
Determines whether the specified code point is a digit character according to Java.
String property Age.
Definition: uchar.h:731
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
U_CAPI UChar32 u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
Find a Unicode character by its name and return its code point value.
Enumerated property NFKD_Quick_Check.
Definition: uchar.h:613
U_CAPI UBool u_istitle(UChar32 c)
Determines whether the specified code point is a titlecase letter.