/[pcre]/code/trunk/ucp.h
ViewVC logotype

Contents of /code/trunk/ucp.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1015 - (show annotations)
Sun Aug 26 16:07:14 2012 UTC (7 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 4599 byte(s)
Improve extended grapheme clusters using a bit table.
/*************************************************
*          Unicode Property Table handler        *
*************************************************/

#ifndef _UCP_H
#define _UCP_H

/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors. */

/* These are the general character categories. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};

/* These are the particular character categories. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};

/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. */

enum {
  ucp_gbCR,                /*  0 */
  ucp_gbLF,                /*  1 */
  ucp_gbControl,           /*  2 */
  ucp_gbExtend,            /*  3 */
  ucp_gbPrepend,           /*  4 */
  ucp_gbSpacingMark,       /*  5 */
  ucp_gbL,                 /*  6 Hangul syllable type L */
  ucp_gbV,                 /*  7 Hangul syllable type V */
  ucp_gbT,                 /*  8 Hangul syllable type T */
  ucp_gbLV,                /*  9 Hangul syllable type LV */
  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
  ucp_gbOther              /* 11 */
};

/* These are the script identifications. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0: */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1: */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2: */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0: */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0: */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri
};

#endif

/* End of ucp.h */

Properties

Name Value
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5