/[pcre]/code/trunk/maint/ucptest.c
ViewVC logotype

Contents of /code/trunk/maint/ucptest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 351 - (show annotations)
Fri Jul 4 18:27:16 2008 UTC (6 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 8346 byte(s)
Error occurred while calculating annotation data.
Final tidies for new Unicode property code; upgrade to Unicode 5.1.0.
1 /***************************************************
2 * A program for testing the Unicode property table *
3 ***************************************************/
4
5 /* Copyright (c) University of Cambridge 2008 */
6
7 /* Compile thus:
8 gcc -DHAVE_CONFIG_H -o ucptest ucptest.c ../pcre_ucd.c ../pcre_tables.c
9 */
10
11 /* The program expects to read commands on stdin, and it writes output
12 to stdout. There is only one command, "findprop", followed by a list of Unicode
13 code points as hex numbers (without any prefixes). The output is one line per
14 character, giving its Unicode properties followed by its other case if there is
15 one. */
16
17 #ifdef HAVE_CONFIG_H
18 #include "../config.h"
19 #endif
20
21 #include <ctype.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include "../pcre_internal.h"
26 #include "../ucp.h"
27
28
29 /* -------------------------------------------------------------------*/
30
/* Cast shorthands. Each macro expands to a bare pointer cast and is written
directly in front of the expression it converts, e.g. strcmp(CS name, "x"). */

/* Casts to plain-char pointer types */
#define CS   (char *)
#define CCS  (const char *)
#define CSS  (char **)

/* Casts to unsigned-char pointer types */
#define US   (unsigned char *)
#define CUS  (const unsigned char *)
#define USS  (unsigned char **)
37
38 /* -------------------------------------------------------------------*/
39
40
41
42
43 /*************************************************
44 * Print Unicode property info for a char *
45 *************************************************/
46
47 static void
48 print_prop(int c)
49 {
50 int type = UCD_CATEGORY(c);
51 int fulltype = UCD_CHARTYPE(c);
52 int script = UCD_SCRIPT(c);
53 int othercase = UCD_OTHERCASE(c);
54
55 uschar *fulltypename = US"??";
56 uschar *typename = US"??";
57 uschar *scriptname = US"??";
58
59 switch (type)
60 {
61 case ucp_C: typename = US"Control"; break;
62 case ucp_L: typename = US"Letter"; break;
63 case ucp_M: typename = US"Mark"; break;
64 case ucp_N: typename = US"Number"; break;
65 case ucp_P: typename = US"Punctuation"; break;
66 case ucp_S: typename = US"Symbol"; break;
67 case ucp_Z: typename = US"Separator"; break;
68 }
69
70 switch (fulltype)
71 {
72 case ucp_Cc: fulltypename = US"Control"; break;
73 case ucp_Cf: fulltypename = US"Format"; break;
74 case ucp_Cn: fulltypename = US"Unassigned"; break;
75 case ucp_Co: fulltypename = US"Private use"; break;
76 case ucp_Cs: fulltypename = US"Surrogate"; break;
77 case ucp_Ll: fulltypename = US"Lower case letter"; break;
78 case ucp_Lm: fulltypename = US"Modifier letter"; break;
79 case ucp_Lo: fulltypename = US"Other letter"; break;
80 case ucp_Lt: fulltypename = US"Title case letter"; break;
81 case ucp_Lu: fulltypename = US"Upper case letter"; break;
82 case ucp_Mc: fulltypename = US"Spacing mark"; break;
83 case ucp_Me: fulltypename = US"Enclosing mark"; break;
84 case ucp_Mn: fulltypename = US"Non-spacing mark"; break;
85 case ucp_Nd: fulltypename = US"Decimal number"; break;
86 case ucp_Nl: fulltypename = US"Letter number"; break;
87 case ucp_No: fulltypename = US"Other number"; break;
88 case ucp_Pc: fulltypename = US"Connector punctuation"; break;
89 case ucp_Pd: fulltypename = US"Dash punctuation"; break;
90 case ucp_Pe: fulltypename = US"Close punctuation"; break;
91 case ucp_Pf: fulltypename = US"Final punctuation"; break;
92 case ucp_Pi: fulltypename = US"Initial punctuation"; break;
93 case ucp_Po: fulltypename = US"Other punctuation"; break;
94 case ucp_Ps: fulltypename = US"Open punctuation"; break;
95 case ucp_Sc: fulltypename = US"Currency symbol"; break;
96 case ucp_Sk: fulltypename = US"Modifier symbol"; break;
97 case ucp_Sm: fulltypename = US"Mathematical symbol"; break;
98 case ucp_So: fulltypename = US"Other symbol"; break;
99 case ucp_Zl: fulltypename = US"Line separator"; break;
100 case ucp_Zp: fulltypename = US"Paragraph separator"; break;
101 case ucp_Zs: fulltypename = US"Space separator"; break;
102 }
103
104 switch(script)
105 {
106 case ucp_Arabic: scriptname = US"Arabic"; break;
107 case ucp_Armenian: scriptname = US"Armenian"; break;
108 case ucp_Balinese: scriptname = US"Balinese"; break;
109 case ucp_Bengali: scriptname = US"Bengali"; break;
110 case ucp_Bopomofo: scriptname = US"Bopomofo"; break;
111 case ucp_Braille: scriptname = US"Braille"; break;
112 case ucp_Buginese: scriptname = US"Buginese"; break;
113 case ucp_Buhid: scriptname = US"Buhid"; break;
114 case ucp_Canadian_Aboriginal: scriptname = US"Canadian_Aboriginal"; break;
115 case ucp_Cherokee: scriptname = US"Cherokee"; break;
116 case ucp_Common: scriptname = US"Common"; break;
117 case ucp_Coptic: scriptname = US"Coptic"; break;
118 case ucp_Cuneiform: scriptname = US"Cuneiform"; break;
119 case ucp_Cypriot: scriptname = US"Cypriot"; break;
120 case ucp_Cyrillic: scriptname = US"Cyrillic"; break;
121 case ucp_Deseret: scriptname = US"Deseret"; break;
122 case ucp_Devanagari: scriptname = US"Devanagari"; break;
123 case ucp_Ethiopic: scriptname = US"Ethiopic"; break;
124 case ucp_Georgian: scriptname = US"Georgian"; break;
125 case ucp_Glagolitic: scriptname = US"Glagolitic"; break;
126 case ucp_Gothic: scriptname = US"Gothic"; break;
127 case ucp_Greek: scriptname = US"Greek"; break;
128 case ucp_Gujarati: scriptname = US"Gujarati"; break;
129 case ucp_Gurmukhi: scriptname = US"Gurmukhi"; break;
130 case ucp_Han: scriptname = US"Han"; break;
131 case ucp_Hangul: scriptname = US"Hangul"; break;
132 case ucp_Hanunoo: scriptname = US"Hanunoo"; break;
133 case ucp_Hebrew: scriptname = US"Hebrew"; break;
134 case ucp_Hiragana: scriptname = US"Hiragana"; break;
135 case ucp_Inherited: scriptname = US"Inherited"; break;
136 case ucp_Kannada: scriptname = US"Kannada"; break;
137 case ucp_Katakana: scriptname = US"Katakana"; break;
138 case ucp_Kharoshthi: scriptname = US"Kharoshthi"; break;
139 case ucp_Khmer: scriptname = US"Khmer"; break;
140 case ucp_Lao: scriptname = US"Lao"; break;
141 case ucp_Latin: scriptname = US"Latin"; break;
142 case ucp_Limbu: scriptname = US"Limbu"; break;
143 case ucp_Linear_B: scriptname = US"Linear_B"; break;
144 case ucp_Malayalam: scriptname = US"Malayalam"; break;
145 case ucp_Mongolian: scriptname = US"Mongolian"; break;
146 case ucp_Myanmar: scriptname = US"Myanmar"; break;
147 case ucp_New_Tai_Lue: scriptname = US"New_Tai_Lue"; break;
148 case ucp_Nko: scriptname = US"Nko"; break;
149 case ucp_Ogham: scriptname = US"Ogham"; break;
150 case ucp_Old_Italic: scriptname = US"Old_Italic"; break;
151 case ucp_Old_Persian: scriptname = US"Old_Persian"; break;
152 case ucp_Oriya: scriptname = US"Oriya"; break;
153 case ucp_Osmanya: scriptname = US"Osmanya"; break;
154 case ucp_Phags_Pa: scriptname = US"Phags_Pa"; break;
155 case ucp_Phoenician: scriptname = US"Phoenician"; break;
156 case ucp_Runic: scriptname = US"Runic"; break;
157 case ucp_Shavian: scriptname = US"Shavian"; break;
158 case ucp_Sinhala: scriptname = US"Sinhala"; break;
159 case ucp_Syloti_Nagri: scriptname = US"Syloti_Nagri"; break;
160 case ucp_Syriac: scriptname = US"Syriac"; break;
161 case ucp_Tagalog: scriptname = US"Tagalog"; break;
162 case ucp_Tagbanwa: scriptname = US"Tagbanwa"; break;
163 case ucp_Tai_Le: scriptname = US"Tai_Le"; break;
164 case ucp_Tamil: scriptname = US"Tamil"; break;
165 case ucp_Telugu: scriptname = US"Telugu"; break;
166 case ucp_Thaana: scriptname = US"Thaana"; break;
167 case ucp_Thai: scriptname = US"Thai"; break;
168 case ucp_Tibetan: scriptname = US"Tibetan"; break;
169 case ucp_Tifinagh: scriptname = US"Tifinagh"; break;
170 case ucp_Ugaritic: scriptname = US"Ugaritic"; break;
171 case ucp_Yi: scriptname = US"Yi"; break;
172 }
173
174 printf("%04x %s: %s %s", c, typename, fulltypename, scriptname);
175 if (othercase != c) printf(" %04x", othercase);
176 printf("\n");
177 }
178
179
180
181 /*************************************************
182 * Main program *
183 *************************************************/
184
185 int
186 main(void)
187 {
188 uschar buffer[1024];
189 while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
190 {
191 uschar name[24];
192 uschar *s, *t;
193
194 printf("%s", buffer);
195 s = buffer;
196 while (isspace(*s)) s++;
197 if (*s == 0) continue;
198
199 for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
200 *t = 0;
201 while (isspace(*s)) s++;
202
203 if (strcmp(CS name, "findprop") == 0)
204 {
205 while (*s != 0)
206 {
207 uschar *endptr;
208 int c = strtoul(CS s, CSS(&endptr), 16);
209 print_prop(c);
210 s = endptr;
211 while (isspace(*s)) s++;
212 }
213 }
214
215 else printf("Unknown test command %s\n", name);
216 }
217
218 return 0;
219 }
220
221 /* End */

  ViewVC Help
Powered by ViewVC 1.1.5