/[pcre]/code/trunk/maint/ucptest.c
ViewVC logotype

Contents of /code/trunk/maint/ucptest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 97 - (show annotations)
Mon Mar 5 12:36:47 2007 UTC (8 years, 2 months ago) by ph10
Original Path: code/trunk/maintain/ucptest.c
File MIME type: text/plain
File size: 8275 byte(s)
Error occurred while calculating annotation data.
Applied Bob and Daniel's patches to convert the build system to automake. Added 
the maintain directory, containing files that are used for maintenance, but are 
not distributed. This is an intermediate step.
1 /***************************************************
2 * A program for testing the Unicode property table *
3 ***************************************************/
4
5 /* Copyright (c) University of Cambridge 2006 */
6
7 /* Compile thus:
8 gcc -o ucptest maintain/ucptest.c pcre_ucp_searchfuncs.c
9 */
10
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include "pcre_internal.h"
16 #include "ucp.h"
17 #include "ucpinternal.h"
18
19
20 /* -------------------------------------------------------------------*/
21
22 #define CS (char *) /* cast to char *; used below when passing uschar buffers to fgets(), strcmp() and strtoul() */
23 #define CCS (const char *) /* cast to const char *; not referenced in the visible part of this file */
24 #define CSS (char **) /* cast to char **; used below for the strtoul() end-pointer argument */
25 #define US (unsigned char *) /* cast to unsigned char *; used below to turn string literals into uschar * values */
26 #define CUS (const unsigned char *) /* cast to const unsigned char *; not referenced in the visible part of this file */
27 #define USS (unsigned char **) /* cast to unsigned char **; not referenced in the visible part of this file */
28
29 /* -------------------------------------------------------------------*/
30
31
32
33
34 /*************************************************
35 * Print Unicode property info for a char *
36 *************************************************/
37
38 static void
39 print_prop(int c)
40 {
41 int fulltype, script, othercase;
42 int type = _pcre_ucp_findprop(c, &fulltype, &script);
43
44 printf("%04x ", c);
45 if (type < 0) printf("not found\n"); else
46 {
47 uschar *fulltypename = US"??";
48 uschar *typename = US"??";
49 uschar *scriptname = US"??";
50 switch (type)
51 {
52 case ucp_C: typename = US"Control"; break;
53 case ucp_L: typename = US"Letter"; break;
54 case ucp_M: typename = US"Mark"; break;
55 case ucp_N: typename = US"Number"; break;
56 case ucp_P: typename = US"Punctuation"; break;
57 case ucp_S: typename = US"Symbol"; break;
58 case ucp_Z: typename = US"Separator"; break;
59 }
60 switch (fulltype)
61 {
62 case ucp_Cc: fulltypename = US"Control"; break;
63 case ucp_Cf: fulltypename = US"Format"; break;
64 case ucp_Cn: fulltypename = US"Unassigned"; break;
65 case ucp_Co: fulltypename = US"Private use"; break;
66 case ucp_Cs: fulltypename = US"Surrogate"; break;
67 case ucp_Ll: fulltypename = US"Lower case letter"; break;
68 case ucp_Lm: fulltypename = US"Modifier letter"; break;
69 case ucp_Lo: fulltypename = US"Other letter"; break;
70 case ucp_Lt: fulltypename = US"Title case letter"; break;
71 case ucp_Lu: fulltypename = US"Upper case letter"; break;
72 case ucp_Mc: fulltypename = US"Spacing mark"; break;
73 case ucp_Me: fulltypename = US"Enclosing mark"; break;
74 case ucp_Mn: fulltypename = US"Non-spacing mark"; break;
75 case ucp_Nd: fulltypename = US"Decimal number"; break;
76 case ucp_Nl: fulltypename = US"Letter number"; break;
77 case ucp_No: fulltypename = US"Other number"; break;
78 case ucp_Pc: fulltypename = US"Connector punctuation"; break;
79 case ucp_Pd: fulltypename = US"Dash punctuation"; break;
80 case ucp_Pe: fulltypename = US"Close punctuation"; break;
81 case ucp_Pf: fulltypename = US"Final punctuation"; break;
82 case ucp_Pi: fulltypename = US"Initial punctuation"; break;
83 case ucp_Po: fulltypename = US"Other punctuation"; break;
84 case ucp_Ps: fulltypename = US"Open punctuation"; break;
85 case ucp_Sc: fulltypename = US"Currency symbol"; break;
86 case ucp_Sk: fulltypename = US"Modifier symbol"; break;
87 case ucp_Sm: fulltypename = US"Mathematical symbol"; break;
88 case ucp_So: fulltypename = US"Other symbol"; break;
89 case ucp_Zl: fulltypename = US"Line separator"; break;
90 case ucp_Zp: fulltypename = US"Paragraph separator"; break;
91 case ucp_Zs: fulltypename = US"Space separator"; break;
92 }
93 switch(script)
94 {
95 case ucp_Arabic: scriptname = US"Arabic"; break;
96 case ucp_Armenian: scriptname = US"Armenian"; break;
97 case ucp_Balinese: scriptname = US"Balinese"; break;
98 case ucp_Bengali: scriptname = US"Bengali"; break;
99 case ucp_Bopomofo: scriptname = US"Bopomofo"; break;
100 case ucp_Braille: scriptname = US"Braille"; break;
101 case ucp_Buginese: scriptname = US"Buginese"; break;
102 case ucp_Buhid: scriptname = US"Buhid"; break;
103 case ucp_Canadian_Aboriginal: scriptname = US"Canadian_Aboriginal"; break;
104 case ucp_Cherokee: scriptname = US"Cherokee"; break;
105 case ucp_Common: scriptname = US"Common"; break;
106 case ucp_Coptic: scriptname = US"Coptic"; break;
107 case ucp_Cuneiform: scriptname = US"Cuneiform"; break;
108 case ucp_Cypriot: scriptname = US"Cypriot"; break;
109 case ucp_Cyrillic: scriptname = US"Cyrillic"; break;
110 case ucp_Deseret: scriptname = US"Deseret"; break;
111 case ucp_Devanagari: scriptname = US"Devanagari"; break;
112 case ucp_Ethiopic: scriptname = US"Ethiopic"; break;
113 case ucp_Georgian: scriptname = US"Georgian"; break;
114 case ucp_Glagolitic: scriptname = US"Glagolitic"; break;
115 case ucp_Gothic: scriptname = US"Gothic"; break;
116 case ucp_Greek: scriptname = US"Greek"; break;
117 case ucp_Gujarati: scriptname = US"Gujarati"; break;
118 case ucp_Gurmukhi: scriptname = US"Gurmukhi"; break;
119 case ucp_Han: scriptname = US"Han"; break;
120 case ucp_Hangul: scriptname = US"Hangul"; break;
121 case ucp_Hanunoo: scriptname = US"Hanunoo"; break;
122 case ucp_Hebrew: scriptname = US"Hebrew"; break;
123 case ucp_Hiragana: scriptname = US"Hiragana"; break;
124 case ucp_Inherited: scriptname = US"Inherited"; break;
125 case ucp_Kannada: scriptname = US"Kannada"; break;
126 case ucp_Katakana: scriptname = US"Katakana"; break;
127 case ucp_Kharoshthi: scriptname = US"Kharoshthi"; break;
128 case ucp_Khmer: scriptname = US"Khmer"; break;
129 case ucp_Lao: scriptname = US"Lao"; break;
130 case ucp_Latin: scriptname = US"Latin"; break;
131 case ucp_Limbu: scriptname = US"Limbu"; break;
132 case ucp_Linear_B: scriptname = US"Linear_B"; break;
133 case ucp_Malayalam: scriptname = US"Malayalam"; break;
134 case ucp_Mongolian: scriptname = US"Mongolian"; break;
135 case ucp_Myanmar: scriptname = US"Myanmar"; break;
136 case ucp_New_Tai_Lue: scriptname = US"New_Tai_Lue"; break;
137 case ucp_Nko: scriptname = US"Nko"; break;
138 case ucp_Ogham: scriptname = US"Ogham"; break;
139 case ucp_Old_Italic: scriptname = US"Old_Italic"; break;
140 case ucp_Old_Persian: scriptname = US"Old_Persian"; break;
141 case ucp_Oriya: scriptname = US"Oriya"; break;
142 case ucp_Osmanya: scriptname = US"Osmanya"; break;
143 case ucp_Phags_Pa: scriptname = US"Phags_Pa"; break;
144 case ucp_Phoenician: scriptname = US"Phoenician"; break;
145 case ucp_Runic: scriptname = US"Runic"; break;
146 case ucp_Shavian: scriptname = US"Shavian"; break;
147 case ucp_Sinhala: scriptname = US"Sinhala"; break;
148 case ucp_Syloti_Nagri: scriptname = US"Syloti_Nagri"; break;
149 case ucp_Syriac: scriptname = US"Syriac"; break;
150 case ucp_Tagalog: scriptname = US"Tagalog"; break;
151 case ucp_Tagbanwa: scriptname = US"Tagbanwa"; break;
152 case ucp_Tai_Le: scriptname = US"Tai_Le"; break;
153 case ucp_Tamil: scriptname = US"Tamil"; break;
154 case ucp_Telugu: scriptname = US"Telugu"; break;
155 case ucp_Thaana: scriptname = US"Thaana"; break;
156 case ucp_Thai: scriptname = US"Thai"; break;
157 case ucp_Tibetan: scriptname = US"Tibetan"; break;
158 case ucp_Tifinagh: scriptname = US"Tifinagh"; break;
159 case ucp_Ugaritic: scriptname = US"Ugaritic"; break;
160 case ucp_Yi: scriptname = US"Yi"; break;
161 }
162
163 printf("%s: %s %s", typename, fulltypename, scriptname);
164 othercase = _pcre_ucp_othercase(c);
165 if (othercase >= 0) printf(" %04x", othercase);
166 printf("\n");
167 }
168 }
169
170
171
172 /*************************************************
173 * Main program *
174 *************************************************/
175
176 int
177 main(void)
178 {
179 uschar buffer[1024];
180 while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
181 {
182 uschar name[24];
183 uschar *s, *t;
184
185 printf("%s", buffer);
186 s = buffer;
187 while (isspace(*s)) s++;
188 if (*s == 0) continue;
189
190 for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
191 *t = 0;
192 while (isspace(*s)) s++;
193
194 if (strcmp(CS name, "findprop") == 0)
195 {
196 while (*s != 0)
197 {
198 uschar *endptr;
199 int c = strtoul(CS s, CSS(&endptr), 16);
200 print_prop(c);
201 s = endptr;
202 while (isspace(*s)) s++;
203 }
204 }
205
206 else printf("Unknown test command %s\n", name);
207 }
208
209 return 0;
210 }
211
212 /* End */

  ViewVC Help
Powered by ViewVC 1.1.5