/[pcre]/code/tags/pcre-6.2/pcre_printint.c
ViewVC logotype

Contents of /code/tags/pcre-6.2/pcre_printint.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 82 - (show annotations)
Sat Feb 24 21:41:01 2007 UTC (12 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 12105 byte(s)
Tag code/trunk as code/tags/pcre-6.2.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2005 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. */
44
45
46 #include "pcre_internal.h"
47
48
49 static const char *OP_names[] = { OP_NAME_LIST };
50
51
52 /*************************************************
53 * Print single- or multi-byte character *
54 *************************************************/
55
56 static int
57 print_char(FILE *f, uschar *ptr, BOOL utf8)
58 {
59 int c = *ptr;
60
61 if (!utf8 || (c & 0xc0) != 0xc0)
62 {
63 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
64 return 0;
65 }
66 else
67 {
68 int i;
69 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
70 int s = 6*a;
71 c = (c & _pcre_utf8_table3[a]) << s;
72 for (i = 1; i <= a; i++)
73 {
74 /* This is a check for malformed UTF-8; it should only occur if the sanity
75 check has been turned off. Rather than swallow random bytes, just stop if
76 we hit a bad one. Print it with \X instead of \x as an indication. */
77
78 if ((ptr[i] & 0xc0) != 0x80)
79 {
80 fprintf(f, "\\X{%x}", c);
81 return i - 1;
82 }
83
84 /* The byte is OK */
85
86 s -= 6;
87 c |= (ptr[i] & 0x3f) << s;
88 }
89 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
90 return a;
91 }
92 }
93
94
95
96 /*************************************************
97 * Find Unicode property name *
98 *************************************************/
99
/* Translate a Unicode property value into its name by searching the _pcre_utt
table for an entry whose value field matches.

Arguments:
  property    the property value to look up

Returns:      the name field of the matching entry, or "??" if no entry
              matches or if SUPPORT_UCP is not defined
*/

static const char *
get_ucpname(int property)
{
#ifdef SUPPORT_UCP
int i;

/* The scan starts at index _pcre_utt_size - 1. This assumes _pcre_utt_size is
the number of entries in _pcre_utt (TODO confirm against its definition);
starting at _pcre_utt_size itself would read one element past the end of the
table. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (property == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
(void)property;    /* Not used when there is no property table */
return "??";
#endif
}
114
115
116
117 /*************************************************
118 * Print compiled regex *
119 *************************************************/
120
121 /* Make this function work for a regex with integers either byte order.
122 However, we assume that what we are passed is a compiled regex. */
123
124 EXPORT void
125 _pcre_printint(pcre *external_re, FILE *f)
126 {
127 real_pcre *re = (real_pcre *)external_re;
128 uschar *codestart, *code;
129 BOOL utf8;
130
131 unsigned int options = re->options;
132 int offset = re->name_table_offset;
133 int count = re->name_count;
134 int size = re->name_entry_size;
135
136 if (re->magic_number != MAGIC_NUMBER)
137 {
138 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
139 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
140 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
141 options = ((options << 24) & 0xff000000) |
142 ((options << 8) & 0x00ff0000) |
143 ((options >> 8) & 0x0000ff00) |
144 ((options >> 24) & 0x000000ff);
145 }
146
147 code = codestart = (uschar *)re + offset + count * size;
148 utf8 = (options & PCRE_UTF8) != 0;
149
150 for(;;)
151 {
152 uschar *ccode;
153 int c;
154 int extra = 0;
155
156 fprintf(f, "%3d ", (int)(code - codestart));
157
158 if (*code >= OP_BRA)
159 {
160 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
161 fprintf(f, "%3d Bra extra\n", GET(code, 1));
162 else
163 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
164 code += _pcre_OP_lengths[OP_BRA];
165 continue;
166 }
167
168 switch(*code)
169 {
170 case OP_END:
171 fprintf(f, " %s\n", OP_names[*code]);
172 fprintf(f, "------------------------------------------------------------------\n");
173 return;
174
175 case OP_OPT:
176 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
177 break;
178
179 case OP_CHAR:
180 {
181 fprintf(f, " ");
182 do
183 {
184 code++;
185 code += 1 + print_char(f, code, utf8);
186 }
187 while (*code == OP_CHAR);
188 fprintf(f, "\n");
189 continue;
190 }
191 break;
192
193 case OP_CHARNC:
194 {
195 fprintf(f, " NC ");
196 do
197 {
198 code++;
199 code += 1 + print_char(f, code, utf8);
200 }
201 while (*code == OP_CHARNC);
202 fprintf(f, "\n");
203 continue;
204 }
205 break;
206
207 case OP_KETRMAX:
208 case OP_KETRMIN:
209 case OP_ALT:
210 case OP_KET:
211 case OP_ASSERT:
212 case OP_ASSERT_NOT:
213 case OP_ASSERTBACK:
214 case OP_ASSERTBACK_NOT:
215 case OP_ONCE:
216 case OP_COND:
217 case OP_REVERSE:
218 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
219 break;
220
221 case OP_BRANUMBER:
222 printf("%3d %s", GET2(code, 1), OP_names[*code]);
223 break;
224
225 case OP_CREF:
226 if (GET2(code, 1) == CREF_RECURSE)
227 fprintf(f, " Cond recurse");
228 else
229 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
230 break;
231
232 case OP_STAR:
233 case OP_MINSTAR:
234 case OP_PLUS:
235 case OP_MINPLUS:
236 case OP_QUERY:
237 case OP_MINQUERY:
238 case OP_TYPESTAR:
239 case OP_TYPEMINSTAR:
240 case OP_TYPEPLUS:
241 case OP_TYPEMINPLUS:
242 case OP_TYPEQUERY:
243 case OP_TYPEMINQUERY:
244 fprintf(f, " ");
245 if (*code >= OP_TYPESTAR)
246 {
247 fprintf(f, "%s", OP_names[code[1]]);
248 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
249 {
250 fprintf(f, " %s ", get_ucpname(code[2]));
251 extra = 1;
252 }
253 }
254 else extra = print_char(f, code+1, utf8);
255 fprintf(f, "%s", OP_names[*code]);
256 break;
257
258 case OP_EXACT:
259 case OP_UPTO:
260 case OP_MINUPTO:
261 fprintf(f, " ");
262 extra = print_char(f, code+3, utf8);
263 fprintf(f, "{");
264 if (*code != OP_EXACT) fprintf(f, ",");
265 fprintf(f, "%d}", GET2(code,1));
266 if (*code == OP_MINUPTO) fprintf(f, "?");
267 break;
268
269 case OP_TYPEEXACT:
270 case OP_TYPEUPTO:
271 case OP_TYPEMINUPTO:
272 fprintf(f, " %s", OP_names[code[3]]);
273 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
274 {
275 fprintf(f, " %s ", get_ucpname(code[4]));
276 extra = 1;
277 }
278 fprintf(f, "{");
279 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
280 fprintf(f, "%d}", GET2(code,1));
281 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
282 break;
283
284 case OP_NOT:
285 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
286 else fprintf(f, " [^\\x%02x]", c);
287 break;
288
289 case OP_NOTSTAR:
290 case OP_NOTMINSTAR:
291 case OP_NOTPLUS:
292 case OP_NOTMINPLUS:
293 case OP_NOTQUERY:
294 case OP_NOTMINQUERY:
295 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
296 else fprintf(f, " [^\\x%02x]", c);
297 fprintf(f, "%s", OP_names[*code]);
298 break;
299
300 case OP_NOTEXACT:
301 case OP_NOTUPTO:
302 case OP_NOTMINUPTO:
303 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
304 else fprintf(f, " [^\\x%02x]{", c);
305 if (*code != OP_NOTEXACT) fprintf(f, "0,");
306 fprintf(f, "%d}", GET2(code,1));
307 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
308 break;
309
310 case OP_RECURSE:
311 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
312 break;
313
314 case OP_REF:
315 fprintf(f, " \\%d", GET2(code,1));
316 ccode = code + _pcre_OP_lengths[*code];
317 goto CLASS_REF_REPEAT;
318
319 case OP_CALLOUT:
320 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
321 GET(code, 2 + LINK_SIZE));
322 break;
323
324 case OP_PROP:
325 case OP_NOTPROP:
326 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
327 break;
328
329 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
330 having this code always here, and it makes it less messy without all those
331 #ifdefs. */
332
333 case OP_CLASS:
334 case OP_NCLASS:
335 case OP_XCLASS:
336 {
337 int i, min, max;
338 BOOL printmap;
339
340 fprintf(f, " [");
341
342 if (*code == OP_XCLASS)
343 {
344 extra = GET(code, 1);
345 ccode = code + LINK_SIZE + 1;
346 printmap = (*ccode & XCL_MAP) != 0;
347 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
348 }
349 else
350 {
351 printmap = TRUE;
352 ccode = code + 1;
353 }
354
355 /* Print a bit map */
356
357 if (printmap)
358 {
359 for (i = 0; i < 256; i++)
360 {
361 if ((ccode[i/8] & (1 << (i&7))) != 0)
362 {
363 int j;
364 for (j = i+1; j < 256; j++)
365 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
366 if (i == '-' || i == ']') fprintf(f, "\\");
367 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
368 if (--j > i)
369 {
370 if (j != i + 1) fprintf(f, "-");
371 if (j == '-' || j == ']') fprintf(f, "\\");
372 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
373 }
374 i = j;
375 }
376 }
377 ccode += 32;
378 }
379
380 /* For an XCLASS there is always some additional data */
381
382 if (*code == OP_XCLASS)
383 {
384 int ch;
385 while ((ch = *ccode++) != XCL_END)
386 {
387 if (ch == XCL_PROP)
388 {
389 fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
390 }
391 else if (ch == XCL_NOTPROP)
392 {
393 fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
394 }
395 else
396 {
397 ccode += 1 + print_char(f, ccode, TRUE);
398 if (ch == XCL_RANGE)
399 {
400 fprintf(f, "-");
401 ccode += 1 + print_char(f, ccode, TRUE);
402 }
403 }
404 }
405 }
406
407 /* Indicate a non-UTF8 class which was created by negation */
408
409 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
410
411 /* Handle repeats after a class or a back reference */
412
413 CLASS_REF_REPEAT:
414 switch(*ccode)
415 {
416 case OP_CRSTAR:
417 case OP_CRMINSTAR:
418 case OP_CRPLUS:
419 case OP_CRMINPLUS:
420 case OP_CRQUERY:
421 case OP_CRMINQUERY:
422 fprintf(f, "%s", OP_names[*ccode]);
423 extra += _pcre_OP_lengths[*ccode];
424 break;
425
426 case OP_CRRANGE:
427 case OP_CRMINRANGE:
428 min = GET2(ccode,1);
429 max = GET2(ccode,3);
430 if (max == 0) fprintf(f, "{%d,}", min);
431 else fprintf(f, "{%d,%d}", min, max);
432 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
433 extra += _pcre_OP_lengths[*ccode];
434 break;
435 }
436 }
437 break;
438
439 /* Anything else is just an item with no data*/
440
441 default:
442 fprintf(f, " %s", OP_names[*code]);
443 break;
444 }
445
446 code += _pcre_OP_lengths[*code] + extra;
447 fprintf(f, "\n");
448 }
449 }
450
451 /* End of pcre_printint.c */

  ViewVC Help
Powered by ViewVC 1.1.5