/[pcre]/code/tags/pcre-7.9/pcre_printint.src
ViewVC logotype

Contents of /code/tags/pcre-7.9/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 417 - (show annotations)
Sat Apr 11 16:44:43 2009 UTC (6 years, 2 months ago) by ph10
File MIME type: application/x-wais-source
File size: 14464 byte(s)
Error occurred while calculating annotation data.
Tag release 7.9.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2009 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains a PCRE private debugging function for printing out the
42 internal form of a compiled regular expression, along with some supporting
43 local functions. This source file is used in two places:
44
45 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47
48 (2) It is always #included by pcretest.c, which can be asked to print out a
49 compiled regex for debugging purposes. */
50
51
52 /* Macro that decides whether a character should be output as a literal or in
53 hexadecimal. We don't use isprint() because that can vary from system to system
54 (even without the use of locales) and we want the output always to be the same,
55 for testing purposes. This macro is used in pcretest as well as in this file. */
56
/* PRINTABLE(c) is true when c lies in the fixed range that this file prints
as a literal character: 64..254 when EBCDIC is defined, 32..126 otherwise.
Note that the argument is evaluated twice in the expansion, so it must not
have side effects. */
57 #ifdef EBCDIC
58 #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
59 #else
60 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
61 #endif
62
63 /* The table of operator names. It is indexed by opcode value in this file
(for example OP_names[*code]). The initializer list OP_NAME_LIST is not defined
in this file. */
64
65 static const char *OP_names[] = { OP_NAME_LIST };
66
67
68
69 /*************************************************
70 * Print single- or multi-byte character *
71 *************************************************/
72
73 static int
74 print_char(FILE *f, uschar *ptr, BOOL utf8)
75 {
76 int c = *ptr;
77
78 #ifndef SUPPORT_UTF8
79 utf8 = utf8; /* Avoid compiler warning */
80 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
81 return 0;
82
83 #else
84 if (!utf8 || (c & 0xc0) != 0xc0)
85 {
86 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
87 return 0;
88 }
89 else
90 {
91 int i;
92 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
93 int s = 6*a;
94 c = (c & _pcre_utf8_table3[a]) << s;
95 for (i = 1; i <= a; i++)
96 {
97 /* This is a check for malformed UTF-8; it should only occur if the sanity
98 check has been turned off. Rather than swallow random bytes, just stop if
99 we hit a bad one. Print it with \X instead of \x as an indication. */
100
101 if ((ptr[i] & 0xc0) != 0x80)
102 {
103 fprintf(f, "\\X{%x}", c);
104 return i - 1;
105 }
106
107 /* The byte is OK */
108
109 s -= 6;
110 c |= (ptr[i] & 0x3f) << s;
111 }
112 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
113 return a;
114 }
115 #endif
116 }
117
118
119
120 /*************************************************
121 * Find Unicode property name *
122 *************************************************/
123
/* Look up the name of a Unicode property.

Arguments:
  ptype    the property type
  pvalue   the property value

Returns: a pointer to the name of the matching _pcre_utt entry, or "??" when
         no entry matches or when SUPPORT_UCP is not defined.

The table is scanned from its last entry towards its first, so if several
entries match, the one with the highest index is reported. */

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
  int idx = _pcre_utt_size;

  while (idx-- > 0)
    {
    if (_pcre_utt[idx].type == ptype && _pcre_utt[idx].value == pvalue)
      {
      return _pcre_utt_names + _pcre_utt[idx].name_offset;
      }
    }
  return "??";
#else
  /* The arguments are unused in this configuration; keep compilers quiet. */
  (void)ptype;
  (void)pvalue;
  return "??";
#endif
}
140
141
142
143 /*************************************************
144 * Print compiled regex *
145 *************************************************/
146
147 /* Make this function work for a regex with integers in either byte order.
148 However, we assume that what we are passed is a compiled regex. The
149 print_lengths flag controls whether offsets and lengths of items are printed.
150 They can be turned off from pcretest so that automatic tests on bytecode can be
151 written that do not depend on the value of LINK_SIZE. */
152
/* Write a human-readable listing of a compiled pattern, one item per output
line, stopping after the OP_END item.

Arguments:
  external_re    the compiled pattern; it is cast to real_pcre * here
  f              the stream to write to
  print_lengths  when TRUE, the offset of each item and the values read with
                 GET(code, 1) for bracket-like items are printed; when FALSE
                 a blank filler is printed in their place

Returns: nothing */
153 static void
154 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
155 {
156 real_pcre *re = (real_pcre *)external_re;
157 uschar *codestart, *code;
158 BOOL utf8;
159
160 unsigned int options = re->options;
161 int offset = re->name_table_offset;
162 int count = re->name_count;
163 int size = re->name_entry_size;
164
/* If the magic number does not match, the header fields are byte-reversed
before use: the three name-table fields as 16-bit values, the options as a
32-bit value. */
165 if (re->magic_number != MAGIC_NUMBER)
166 {
167 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
168 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
169 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
170 options = ((options << 24) & 0xff000000) |
171 ((options << 8) & 0x00ff0000) |
172 ((options >> 8) & 0x0000ff00) |
173 ((options >> 24) & 0x000000ff);
174 }
175
/* The compiled code starts immediately after the name table, which occupies
count entries of size bytes each, starting at offset. */
176 code = codestart = (uschar *)re + offset + count * size;
177 utf8 = (options & PCRE_UTF8) != 0;
178
/* Each pass of this loop prints one item. The variable "extra" collects the
number of bytes an item occupies beyond its entry in _pcre_OP_lengths; it is
added in when code is advanced at the bottom of the loop. */
179 for(;;)
180 {
181 uschar *ccode;
182 int c;
183 int extra = 0;
184
185 if (print_lengths)
186 fprintf(f, "%3d ", (int)(code - codestart));
187 else
188 fprintf(f, " ");
189
190 switch(*code)
191 {
/* OP_END is the only way out of the function. */
192 case OP_END:
193 fprintf(f, " %s\n", OP_names[*code]);
194 fprintf(f, "------------------------------------------------------------------\n");
195 return;
196
197 case OP_OPT:
198 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
199 break;
200
/* A run of consecutive OP_CHAR items is printed on a single line. The
pointer is advanced inside the inner loop, so "continue" is used to bypass
the common advance at the bottom of the outer loop. */
201 case OP_CHAR:
202 fprintf(f, " ");
203 do
204 {
205 code++;
206 code += 1 + print_char(f, code, utf8);
207 }
208 while (*code == OP_CHAR);
209 fprintf(f, "\n");
210 continue;
211
/* The same as OP_CHAR, but for runs of OP_CHARNC, prefixed with "NC". */
212 case OP_CHARNC:
213 fprintf(f, " NC ");
214 do
215 {
216 code++;
217 code += 1 + print_char(f, code, utf8);
218 }
219 while (*code == OP_CHARNC);
220 fprintf(f, "\n");
221 continue;
222
/* These two also print the 2-byte value that follows the GET(code, 1) field
(presumably the capturing group number - confirm against the compiler). */
223 case OP_CBRA:
224 case OP_SCBRA:
225 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
226 else fprintf(f, " ");
227 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
228 break;
229
/* Items printed as the GET(code, 1) value (if wanted) followed by the name. */
230 case OP_BRA:
231 case OP_SBRA:
232 case OP_KETRMAX:
233 case OP_KETRMIN:
234 case OP_ALT:
235 case OP_KET:
236 case OP_ASSERT:
237 case OP_ASSERT_NOT:
238 case OP_ASSERTBACK:
239 case OP_ASSERTBACK_NOT:
240 case OP_ONCE:
241 case OP_COND:
242 case OP_SCOND:
243 case OP_REVERSE:
244 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
245 else fprintf(f, " ");
246 fprintf(f, "%s", OP_names[*code]);
247 break;
248
249 case OP_CREF:
250 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
251 break;
252
253 case OP_RREF:
254 c = GET2(code, 1);
255 if (c == RREF_ANY)
256 fprintf(f, " Cond recurse any");
257 else
258 fprintf(f, " Cond recurse %d", c);
259 break;
260
261 case OP_DEF:
262 fprintf(f, " Cond def");
263 break;
264
/* Single-item repeats. The test *code >= OP_TYPESTAR separates the "type"
repeats, whose operand is another opcode in code[1], from the character
repeats, whose operand is a character. NOTE(review): this relies on the
OP_TYPE* opcodes having larger values than the character repeat opcodes -
confirm against the opcode definitions. */
265 case OP_STAR:
266 case OP_MINSTAR:
267 case OP_POSSTAR:
268 case OP_PLUS:
269 case OP_MINPLUS:
270 case OP_POSPLUS:
271 case OP_QUERY:
272 case OP_MINQUERY:
273 case OP_POSQUERY:
274 case OP_TYPESTAR:
275 case OP_TYPEMINSTAR:
276 case OP_TYPEPOSSTAR:
277 case OP_TYPEPLUS:
278 case OP_TYPEMINPLUS:
279 case OP_TYPEPOSPLUS:
280 case OP_TYPEQUERY:
281 case OP_TYPEMINQUERY:
282 case OP_TYPEPOSQUERY:
283 fprintf(f, " ");
284 if (*code >= OP_TYPESTAR)
285 {
286 fprintf(f, "%s", OP_names[code[1]]);
/* A property operand is followed by two more bytes, read here as code[2]
and code[3], which have to be skipped as well. */
287 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
288 {
289 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
290 extra = 2;
291 }
292 }
293 else extra = print_char(f, code+1, utf8);
294 fprintf(f, "%s", OP_names[*code]);
295 break;
296
/* Counted character repeats: the 2-byte count is read at code+1 and the
character starts at code+3. */
297 case OP_EXACT:
298 case OP_UPTO:
299 case OP_MINUPTO:
300 case OP_POSUPTO:
301 fprintf(f, " ");
302 extra = print_char(f, code+3, utf8);
303 fprintf(f, "{");
304 if (*code != OP_EXACT) fprintf(f, "0,");
305 fprintf(f, "%d}", GET2(code,1));
306 if (*code == OP_MINUPTO) fprintf(f, "?");
307 else if (*code == OP_POSUPTO) fprintf(f, "+");
308 break;
309
/* Counted type repeats: the 2-byte count is read at code+1 and the operand
opcode is code[3], with two property bytes after it for OP_PROP/OP_NOTPROP. */
310 case OP_TYPEEXACT:
311 case OP_TYPEUPTO:
312 case OP_TYPEMINUPTO:
313 case OP_TYPEPOSUPTO:
314 fprintf(f, " %s", OP_names[code[3]]);
315 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
316 {
317 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
318 extra = 2;
319 }
320 fprintf(f, "{");
321 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
322 fprintf(f, "%d}", GET2(code,1));
323 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
324 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
325 break;
326
/* The negated-character items below take their operand as one byte and do
not go through print_char(). */
327 case OP_NOT:
328 c = code[1];
329 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
330 else fprintf(f, " [^\\x%02x]", c);
331 break;
332
333 case OP_NOTSTAR:
334 case OP_NOTMINSTAR:
335 case OP_NOTPOSSTAR:
336 case OP_NOTPLUS:
337 case OP_NOTMINPLUS:
338 case OP_NOTPOSPLUS:
339 case OP_NOTQUERY:
340 case OP_NOTMINQUERY:
341 case OP_NOTPOSQUERY:
342 c = code[1];
343 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
344 else fprintf(f, " [^\\x%02x]", c);
345 fprintf(f, "%s", OP_names[*code]);
346 break;
347
348 case OP_NOTEXACT:
349 case OP_NOTUPTO:
350 case OP_NOTMINUPTO:
351 case OP_NOTPOSUPTO:
352 c = code[3];
353 if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
354 else fprintf(f, " [^\\x%02x]{", c);
355 if (*code != OP_NOTEXACT) fprintf(f, "0,");
356 fprintf(f, "%d}", GET2(code,1));
357 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
358 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
359 break;
360
361 case OP_RECURSE:
362 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
363 else fprintf(f, " ");
364 fprintf(f, "%s", OP_names[*code]);
365 break;
366
/* A back reference may be followed by a repeat item, just like a class, so
ccode is set to the item after the reference and control jumps into the
class case to share its repeat handling. */
367 case OP_REF:
368 fprintf(f, " \\%d", GET2(code,1));
369 ccode = code + _pcre_OP_lengths[*code];
370 goto CLASS_REF_REPEAT;
371
372 case OP_CALLOUT:
373 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
374 GET(code, 2 + LINK_SIZE));
375 break;
376
377 case OP_PROP:
378 case OP_NOTPROP:
379 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
380 break;
381
382 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
383 having this code always here, and it makes it less messy without all those
384 #ifdefs. */
385
386 case OP_CLASS:
387 case OP_NCLASS:
388 case OP_XCLASS:
389 {
390 int i, min, max;
391 BOOL printmap;
392
393 fprintf(f, " [");
394
/* For an XCLASS, "extra" is set from GET(code, 1) and a flags byte follows;
the XCL_MAP flag says whether a bit map is present, and XCL_NOT causes "^"
to be printed. NOTE(review): this presumably relies on the fixed length
recorded for OP_XCLASS in _pcre_OP_lengths being zero - confirm. The other
class items always have a bit map directly after the opcode. */
395 if (*code == OP_XCLASS)
396 {
397 extra = GET(code, 1);
398 ccode = code + LINK_SIZE + 1;
399 printmap = (*ccode & XCL_MAP) != 0;
400 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
401 }
402 else
403 {
404 printmap = TRUE;
405 ccode = code + 1;
406 }
407
408 /* Print a bit map */
409
/* The map has one bit per character value 0..255 (32 bytes). For each set
bit, j is moved to the last value in the run of consecutive set bits. A run
of two is printed as two characters with no hyphen between them; a longer
run is printed as first-last. Setting i = j skips the rest of the run. */
410 if (printmap)
411 {
412 for (i = 0; i < 256; i++)
413 {
414 if ((ccode[i/8] & (1 << (i&7))) != 0)
415 {
416 int j;
417 for (j = i+1; j < 256; j++)
418 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
419 if (i == '-' || i == ']') fprintf(f, "\\");
420 if (PRINTABLE(i)) fprintf(f, "%c", i);
421 else fprintf(f, "\\x%02x", i);
422 if (--j > i)
423 {
424 if (j != i + 1) fprintf(f, "-");
425 if (j == '-' || j == ']') fprintf(f, "\\");
426 if (PRINTABLE(j)) fprintf(f, "%c", j);
427 else fprintf(f, "\\x%02x", j);
428 }
429 i = j;
430 }
431 }
432 ccode += 32;
433 }
434
435 /* For an XCLASS there is always some additional data */
436
/* The additional data is a list of items terminated by XCL_END. A property
item is followed by a type byte and a value byte; any other item is followed
by one character, or by two characters when it is XCL_RANGE. The characters
are always printed in UTF-8 mode. */
437 if (*code == OP_XCLASS)
438 {
439 int ch;
440 while ((ch = *ccode++) != XCL_END)
441 {
442 if (ch == XCL_PROP)
443 {
444 int ptype = *ccode++;
445 int pvalue = *ccode++;
446 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
447 }
448 else if (ch == XCL_NOTPROP)
449 {
450 int ptype = *ccode++;
451 int pvalue = *ccode++;
452 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
453 }
454 else
455 {
456 ccode += 1 + print_char(f, ccode, TRUE);
457 if (ch == XCL_RANGE)
458 {
459 fprintf(f, "-");
460 ccode += 1 + print_char(f, ccode, TRUE);
461 }
462 }
463 }
464 }
465
466 /* Indicate a non-UTF8 class which was created by negation */
467
468 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
469
470 /* Handle repeats after a class or a back reference */
471
/* This label is also the target of the goto in the OP_REF case. At this
point ccode addresses the item after the class or reference. If that item
is a repeat, it is printed on the same line and its length is added to
"extra" so that the common advance below steps over it too. */
472 CLASS_REF_REPEAT:
473 switch(*ccode)
474 {
475 case OP_CRSTAR:
476 case OP_CRMINSTAR:
477 case OP_CRPLUS:
478 case OP_CRMINPLUS:
479 case OP_CRQUERY:
480 case OP_CRMINQUERY:
481 fprintf(f, "%s", OP_names[*ccode]);
482 extra += _pcre_OP_lengths[*ccode];
483 break;
484
/* A maximum of zero is printed as an open-ended range. */
485 case OP_CRRANGE:
486 case OP_CRMINRANGE:
487 min = GET2(ccode,1);
488 max = GET2(ccode,3);
489 if (max == 0) fprintf(f, "{%d,}", min);
490 else fprintf(f, "{%d,%d}", min, max);
491 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
492 extra += _pcre_OP_lengths[*ccode];
493 break;
494
495 /* Do nothing if it's not a repeat; this code stops picky compilers
496 warning about the lack of a default code path. */
497
498 default:
499 break;
500 }
501 }
502 break;
503
504 /* Anything else is just an item with no data. */
505
506 default:
507 fprintf(f, " %s", OP_names[*code]);
508 break;
509 }
510
/* Common advance: the table length for this opcode plus whatever variable
part the case above recorded in "extra". */
511 code += _pcre_OP_lengths[*code] + extra;
512 fprintf(f, "\n");
513 }
514 }
515
516 /* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5