/[pcre]/code/branches/pcre16/pcre_printint.src
ViewVC logotype

Contents of /code/branches/pcre16/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 782 - (show annotations)
Sat Dec 3 23:58:37 2011 UTC (8 years, 4 months ago) by zherczeg
File MIME type: application/x-wais-source
File size: 18493 byte(s)
Error occurred while calculating annotation data.
Start working on UTF-16. Updating macros and adding new ones.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2010 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains a PCRE private debugging function for printing out the
42 internal form of a compiled regular expression, along with some supporting
43 local functions. This source file is used in two places:
44
45 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46 (PCRE_DEBUG defined in pcre_internal.h). It is not included in production
47 compiles.
48
49 (2) It is always #included by pcretest.c, which can be asked to print out a
50 compiled regex for debugging purposes. */
51
52
/* PRINTABLE(c) yields non-zero when the character value c is to be written
out as a literal, and zero when it must be shown in hexadecimal. isprint() is
deliberately avoided: its result can differ between systems (even when no
locale is in use), whereas test output has to be identical everywhere. The
macro is shared with pcretest. Note that the argument is evaluated twice. */

#ifndef EBCDIC
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#else
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#endif
63
64 /* The table of operator names. */
65
66 static const char *OP_names[] = { OP_NAME_LIST };
67
68
69
70 /*************************************************
71 * Print single- or multi-byte character *
72 *************************************************/
73
74 static int
75 print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
76 {
77 int c = *ptr;
78
79 #ifndef SUPPORT_UTF
80 (void)utf; /* Avoid compiler warning */
81 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
82 return 0;
83
84 #else
85
86 #ifdef COMPILE_PCRE8
87
88 if (!utf || (c & 0xc0) != 0xc0)
89 {
90 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
91 return 0;
92 }
93 else
94 {
95 int i;
96 int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
97 int s = 6*a;
98 c = (c & PRIV(utf8_table3)[a]) << s;
99 for (i = 1; i <= a; i++)
100 {
101 /* This is a check for malformed UTF-8; it should only occur if the sanity
102 check has been turned off. Rather than swallow random bytes, just stop if
103 we hit a bad one. Print it with \X instead of \x as an indication. */
104
105 if ((ptr[i] & 0xc0) != 0x80)
106 {
107 fprintf(f, "\\X{%x}", c);
108 return i - 1;
109 }
110
111 /* The byte is OK */
112
113 s -= 6;
114 c |= (ptr[i] & 0x3f) << s;
115 }
116 fprintf(f, "\\x{%x}", c);
117 return a;
118 }
119
120 #else
121
122 #ifdef COMPILE_PCRE16
123
124 if (!utf || (c & 0xfc00) != 0xd800)
125 {
126 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
127 return 0;
128 }
129 else
130 {
131 /* This is a check for malformed UTF-16; it should only occur if the sanity
132 check has been turned off. Rather than swallow a low surrogate, just stop if
133 we hit a bad one. Print it with \X instead of \x as an indication. */
134
135 if ((ptr[1] & 0xfc00) != 0xdc00)
136 {
137 fprintf(f, "\\X{%x}", c);
138 return 0;
139 }
140
141 c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
142 fprintf(f, "\\x{%x}", c);
143 return 1;
144 }
145
146 #endif /* COMPILE_PCRE16 */
147
148 #endif /* COMPILE_PCRE8 */
149
150 #endif /* SUPPORT_UTF */
151 }
152
153 /*************************************************
154 * Print uchar string (regardless of utf) *
155 *************************************************/
156
157 static void
158 print_puchar(FILE *f, PCRE_PUCHAR ptr)
159 {
160 while (*ptr != '\0')
161 {
162 register int c = *ptr++;
163 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
164 }
165 }
166
167 /*************************************************
168 * Find Unicode property name *
169 *************************************************/
170
/* Find the name of a Unicode property from its type and value.

Arguments:
  ptype    the property type
  pvalue   the property value

Returns:   a pointer to the name held in the property name table, or "??" if
           no table entry has this type/value pair, or if the code was
           compiled without SUPPORT_UCP
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
  int i;

  /* Scan the property table from its last entry down to its first. */

  for (i = PRIV(utt_size) - 1; i >= 0; i--)
  {
    if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value)
      return PRIV(utt_names) + PRIV(utt)[i].name_offset;
  }
  return "??";
#else
  /* Without property support there is nothing to look up. The casts keep
  compilers quiet about the unused parameters without doing any arithmetic
  on them. */

  (void)ptype;
  (void)pvalue;
  return "??";
#endif
}
187
188
189
190 /*************************************************
191 * Print compiled regex *
192 *************************************************/
193
194 /* Make this function work for a regex with integers either byte order.
195 However, we assume that what we are passed is a compiled regex. The
196 print_lengths flag controls whether offsets and lengths of items are printed.
197 They can be turned off from pcretest so that automatic tests on bytecode can be
198 written that do not depend on the value of LINK_SIZE. */
199
200 static void
201 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
202 {
203 real_pcre *re = (real_pcre *)external_re;
204 pcre_uchar *codestart, *code;
205 BOOL utf;
206
207 unsigned int options = re->options;
208 int offset = re->name_table_offset;
209 int count = re->name_count;
210 int size = re->name_entry_size;
211
212 if (re->magic_number != MAGIC_NUMBER)
213 {
214 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
215 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
216 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
217 options = ((options << 24) & 0xff000000) |
218 ((options << 8) & 0x00ff0000) |
219 ((options >> 8) & 0x0000ff00) |
220 ((options >> 24) & 0x000000ff);
221 }
222
223 code = codestart = (pcre_uchar *)re + offset + count * size;
224 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
225 utf = (options & PCRE_UTF8) != 0;
226
227 for(;;)
228 {
229 pcre_uchar *ccode;
230 const char *flag = " ";
231 int c;
232 int extra = 0;
233
234 if (print_lengths)
235 fprintf(f, "%3d ", (int)(code - codestart));
236 else
237 fprintf(f, " ");
238
239 switch(*code)
240 {
241 /* ========================================================================== */
242 /* These cases are never obeyed. This is a fudge that causes a compile-
243 time error if the vectors OP_names or PRIV(OP_lengths), which are indexed
244 by opcode, are not the correct length. It seems to be the only way to do
245 such a check at compile time, as the sizeof() operator does not work in
246 the C preprocessor. We do this while compiling pcretest, because that
247 #includes pcre_tables.c, which holds PRIV(OP_lengths). We can't do this
248 when building pcre_compile.c with PCRE_DEBUG set, because it doesn't then
249 know the size of PRIV(OP_lengths). */
250
251 #ifdef COMPILING_PCRETEST
252 case OP_TABLE_LENGTH:
253 case OP_TABLE_LENGTH +
254 ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
255 (sizeof(PRIV(OP_lengths)) == OP_TABLE_LENGTH)):
256 break;
257 #endif
258 /* ========================================================================== */
259
260 case OP_END:
261 fprintf(f, " %s\n", OP_names[*code]);
262 fprintf(f, "------------------------------------------------------------------\n");
263 return;
264
265 case OP_CHAR:
266 fprintf(f, " ");
267 do
268 {
269 code++;
270 code += 1 + print_char(f, code, utf);
271 }
272 while (*code == OP_CHAR);
273 fprintf(f, "\n");
274 continue;
275
276 case OP_CHARI:
277 fprintf(f, " /i ");
278 do
279 {
280 code++;
281 code += 1 + print_char(f, code, utf);
282 }
283 while (*code == OP_CHARI);
284 fprintf(f, "\n");
285 continue;
286
287 case OP_CBRA:
288 case OP_CBRAPOS:
289 case OP_SCBRA:
290 case OP_SCBRAPOS:
291 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
292 else fprintf(f, " ");
293 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
294 break;
295
296 case OP_BRA:
297 case OP_BRAPOS:
298 case OP_SBRA:
299 case OP_SBRAPOS:
300 case OP_KETRMAX:
301 case OP_KETRMIN:
302 case OP_KETRPOS:
303 case OP_ALT:
304 case OP_KET:
305 case OP_ASSERT:
306 case OP_ASSERT_NOT:
307 case OP_ASSERTBACK:
308 case OP_ASSERTBACK_NOT:
309 case OP_ONCE:
310 case OP_ONCE_NC:
311 case OP_COND:
312 case OP_SCOND:
313 case OP_REVERSE:
314 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
315 else fprintf(f, " ");
316 fprintf(f, "%s", OP_names[*code]);
317 break;
318
319 case OP_CLOSE:
320 fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
321 break;
322
323 case OP_CREF:
324 case OP_NCREF:
325 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
326 break;
327
328 case OP_RREF:
329 c = GET2(code, 1);
330 if (c == RREF_ANY)
331 fprintf(f, " Cond recurse any");
332 else
333 fprintf(f, " Cond recurse %d", c);
334 break;
335
336 case OP_NRREF:
337 c = GET2(code, 1);
338 if (c == RREF_ANY)
339 fprintf(f, " Cond nrecurse any");
340 else
341 fprintf(f, " Cond nrecurse %d", c);
342 break;
343
344 case OP_DEF:
345 fprintf(f, " Cond def");
346 break;
347
348 case OP_STARI:
349 case OP_MINSTARI:
350 case OP_POSSTARI:
351 case OP_PLUSI:
352 case OP_MINPLUSI:
353 case OP_POSPLUSI:
354 case OP_QUERYI:
355 case OP_MINQUERYI:
356 case OP_POSQUERYI:
357 flag = "/i";
358 /* Fall through */
359 case OP_STAR:
360 case OP_MINSTAR:
361 case OP_POSSTAR:
362 case OP_PLUS:
363 case OP_MINPLUS:
364 case OP_POSPLUS:
365 case OP_QUERY:
366 case OP_MINQUERY:
367 case OP_POSQUERY:
368 case OP_TYPESTAR:
369 case OP_TYPEMINSTAR:
370 case OP_TYPEPOSSTAR:
371 case OP_TYPEPLUS:
372 case OP_TYPEMINPLUS:
373 case OP_TYPEPOSPLUS:
374 case OP_TYPEQUERY:
375 case OP_TYPEMINQUERY:
376 case OP_TYPEPOSQUERY:
377 fprintf(f, " %s ", flag);
378 if (*code >= OP_TYPESTAR)
379 {
380 fprintf(f, "%s", OP_names[code[1]]);
381 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
382 {
383 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
384 extra = 2;
385 }
386 }
387 else extra = print_char(f, code+1, utf);
388 fprintf(f, "%s", OP_names[*code]);
389 break;
390
391 case OP_EXACTI:
392 case OP_UPTOI:
393 case OP_MINUPTOI:
394 case OP_POSUPTOI:
395 flag = "/i";
396 /* Fall through */
397 case OP_EXACT:
398 case OP_UPTO:
399 case OP_MINUPTO:
400 case OP_POSUPTO:
401 fprintf(f, " %s ", flag);
402 extra = print_char(f, code + 1 + IMM2_SIZE, utf);
403 fprintf(f, "{");
404 if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
405 fprintf(f, "%d}", GET2(code,1));
406 if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
407 else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
408 break;
409
410 case OP_TYPEEXACT:
411 case OP_TYPEUPTO:
412 case OP_TYPEMINUPTO:
413 case OP_TYPEPOSUPTO:
414 fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
415 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
416 {
417 fprintf(f, " %s ", get_ucpname(code[1 + IMM2_SIZE + 1],
418 code[1 + IMM2_SIZE + 2]));
419 extra = 2;
420 }
421 fprintf(f, "{");
422 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
423 fprintf(f, "%d}", GET2(code,1));
424 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
425 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
426 break;
427
428 case OP_NOTI:
429 flag = "/i";
430 /* Fall through */
431 case OP_NOT:
432 c = code[1];
433 if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);
434 else fprintf(f, " %s [^\\x%02x]", flag, c);
435 break;
436
437 case OP_NOTSTARI:
438 case OP_NOTMINSTARI:
439 case OP_NOTPOSSTARI:
440 case OP_NOTPLUSI:
441 case OP_NOTMINPLUSI:
442 case OP_NOTPOSPLUSI:
443 case OP_NOTQUERYI:
444 case OP_NOTMINQUERYI:
445 case OP_NOTPOSQUERYI:
446 flag = "/i";
447 /* Fall through */
448
449 case OP_NOTSTAR:
450 case OP_NOTMINSTAR:
451 case OP_NOTPOSSTAR:
452 case OP_NOTPLUS:
453 case OP_NOTMINPLUS:
454 case OP_NOTPOSPLUS:
455 case OP_NOTQUERY:
456 case OP_NOTMINQUERY:
457 case OP_NOTPOSQUERY:
458 c = code[1];
459 if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);
460 else fprintf(f, " %s [^\\x%02x]", flag, c);
461 fprintf(f, "%s", OP_names[*code]);
462 break;
463
464 case OP_NOTEXACTI:
465 case OP_NOTUPTOI:
466 case OP_NOTMINUPTOI:
467 case OP_NOTPOSUPTOI:
468 flag = "/i";
469 /* Fall through */
470
471 case OP_NOTEXACT:
472 case OP_NOTUPTO:
473 case OP_NOTMINUPTO:
474 case OP_NOTPOSUPTO:
475 c = code[1 + IMM2_SIZE];
476 if (PRINTABLE(c)) fprintf(f, " %s [^%c]{", flag, c);
477 else fprintf(f, " %s [^\\x%02x]{", flag, c);
478 if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
479 fprintf(f, "%d}", GET2(code,1));
480 if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
481 else
482 if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
483 break;
484
485 case OP_RECURSE:
486 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
487 else fprintf(f, " ");
488 fprintf(f, "%s", OP_names[*code]);
489 break;
490
491 case OP_REFI:
492 flag = "/i";
493 /* Fall through */
494 case OP_REF:
495 fprintf(f, " %s \\%d", flag, GET2(code,1));
496 ccode = code + PRIV(OP_lengths)[*code];
497 goto CLASS_REF_REPEAT;
498
499 case OP_CALLOUT:
500 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
501 GET(code, 2 + LINK_SIZE));
502 break;
503
504 case OP_PROP:
505 case OP_NOTPROP:
506 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
507 break;
508
509 /* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no
510 harm in having this code always here, and it makes it less messy without
511 all those #ifdefs. */
512
513 case OP_CLASS:
514 case OP_NCLASS:
515 case OP_XCLASS:
516 {
517 int i, min, max;
518 BOOL printmap;
519 pcre_uint8 *map;
520
521 fprintf(f, " [");
522
523 if (*code == OP_XCLASS)
524 {
525 extra = GET(code, 1);
526 ccode = code + LINK_SIZE + 1;
527 printmap = (*ccode & XCL_MAP) != 0;
528 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
529 }
530 else
531 {
532 printmap = TRUE;
533 ccode = code + 1;
534 }
535
536 /* Print a bit map */
537
538 if (printmap)
539 {
540 map = (pcre_uint8 *)ccode;
541 for (i = 0; i < 256; i++)
542 {
543 if ((map[i/8] & (1 << (i&7))) != 0)
544 {
545 int j;
546 for (j = i+1; j < 256; j++)
547 if ((map[j/8] & (1 << (j&7))) == 0) break;
548 if (i == '-' || i == ']') fprintf(f, "\\");
549 if (PRINTABLE(i)) fprintf(f, "%c", i);
550 else fprintf(f, "\\x%02x", i);
551 if (--j > i)
552 {
553 if (j != i + 1) fprintf(f, "-");
554 if (j == '-' || j == ']') fprintf(f, "\\");
555 if (PRINTABLE(j)) fprintf(f, "%c", j);
556 else fprintf(f, "\\x%02x", j);
557 }
558 i = j;
559 }
560 }
561 ccode += 32 / sizeof(pcre_uchar);
562 }
563
564 /* For an XCLASS there is always some additional data */
565
566 if (*code == OP_XCLASS)
567 {
568 int ch;
569 while ((ch = *ccode++) != XCL_END)
570 {
571 if (ch == XCL_PROP)
572 {
573 int ptype = *ccode++;
574 int pvalue = *ccode++;
575 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
576 }
577 else if (ch == XCL_NOTPROP)
578 {
579 int ptype = *ccode++;
580 int pvalue = *ccode++;
581 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
582 }
583 else
584 {
585 ccode += 1 + print_char(f, ccode, TRUE);
586 if (ch == XCL_RANGE)
587 {
588 fprintf(f, "-");
589 ccode += 1 + print_char(f, ccode, TRUE);
590 }
591 }
592 }
593 }
594
595 /* Indicate a non-UTF class which was created by negation */
596
597 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
598
599 /* Handle repeats after a class or a back reference */
600
601 CLASS_REF_REPEAT:
602 switch(*ccode)
603 {
604 case OP_CRSTAR:
605 case OP_CRMINSTAR:
606 case OP_CRPLUS:
607 case OP_CRMINPLUS:
608 case OP_CRQUERY:
609 case OP_CRMINQUERY:
610 fprintf(f, "%s", OP_names[*ccode]);
611 extra += PRIV(OP_lengths)[*ccode];
612 break;
613
614 case OP_CRRANGE:
615 case OP_CRMINRANGE:
616 min = GET2(ccode,1);
617 max = GET2(ccode,1 + IMM2_SIZE);
618 if (max == 0) fprintf(f, "{%d,}", min);
619 else fprintf(f, "{%d,%d}", min, max);
620 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
621 extra += PRIV(OP_lengths)[*ccode];
622 break;
623
624 /* Do nothing if it's not a repeat; this code stops picky compilers
625 warning about the lack of a default code path. */
626
627 default:
628 break;
629 }
630 }
631 break;
632
633 case OP_MARK:
634 case OP_PRUNE_ARG:
635 case OP_SKIP_ARG:
636 case OP_THEN_ARG:
637 fprintf(f, " %s ", OP_names[*code]);
638 print_puchar(f, code + 2);
639 extra += code[1];
640 break;
641
642 case OP_THEN:
643 fprintf(f, " %s", OP_names[*code]);
644 break;
645
646 case OP_CIRCM:
647 case OP_DOLLM:
648 flag = "/m";
649 /* Fall through */
650
651 /* Anything else is just an item with no data, but possibly a flag. */
652
653 default:
654 fprintf(f, " %s %s", flag, OP_names[*code]);
655 break;
656 }
657
658 code += PRIV(OP_lengths)[*code] + extra;
659 fprintf(f, "\n");
660 }
661 }
662
663 /* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5