/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 91 - (show annotations)
Sat Feb 24 21:41:34 2007 UTC (8 years, 6 months ago) by nigel
File MIME type: application/x-wais-source
File size: 12800 byte(s)
Error occurred while calculating annotation data.
Load pcre-6.7 into code/trunk.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2005 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains a PCRE private debugging function for printing out the
42 internal form of a compiled regular expression, along with some supporting
43 local functions. This source file is used in two places:
44
45 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47
48 (2) It is always #included by pcretest.c, which can be asked to print out a
49 compiled regex for debugging purposes. */
50
51
52 static const char *OP_names[] = { OP_NAME_LIST };
53
54
55 /*************************************************
56 * Print single- or multi-byte character *
57 *************************************************/
58
59 static int
60 print_char(FILE *f, uschar *ptr, BOOL utf8)
61 {
62 int c = *ptr;
63
64 if (!utf8 || (c & 0xc0) != 0xc0)
65 {
66 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
67 return 0;
68 }
69 else
70 {
71 int i;
72 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
73 int s = 6*a;
74 c = (c & _pcre_utf8_table3[a]) << s;
75 for (i = 1; i <= a; i++)
76 {
77 /* This is a check for malformed UTF-8; it should only occur if the sanity
78 check has been turned off. Rather than swallow random bytes, just stop if
79 we hit a bad one. Print it with \X instead of \x as an indication. */
80
81 if ((ptr[i] & 0xc0) != 0x80)
82 {
83 fprintf(f, "\\X{%x}", c);
84 return i - 1;
85 }
86
87 /* The byte is OK */
88
89 s -= 6;
90 c |= (ptr[i] & 0x3f) << s;
91 }
92 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
93 return a;
94 }
95 }
96
97
98
99 /*************************************************
100 * Find Unicode property name *
101 *************************************************/
102
/* Find the name of a Unicode property.

Arguments:
  ptype    the property type to look for
  pvalue   the property value to look for

Returns:   the name of the _pcre_utt entry whose type and value both match,
           or "??" if there is no such entry; when SUPPORT_UCP is not
           defined the result is always "??"
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* Search backwards from the last entry. This assumes _pcre_utt_size is the
number of entries in _pcre_utt (as its name indicates; confirm against the
table's definition), which makes _pcre_utt_size - 1 the highest valid index.
Starting at _pcre_utt_size itself would read one element past the end. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
/* It gets harder and harder to shut off unwanted compiler warnings. The
arithmetic below exists only so that both arguments count as used. */
ptype = ptype * pvalue;
return (ptype == pvalue)? "??" : "??";
#endif
}
119
120
121
122 /*************************************************
123 * Print compiled regex *
124 *************************************************/
125
126 /* Make this function work for a regex with integers in either byte order.
127 However, we assume that what we are passed is a compiled regex. */
128
129 static void
130 pcre_printint(pcre *external_re, FILE *f)
131 {
132 real_pcre *re = (real_pcre *)external_re;
133 uschar *codestart, *code;
134 BOOL utf8;
135
136 unsigned int options = re->options;
137 int offset = re->name_table_offset;
138 int count = re->name_count;
139 int size = re->name_entry_size;
140
141 if (re->magic_number != MAGIC_NUMBER)
142 {
143 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
144 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
145 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
146 options = ((options << 24) & 0xff000000) |
147 ((options << 8) & 0x00ff0000) |
148 ((options >> 8) & 0x0000ff00) |
149 ((options >> 24) & 0x000000ff);
150 }
151
152 code = codestart = (uschar *)re + offset + count * size;
153 utf8 = (options & PCRE_UTF8) != 0;
154
155 for(;;)
156 {
157 uschar *ccode;
158 int c;
159 int extra = 0;
160
161 fprintf(f, "%3d ", (int)(code - codestart));
162
163 if (*code >= OP_BRA)
164 {
165 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
166 fprintf(f, "%3d Bra extra\n", GET(code, 1));
167 else
168 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
169 code += _pcre_OP_lengths[OP_BRA];
170 continue;
171 }
172
173 switch(*code)
174 {
175 case OP_END:
176 fprintf(f, " %s\n", OP_names[*code]);
177 fprintf(f, "------------------------------------------------------------------\n");
178 return;
179
180 case OP_OPT:
181 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
182 break;
183
184 case OP_CHAR:
185 fprintf(f, " ");
186 do
187 {
188 code++;
189 code += 1 + print_char(f, code, utf8);
190 }
191 while (*code == OP_CHAR);
192 fprintf(f, "\n");
193 continue;
194
195 case OP_CHARNC:
196 fprintf(f, " NC ");
197 do
198 {
199 code++;
200 code += 1 + print_char(f, code, utf8);
201 }
202 while (*code == OP_CHARNC);
203 fprintf(f, "\n");
204 continue;
205
206 case OP_KETRMAX:
207 case OP_KETRMIN:
208 case OP_ALT:
209 case OP_KET:
210 case OP_ASSERT:
211 case OP_ASSERT_NOT:
212 case OP_ASSERTBACK:
213 case OP_ASSERTBACK_NOT:
214 case OP_ONCE:
215 case OP_COND:
216 case OP_REVERSE:
217 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
218 break;
219
220 case OP_BRANUMBER:
221 printf("%3d %s", GET2(code, 1), OP_names[*code]);
222 break;
223
224 case OP_CREF:
225 if (GET2(code, 1) == CREF_RECURSE)
226 fprintf(f, " Cond recurse");
227 else
228 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
229 break;
230
231 case OP_STAR:
232 case OP_MINSTAR:
233 case OP_PLUS:
234 case OP_MINPLUS:
235 case OP_QUERY:
236 case OP_MINQUERY:
237 case OP_TYPESTAR:
238 case OP_TYPEMINSTAR:
239 case OP_TYPEPLUS:
240 case OP_TYPEMINPLUS:
241 case OP_TYPEQUERY:
242 case OP_TYPEMINQUERY:
243 fprintf(f, " ");
244 if (*code >= OP_TYPESTAR)
245 {
246 fprintf(f, "%s", OP_names[code[1]]);
247 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
248 {
249 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
250 extra = 2;
251 }
252 }
253 else extra = print_char(f, code+1, utf8);
254 fprintf(f, "%s", OP_names[*code]);
255 break;
256
257 case OP_EXACT:
258 case OP_UPTO:
259 case OP_MINUPTO:
260 fprintf(f, " ");
261 extra = print_char(f, code+3, utf8);
262 fprintf(f, "{");
263 if (*code != OP_EXACT) fprintf(f, ",");
264 fprintf(f, "%d}", GET2(code,1));
265 if (*code == OP_MINUPTO) fprintf(f, "?");
266 break;
267
268 case OP_TYPEEXACT:
269 case OP_TYPEUPTO:
270 case OP_TYPEMINUPTO:
271 fprintf(f, " %s", OP_names[code[3]]);
272 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
273 {
274 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
275 extra = 2;
276 }
277 fprintf(f, "{");
278 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
279 fprintf(f, "%d}", GET2(code,1));
280 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
281 break;
282
283 case OP_NOT:
284 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
285 else fprintf(f, " [^\\x%02x]", c);
286 break;
287
288 case OP_NOTSTAR:
289 case OP_NOTMINSTAR:
290 case OP_NOTPLUS:
291 case OP_NOTMINPLUS:
292 case OP_NOTQUERY:
293 case OP_NOTMINQUERY:
294 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
295 else fprintf(f, " [^\\x%02x]", c);
296 fprintf(f, "%s", OP_names[*code]);
297 break;
298
299 case OP_NOTEXACT:
300 case OP_NOTUPTO:
301 case OP_NOTMINUPTO:
302 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
303 else fprintf(f, " [^\\x%02x]{", c);
304 if (*code != OP_NOTEXACT) fprintf(f, "0,");
305 fprintf(f, "%d}", GET2(code,1));
306 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
307 break;
308
309 case OP_RECURSE:
310 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
311 break;
312
313 case OP_REF:
314 fprintf(f, " \\%d", GET2(code,1));
315 ccode = code + _pcre_OP_lengths[*code];
316 goto CLASS_REF_REPEAT;
317
318 case OP_CALLOUT:
319 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
320 GET(code, 2 + LINK_SIZE));
321 break;
322
323 case OP_PROP:
324 case OP_NOTPROP:
325 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
326 break;
327
328 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
329 having this code always here, and it makes it less messy without all those
330 #ifdefs. */
331
332 case OP_CLASS:
333 case OP_NCLASS:
334 case OP_XCLASS:
335 {
336 int i, min, max;
337 BOOL printmap;
338
339 fprintf(f, " [");
340
341 if (*code == OP_XCLASS)
342 {
343 extra = GET(code, 1);
344 ccode = code + LINK_SIZE + 1;
345 printmap = (*ccode & XCL_MAP) != 0;
346 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
347 }
348 else
349 {
350 printmap = TRUE;
351 ccode = code + 1;
352 }
353
354 /* Print a bit map */
355
356 if (printmap)
357 {
358 for (i = 0; i < 256; i++)
359 {
360 if ((ccode[i/8] & (1 << (i&7))) != 0)
361 {
362 int j;
363 for (j = i+1; j < 256; j++)
364 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
365 if (i == '-' || i == ']') fprintf(f, "\\");
366 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
367 if (--j > i)
368 {
369 if (j != i + 1) fprintf(f, "-");
370 if (j == '-' || j == ']') fprintf(f, "\\");
371 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
372 }
373 i = j;
374 }
375 }
376 ccode += 32;
377 }
378
379 /* For an XCLASS there is always some additional data */
380
381 if (*code == OP_XCLASS)
382 {
383 int ch;
384 while ((ch = *ccode++) != XCL_END)
385 {
386 if (ch == XCL_PROP)
387 {
388 int ptype = *ccode++;
389 int pvalue = *ccode++;
390 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
391 }
392 else if (ch == XCL_NOTPROP)
393 {
394 int ptype = *ccode++;
395 int pvalue = *ccode++;
396 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
397 }
398 else
399 {
400 ccode += 1 + print_char(f, ccode, TRUE);
401 if (ch == XCL_RANGE)
402 {
403 fprintf(f, "-");
404 ccode += 1 + print_char(f, ccode, TRUE);
405 }
406 }
407 }
408 }
409
410 /* Indicate a non-UTF8 class which was created by negation */
411
412 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
413
414 /* Handle repeats after a class or a back reference */
415
416 CLASS_REF_REPEAT:
417 switch(*ccode)
418 {
419 case OP_CRSTAR:
420 case OP_CRMINSTAR:
421 case OP_CRPLUS:
422 case OP_CRMINPLUS:
423 case OP_CRQUERY:
424 case OP_CRMINQUERY:
425 fprintf(f, "%s", OP_names[*ccode]);
426 extra += _pcre_OP_lengths[*ccode];
427 break;
428
429 case OP_CRRANGE:
430 case OP_CRMINRANGE:
431 min = GET2(ccode,1);
432 max = GET2(ccode,3);
433 if (max == 0) fprintf(f, "{%d,}", min);
434 else fprintf(f, "{%d,%d}", min, max);
435 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
436 extra += _pcre_OP_lengths[*ccode];
437 break;
438
439 /* Do nothing if it's not a repeat; this code stops picky compilers
440 warning about the lack of a default code path. */
441
442 default:
443 break;
444 }
445 }
446 break;
447
448 /* Anything else is just an item with no data*/
449
450 default:
451 fprintf(f, " %s", OP_names[*code]);
452 break;
453 }
454
455 code += _pcre_OP_lengths[*code] + extra;
456 fprintf(f, "\n");
457 }
458 }
459
460 /* End of pcre_printint.src */

  ViewVC Help
Powered by ViewVC 1.1.5