49 |
compiled regex for debugging purposes. */ |
compiled regex for debugging purposes. */ |
50 |
|
|
51 |
|
|
52 |
|
/* Macro that decides whether a character should be output as a literal or in |
53 |
|
hexadecimal. We don't use isprint() because that can vary from system to system |
54 |
|
(even without the use of locales) and we want the output always to be the same, |
55 |
|
for testing purposes. This macro is used in pcretest as well as in this file. */ |
56 |
|
|
57 |
|
/* Nonzero when c lies in 32..126 inclusive. The argument is expanded twice, so do not pass an expression with side effects. */
#define PRINTABLE(c) ((c) >= 32 && (c) < 127) |
58 |
|
|
59 |
|
/* The table of operator names. */ |
60 |
|
|
61 |
/* Entries are supplied by the OP_NAME_LIST macro (defined outside this view). The printing code indexes this table by opcode, e.g. OP_names[*code]. */
static const char *OP_names[] = { OP_NAME_LIST }; |
static const char *OP_names[] = { OP_NAME_LIST }; |
62 |
|
|
63 |
|
|
64 |
|
|
65 |
/************************************************* |
/************************************************* |
66 |
* Print single- or multi-byte character * |
* Print single- or multi-byte character * |
67 |
*************************************************/ |
*************************************************/ |
71 |
{ |
{ |
72 |
int c = *ptr; |
int c = *ptr; |
73 |
|
|
74 |
|
#ifndef SUPPORT_UTF8 |
75 |
|
utf8 = utf8; /* Avoid compiler warning */ |
76 |
|
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); |
77 |
|
return 0; |
78 |
|
|
79 |
|
#else |
80 |
if (!utf8 || (c & 0xc0) != 0xc0) |
if (!utf8 || (c & 0xc0) != 0xc0) |
81 |
{ |
{ |
82 |
if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); |
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); |
83 |
return 0; |
return 0; |
84 |
} |
} |
85 |
else |
else |
108 |
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c); |
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c); |
109 |
return a; |
return a; |
110 |
} |
} |
111 |
|
#endif |
112 |
} |
} |
113 |
|
|
114 |
|
|
118 |
*************************************************/ |
*************************************************/ |
119 |
|
|
120 |
/* Look up the printable name of a Unicode property.

Arguments:
  ptype     property type, compared with the .type field of each _pcre_utt entry
  pvalue    property value, compared with the .value field of each entry

Returns:    the .name field of the matching entry, or "??" when no entry
            matches or when SUPPORT_UCP is not defined
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* Scan from the last valid entry downwards. Starting at _pcre_utt_size itself
would read one element past the end of the table. NOTE(review): this assumes
_pcre_utt_size is the number of entries in _pcre_utt; confirm against its
definition. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
/* Without UCP support there is no table to search; the casts only silence
unused-parameter warnings. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
136 |
|
|
177 |
|
|
178 |
fprintf(f, "%3d ", (int)(code - codestart)); |
fprintf(f, "%3d ", (int)(code - codestart)); |
179 |
|
|
|
if (*code >= OP_BRA) |
|
|
{ |
|
|
if (*code - OP_BRA > EXTRACT_BASIC_MAX) |
|
|
fprintf(f, "%3d Bra extra\n", GET(code, 1)); |
|
|
else |
|
|
fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA); |
|
|
code += _pcre_OP_lengths[OP_BRA]; |
|
|
continue; |
|
|
} |
|
|
|
|
180 |
switch(*code) |
switch(*code) |
181 |
{ |
{ |
182 |
case OP_END: |
case OP_END: |
189 |
break; |
break; |
190 |
|
|
191 |
case OP_CHAR: |
case OP_CHAR: |
192 |
|
fprintf(f, " "); |
193 |
|
do |
194 |
{ |
{ |
195 |
fprintf(f, " "); |
code++; |
196 |
do |
code += 1 + print_char(f, code, utf8); |
|
{ |
|
|
code++; |
|
|
code += 1 + print_char(f, code, utf8); |
|
|
} |
|
|
while (*code == OP_CHAR); |
|
|
fprintf(f, "\n"); |
|
|
continue; |
|
197 |
} |
} |
198 |
break; |
while (*code == OP_CHAR); |
199 |
|
fprintf(f, "\n"); |
200 |
|
continue; |
201 |
|
|
202 |
case OP_CHARNC: |
case OP_CHARNC: |
203 |
|
fprintf(f, " NC "); |
204 |
|
do |
205 |
{ |
{ |
206 |
fprintf(f, " NC "); |
code++; |
207 |
do |
code += 1 + print_char(f, code, utf8); |
|
{ |
|
|
code++; |
|
|
code += 1 + print_char(f, code, utf8); |
|
|
} |
|
|
while (*code == OP_CHARNC); |
|
|
fprintf(f, "\n"); |
|
|
continue; |
|
208 |
} |
} |
209 |
|
while (*code == OP_CHARNC); |
210 |
|
fprintf(f, "\n"); |
211 |
|
continue; |
212 |
|
|
213 |
|
case OP_CBRA: |
214 |
|
case OP_SCBRA: |
215 |
|
fprintf(f, "%3d %s %d", GET(code, 1), OP_names[*code], |
216 |
|
GET2(code, 1+LINK_SIZE)); |
217 |
break; |
break; |
218 |
|
|
219 |
|
case OP_BRA: |
220 |
|
case OP_SBRA: |
221 |
case OP_KETRMAX: |
case OP_KETRMAX: |
222 |
case OP_KETRMIN: |
case OP_KETRMIN: |
223 |
case OP_ALT: |
case OP_ALT: |
228 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
229 |
case OP_ONCE: |
case OP_ONCE: |
230 |
case OP_COND: |
case OP_COND: |
231 |
|
case OP_SCOND: |
232 |
case OP_REVERSE: |
case OP_REVERSE: |
233 |
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]); |
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]); |
234 |
break; |
break; |
235 |
|
|
236 |
case OP_BRANUMBER: |
case OP_CREF: |
237 |
printf("%3d %s", GET2(code, 1), OP_names[*code]); |
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); |
238 |
break; |
break; |
239 |
|
|
240 |
case OP_CREF: |
case OP_RREF: |
241 |
if (GET2(code, 1) == CREF_RECURSE) |
c = GET2(code, 1); |
242 |
fprintf(f, " Cond recurse"); |
if (c == RREF_ANY) |
243 |
|
fprintf(f, " Cond recurse any"); |
244 |
else |
else |
245 |
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); |
fprintf(f, " Cond recurse %d", c); |
246 |
|
break; |
247 |
|
|
248 |
|
case OP_DEF: |
249 |
|
fprintf(f, " Cond def"); |
250 |
break; |
break; |
251 |
|
|
252 |
case OP_STAR: |
case OP_STAR: |
253 |
case OP_MINSTAR: |
case OP_MINSTAR: |
254 |
|
case OP_POSSTAR: |
255 |
case OP_PLUS: |
case OP_PLUS: |
256 |
case OP_MINPLUS: |
case OP_MINPLUS: |
257 |
|
case OP_POSPLUS: |
258 |
case OP_QUERY: |
case OP_QUERY: |
259 |
case OP_MINQUERY: |
case OP_MINQUERY: |
260 |
|
case OP_POSQUERY: |
261 |
case OP_TYPESTAR: |
case OP_TYPESTAR: |
262 |
case OP_TYPEMINSTAR: |
case OP_TYPEMINSTAR: |
263 |
|
case OP_TYPEPOSSTAR: |
264 |
case OP_TYPEPLUS: |
case OP_TYPEPLUS: |
265 |
case OP_TYPEMINPLUS: |
case OP_TYPEMINPLUS: |
266 |
|
case OP_TYPEPOSPLUS: |
267 |
case OP_TYPEQUERY: |
case OP_TYPEQUERY: |
268 |
case OP_TYPEMINQUERY: |
case OP_TYPEMINQUERY: |
269 |
|
case OP_TYPEPOSQUERY: |
270 |
fprintf(f, " "); |
fprintf(f, " "); |
271 |
if (*code >= OP_TYPESTAR) |
if (*code >= OP_TYPESTAR) |
272 |
{ |
{ |
273 |
fprintf(f, "%s", OP_names[code[1]]); |
fprintf(f, "%s", OP_names[code[1]]); |
274 |
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) |
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) |
275 |
{ |
{ |
276 |
fprintf(f, " %s ", get_ucpname(code[2])); |
fprintf(f, " %s ", get_ucpname(code[2], code[3])); |
277 |
extra = 1; |
extra = 2; |
278 |
} |
} |
279 |
} |
} |
280 |
else extra = print_char(f, code+1, utf8); |
else extra = print_char(f, code+1, utf8); |
284 |
case OP_EXACT: |
case OP_EXACT: |
285 |
case OP_UPTO: |
case OP_UPTO: |
286 |
case OP_MINUPTO: |
case OP_MINUPTO: |
287 |
|
case OP_POSUPTO: |
288 |
fprintf(f, " "); |
fprintf(f, " "); |
289 |
extra = print_char(f, code+3, utf8); |
extra = print_char(f, code+3, utf8); |
290 |
fprintf(f, "{"); |
fprintf(f, "{"); |
291 |
if (*code != OP_EXACT) fprintf(f, ","); |
if (*code != OP_EXACT) fprintf(f, "0,"); |
292 |
fprintf(f, "%d}", GET2(code,1)); |
fprintf(f, "%d}", GET2(code,1)); |
293 |
if (*code == OP_MINUPTO) fprintf(f, "?"); |
if (*code == OP_MINUPTO) fprintf(f, "?"); |
294 |
|
else if (*code == OP_POSUPTO) fprintf(f, "+"); |
295 |
break; |
break; |
296 |
|
|
297 |
case OP_TYPEEXACT: |
case OP_TYPEEXACT: |
298 |
case OP_TYPEUPTO: |
case OP_TYPEUPTO: |
299 |
case OP_TYPEMINUPTO: |
case OP_TYPEMINUPTO: |
300 |
|
case OP_TYPEPOSUPTO: |
301 |
fprintf(f, " %s", OP_names[code[3]]); |
fprintf(f, " %s", OP_names[code[3]]); |
302 |
if (code[3] == OP_PROP || code[3] == OP_NOTPROP) |
if (code[3] == OP_PROP || code[3] == OP_NOTPROP) |
303 |
{ |
{ |
304 |
fprintf(f, " %s ", get_ucpname(code[4])); |
fprintf(f, " %s ", get_ucpname(code[4], code[5])); |
305 |
extra = 1; |
extra = 2; |
306 |
} |
} |
307 |
fprintf(f, "{"); |
fprintf(f, "{"); |
308 |
if (*code != OP_TYPEEXACT) fprintf(f, "0,"); |
if (*code != OP_TYPEEXACT) fprintf(f, "0,"); |
309 |
fprintf(f, "%d}", GET2(code,1)); |
fprintf(f, "%d}", GET2(code,1)); |
310 |
if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); |
if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); |
311 |
|
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+"); |
312 |
break; |
break; |
313 |
|
|
314 |
case OP_NOT: |
case OP_NOT: |
315 |
if (isprint(c = code[1])) fprintf(f, " [^%c]", c); |
c = code[1]; |
316 |
|
if (PRINTABLE(c)) fprintf(f, " [^%c]", c); |
317 |
else fprintf(f, " [^\\x%02x]", c); |
else fprintf(f, " [^\\x%02x]", c); |
318 |
break; |
break; |
319 |
|
|
320 |
case OP_NOTSTAR: |
case OP_NOTSTAR: |
321 |
case OP_NOTMINSTAR: |
case OP_NOTMINSTAR: |
322 |
|
case OP_NOTPOSSTAR: |
323 |
case OP_NOTPLUS: |
case OP_NOTPLUS: |
324 |
case OP_NOTMINPLUS: |
case OP_NOTMINPLUS: |
325 |
|
case OP_NOTPOSPLUS: |
326 |
case OP_NOTQUERY: |
case OP_NOTQUERY: |
327 |
case OP_NOTMINQUERY: |
case OP_NOTMINQUERY: |
328 |
if (isprint(c = code[1])) fprintf(f, " [^%c]", c); |
case OP_NOTPOSQUERY: |
329 |
|
c = code[1]; |
330 |
|
if (PRINTABLE(c)) fprintf(f, " [^%c]", c); |
331 |
else fprintf(f, " [^\\x%02x]", c); |
else fprintf(f, " [^\\x%02x]", c); |
332 |
fprintf(f, "%s", OP_names[*code]); |
fprintf(f, "%s", OP_names[*code]); |
333 |
break; |
break; |
335 |
case OP_NOTEXACT: |
case OP_NOTEXACT: |
336 |
case OP_NOTUPTO: |
case OP_NOTUPTO: |
337 |
case OP_NOTMINUPTO: |
case OP_NOTMINUPTO: |
338 |
if (isprint(c = code[3])) fprintf(f, " [^%c]{", c); |
case OP_NOTPOSUPTO: |
339 |
|
c = code[3]; |
340 |
|
if (PRINTABLE(c)) fprintf(f, " [^%c]{", c); |
341 |
else fprintf(f, " [^\\x%02x]{", c); |
else fprintf(f, " [^\\x%02x]{", c); |
342 |
if (*code != OP_NOTEXACT) fprintf(f, "0,"); |
if (*code != OP_NOTEXACT) fprintf(f, "0,"); |
343 |
fprintf(f, "%d}", GET2(code,1)); |
fprintf(f, "%d}", GET2(code,1)); |
344 |
if (*code == OP_NOTMINUPTO) fprintf(f, "?"); |
if (*code == OP_NOTMINUPTO) fprintf(f, "?"); |
345 |
|
else if (*code == OP_NOTPOSUPTO) fprintf(f, "+"); |
346 |
break; |
break; |
347 |
|
|
348 |
case OP_RECURSE: |
case OP_RECURSE: |
361 |
|
|
362 |
case OP_PROP: |
case OP_PROP: |
363 |
case OP_NOTPROP: |
case OP_NOTPROP: |
364 |
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1])); |
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2])); |
365 |
break; |
break; |
366 |
|
|
367 |
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in |
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in |
402 |
for (j = i+1; j < 256; j++) |
for (j = i+1; j < 256; j++) |
403 |
if ((ccode[j/8] & (1 << (j&7))) == 0) break; |
if ((ccode[j/8] & (1 << (j&7))) == 0) break; |
404 |
if (i == '-' || i == ']') fprintf(f, "\\"); |
if (i == '-' || i == ']') fprintf(f, "\\"); |
405 |
if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i); |
if (PRINTABLE(i)) fprintf(f, "%c", i); |
406 |
|
else fprintf(f, "\\x%02x", i); |
407 |
if (--j > i) |
if (--j > i) |
408 |
{ |
{ |
409 |
if (j != i + 1) fprintf(f, "-"); |
if (j != i + 1) fprintf(f, "-"); |
410 |
if (j == '-' || j == ']') fprintf(f, "\\"); |
if (j == '-' || j == ']') fprintf(f, "\\"); |
411 |
if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j); |
if (PRINTABLE(j)) fprintf(f, "%c", j); |
412 |
|
else fprintf(f, "\\x%02x", j); |
413 |
} |
} |
414 |
i = j; |
i = j; |
415 |
} |
} |
426 |
{ |
{ |
427 |
if (ch == XCL_PROP) |
if (ch == XCL_PROP) |
428 |
{ |
{ |
429 |
fprintf(f, "\\p{%s}", get_ucpname(*ccode++)); |
int ptype = *ccode++; |
430 |
|
int pvalue = *ccode++; |
431 |
|
fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue)); |
432 |
} |
} |
433 |
else if (ch == XCL_NOTPROP) |
else if (ch == XCL_NOTPROP) |
434 |
{ |
{ |
435 |
fprintf(f, "\\P{%s}", get_ucpname(*ccode++)); |
int ptype = *ccode++; |
436 |
|
int pvalue = *ccode++; |
437 |
|
fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue)); |
438 |
} |
} |
439 |
else |
else |
440 |
{ |
{ |
476 |
if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); |
if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); |
477 |
extra += _pcre_OP_lengths[*ccode]; |
extra += _pcre_OP_lengths[*ccode]; |
478 |
break; |
break; |
479 |
|
|
480 |
|
/* Do nothing if it's not a repeat; this code stops picky compilers |
481 |
|
warning about the lack of a default code path. */ |
482 |
|
|
483 |
|
default: |
484 |
|
break; |
485 |
} |
} |
486 |
} |
} |
487 |
break; |
break; |