3413 |
BOOL is_quantifier; |
BOOL is_quantifier; |
3414 |
BOOL is_recurse; |
BOOL is_recurse; |
3415 |
BOOL reset_bracount; |
BOOL reset_bracount; |
3416 |
int class_charcount; |
int class_has_8bitchar; |
3417 |
|
int class_single_char; |
3418 |
int class_lastchar; |
int class_lastchar; |
3419 |
int newoptions; |
int newoptions; |
3420 |
int recno; |
int recno; |
3711 |
|
|
3712 |
should_flip_negation = FALSE; |
should_flip_negation = FALSE; |
3713 |
|
|
3714 |
/* Keep a count of chars with values < 256 so that we can optimize the case |
/* For optimization purposes, we track some properties of the class. |
3715 |
of just a single character (as long as it's < 256). However, For higher |
class_has_8bitchar will be non-zero, if the class contains at least one |
3716 |
valued UTF-8 characters, we don't yet do any optimization. */ |
< 256 character. class_single_char will be 1, if the class only contains |
3717 |
|
a single character. */ |
3718 |
|
|
3719 |
class_charcount = 0; |
class_has_8bitchar = 0; |
3720 |
|
class_single_char = 0; |
3721 |
class_lastchar = -1; |
class_lastchar = -1; |
3722 |
|
|
3723 |
/* Initialize the 32-char bit map to all zeros. We build the map in a |
/* Initialize the 32-char bit map to all zeros. We build the map in a |
3873 |
for (c = 0; c < 32; c++) classbits[c] |= pbits[c]; |
for (c = 0; c < 32; c++) classbits[c] |= pbits[c]; |
3874 |
|
|
3875 |
ptr = tempptr + 1; |
ptr = tempptr + 1; |
3876 |
class_charcount = 10; /* Set > 1; assumes more than 1 per class */ |
/* Every class contains at least one < 256 character. */ |
3877 |
|
class_has_8bitchar = 1; |
3878 |
|
/* Every class contains at least two characters. */ |
3879 |
|
class_single_char = 2; |
3880 |
continue; /* End of POSIX syntax handling */ |
continue; /* End of POSIX syntax handling */ |
3881 |
} |
} |
3882 |
|
|
3883 |
/* Backslash may introduce a single character, or it may introduce one |
/* Backslash may introduce a single character, or it may introduce one |
3884 |
of the specials, which just set a flag. The sequence \b is a special |
of the specials, which just set a flag. The sequence \b is a special |
3885 |
case. Inside a class (and only there) it is treated as backspace. We |
case. Inside a class (and only there) it is treated as backspace. We |
3886 |
assume that other escapes have more than one character in them, so set |
assume that other escapes have more than one character in them, so |
3887 |
class_charcount bigger than one. Unrecognized escapes fall through and |
speculatively set both class_has_8bitchar and class_single_char bigger |
3888 |
are either treated as literal characters (by default), or are faulted if |
than one. Unrecognized escapes fall through and are either treated |
3889 |
|
as literal characters (by default), or are faulted if |
3890 |
PCRE_EXTRA is set. */ |
PCRE_EXTRA is set. */ |
3891 |
|
|
3892 |
if (c == CHAR_BACKSLASH) |
if (c == CHAR_BACKSLASH) |
3909 |
if (c < 0) |
if (c < 0) |
3910 |
{ |
{ |
3911 |
register const pcre_uint8 *cbits = cd->cbits; |
register const pcre_uint8 *cbits = cd->cbits; |
3912 |
class_charcount += 2; /* Greater than 1 is what matters */ |
/* Every class contains at least two < 256 characters. */ |
3913 |
|
class_has_8bitchar++; |
3914 |
|
/* Every class contains at least two characters. */ |
3915 |
|
class_single_char += 2; |
3916 |
|
|
3917 |
switch (-c) |
switch (-c) |
3918 |
{ |
{ |
3925 |
case ESC_SU: |
case ESC_SU: |
3926 |
nestptr = ptr; |
nestptr = ptr; |
3927 |
ptr = substitutes[-c - ESC_DU] - 1; /* Just before substitute */ |
ptr = substitutes[-c - ESC_DU] - 1; /* Just before substitute */ |
3928 |
class_charcount -= 2; /* Undo! */ |
class_has_8bitchar--; /* Undo! */ |
3929 |
continue; |
continue; |
3930 |
#endif |
#endif |
3931 |
case ESC_d: |
case ESC_d: |
4091 |
XCL_PROP : XCL_NOTPROP; |
XCL_PROP : XCL_NOTPROP; |
4092 |
*class_uchardata++ = ptype; |
*class_uchardata++ = ptype; |
4093 |
*class_uchardata++ = pdata; |
*class_uchardata++ = pdata; |
4094 |
class_charcount -= 2; /* Not a < 256 character */ |
class_has_8bitchar--; /* Undo! */ |
4095 |
continue; |
continue; |
4096 |
} |
} |
4097 |
#endif |
#endif |
4105 |
*errorcodeptr = ERR7; |
*errorcodeptr = ERR7; |
4106 |
goto FAILED; |
goto FAILED; |
4107 |
} |
} |
4108 |
class_charcount -= 2; /* Undo the default count from above */ |
class_has_8bitchar--; /* Undo the speculative increase. */ |
4109 |
c = *ptr; /* Get the final character and fall through */ |
class_single_char -= 2; /* Undo the speculative increase. */ |
4110 |
|
c = *ptr; /* Get the final character and fall through */ |
4111 |
break; |
break; |
4112 |
} |
} |
4113 |
} |
} |
4114 |
|
|
4115 |
/* Fall through if we have a single character (c >= 0). This may be |
/* Fall through if we have a single character (c >= 0). This may be |
4116 |
greater than 256 mode. */ |
greater than 256. */ |
4117 |
|
|
4118 |
} /* End of backslash handling */ |
} /* End of backslash handling */ |
4119 |
|
|
4206 |
|
|
4207 |
if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF; |
if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF; |
4208 |
|
|
4209 |
|
/* Since we found a character range, single character optimizations |
4210 |
|
cannot be done anymore. */ |
4211 |
|
class_single_char = 2; |
4212 |
|
|
4213 |
/* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless |
/* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless |
4214 |
matching, we have to use an XCLASS with extra data items. Caseless |
matching, we have to use an XCLASS with extra data items. Caseless |
4215 |
matching for characters > 127 is available only if UCP support is |
matching for characters > 127 is available only if UCP support is |
4338 |
/* We use the bit map for 8 bit mode, or when the characters fall |
/* We use the bit map for 8 bit mode, or when the characters fall |
4339 |
partially or entirely to [0-255] ([0-127] for UCP) ranges. */ |
partially or entirely to [0-255] ([0-127] for UCP) ranges. */ |
4340 |
|
|
4341 |
class_charcount += d - c + 1; |
class_has_8bitchar = 1; |
|
class_lastchar = d; |
|
4342 |
|
|
4343 |
/* We can save a bit of time by skipping this in the pre-compile. */ |
/* We can save a bit of time by skipping this in the pre-compile. */ |
4344 |
|
|
4361 |
|
|
4362 |
LONE_SINGLE_CHARACTER: |
LONE_SINGLE_CHARACTER: |
4363 |
|
|
4364 |
/* Handle a character that cannot go in the bit map */ |
/* Only the value of 1 matters for class_single_char. */ |
4365 |
|
if (class_single_char < 2) class_single_char++; |
4366 |
|
class_lastchar = c; |
4367 |
|
|
4368 |
|
/* Handle a character that cannot go in the bit map */ |
4369 |
#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8) |
#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8) |
4370 |
if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127))) |
if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127))) |
4371 |
#elif defined SUPPORT_UTF |
#elif defined SUPPORT_UTF |
4413 |
#endif /* SUPPORT_UTF || COMPILE_PCRE16 */ |
#endif /* SUPPORT_UTF || COMPILE_PCRE16 */ |
4414 |
/* Handle a single-byte character */ |
/* Handle a single-byte character */ |
4415 |
{ |
{ |
4416 |
|
class_has_8bitchar = 1; |
4417 |
classbits[c/8] |= (1 << (c&7)); |
classbits[c/8] |= (1 << (c&7)); |
4418 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
4419 |
{ |
{ |
4420 |
c = cd->fcc[c]; /* flip case */ |
c = cd->fcc[c]; /* flip case */ |
4421 |
classbits[c/8] |= (1 << (c&7)); |
classbits[c/8] |= (1 << (c&7)); |
4422 |
} |
} |
|
class_charcount++; |
|
|
class_lastchar = c; |
|
4423 |
} |
} |
4424 |
|
|
4425 |
} |
} |
4459 |
of reqchar, save the previous value for reinstating. */ |
of reqchar, save the previous value for reinstating. */ |
4460 |
|
|
4461 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
4462 |
if (class_charcount == 1 && !xclass && |
if (class_single_char == 1 && (!utf || !negate_class |
4463 |
(!utf || !negate_class || class_lastchar < 128)) |
|| class_lastchar < (MAX_VALUE_FOR_SINGLE_CHAR + 1))) |
4464 |
#else |
#else |
4465 |
if (class_charcount == 1) |
if (class_single_char == 1) |
4466 |
#endif |
#endif |
4467 |
{ |
{ |
4468 |
zeroreqchar = reqchar; |
zeroreqchar = reqchar; |
4469 |
|
|
4470 |
/* The OP_NOT[I] opcodes work on one-byte characters only. */ |
/* The OP_NOT[I] opcodes work on single characters only. */ |
4471 |
|
|
4472 |
if (negate_class) |
if (negate_class) |
4473 |
{ |
{ |
4482 |
then we can handle this with the normal one-character code. */ |
then we can handle this with the normal one-character code. */ |
4483 |
|
|
4484 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
4485 |
if (utf && class_lastchar > 127) |
if (utf && class_lastchar > MAX_VALUE_FOR_SINGLE_CHAR) |
4486 |
mclength = PRIV(ord2utf)(class_lastchar, mcbuffer); |
mclength = PRIV(ord2utf)(class_lastchar, mcbuffer); |
4487 |
else |
else |
4488 |
#endif |
#endif |
4526 |
/* If the map is required, move up the extra data to make room for it; |
/* If the map is required, move up the extra data to make room for it; |
4527 |
otherwise just move the code pointer to the end of the extra data. */ |
otherwise just move the code pointer to the end of the extra data. */ |
4528 |
|
|
4529 |
if (class_charcount > 0) |
if (class_has_8bitchar > 0) |
4530 |
{ |
{ |
4531 |
*code++ |= XCL_MAP; |
*code++ |= XCL_MAP; |
4532 |
memmove(code + (32 / sizeof(pcre_uchar)), code, |
memmove(code + (32 / sizeof(pcre_uchar)), code, |
6702 |
handle it as a data character. */ |
handle it as a data character. */ |
6703 |
|
|
6704 |
#ifdef SUPPORT_UTF |
#ifdef SUPPORT_UTF |
6705 |
if (utf && c > 127) |
if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) |
6706 |
mclength = PRIV(ord2utf)(c, mcbuffer); |
mclength = PRIV(ord2utf)(c, mcbuffer); |
6707 |
else |
else |
6708 |
#endif |
#endif |