181 |
|
|
182 |
if (caseless) |
if (caseless) |
183 |
{ |
{ |
184 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
185 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
186 |
if (md->utf) |
if (md->utf) |
187 |
{ |
{ |
365 |
/* Function local variables */ |
/* Function local variables */ |
366 |
|
|
367 |
PCRE_PUCHAR Xcallpat; |
PCRE_PUCHAR Xcallpat; |
368 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
369 |
PCRE_PUCHAR Xcharptr; |
PCRE_PUCHAR Xcharptr; |
370 |
#endif |
#endif |
371 |
PCRE_PUCHAR Xdata; |
PCRE_PUCHAR Xdata; |
527 |
|
|
528 |
/* Ditto for the local variables */ |
/* Ditto for the local variables */ |
529 |
|
|
530 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
531 |
#define charptr frame->Xcharptr |
#define charptr frame->Xcharptr |
532 |
#endif |
#endif |
533 |
#define callpat frame->Xcallpat |
#define callpat frame->Xcallpat |
585 |
below are for variables that do not have to be preserved over a recursive call |
below are for variables that do not have to be preserved over a recursive call |
586 |
to RMATCH(). */ |
to RMATCH(). */ |
587 |
|
|
588 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
589 |
const pcre_uchar *charptr; |
const pcre_uchar *charptr; |
590 |
#endif |
#endif |
591 |
const pcre_uchar *callpat; |
const pcre_uchar *callpat; |
634 |
#define code_offset codelink |
#define code_offset codelink |
635 |
#define condassert condition |
#define condassert condition |
636 |
#define matched_once prev_is_word |
#define matched_once prev_is_word |
637 |
|
#define foc number |
638 |
|
|
639 |
/* These statements are here to stop the compiler complaining about uninitialized |
/* These statements are here to stop the compiler complaining about uninitialized |
640 |
variables. */ |
variables. */ |
660 |
complicated macro. It has to be used in one particular way. This shouldn't, |
complicated macro. It has to be used in one particular way. This shouldn't, |
661 |
however, impact performance when true recursion is being used. */ |
however, impact performance when true recursion is being used. */ |
662 |
|
|
663 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
664 |
utf = md->utf; /* Local copy of the flag */ |
utf = md->utf; /* Local copy of the flag */ |
665 |
#else |
#else |
666 |
utf = FALSE; |
utf = FALSE; |
1597 |
back a number of characters, not bytes. */ |
back a number of characters, not bytes. */ |
1598 |
|
|
1599 |
case OP_REVERSE: |
case OP_REVERSE: |
1600 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
1601 |
if (utf) |
if (utf) |
1602 |
{ |
{ |
1603 |
i = GET(ecode, 1); |
i = GET(ecode, 1); |
2217 |
} |
} |
2218 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2219 |
if ( |
if ( |
2220 |
#ifdef SUPPORT_UTF8 |
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2221 |
c < 256 && |
c < 256 && |
2222 |
#endif |
#endif |
2223 |
(md->ctypes[c] & ctype_digit) != 0 |
(md->ctypes[c] & ctype_digit) != 0 |
2234 |
} |
} |
2235 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2236 |
if ( |
if ( |
2237 |
#ifdef SUPPORT_UTF8 |
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2238 |
c >= 256 || |
c > 255 || |
2239 |
#endif |
#endif |
2240 |
(md->ctypes[c] & ctype_digit) == 0 |
(md->ctypes[c] & ctype_digit) == 0 |
2241 |
) |
) |
2251 |
} |
} |
2252 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2253 |
if ( |
if ( |
2254 |
#ifdef SUPPORT_UTF8 |
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2255 |
c < 256 && |
c < 256 && |
2256 |
#endif |
#endif |
2257 |
(md->ctypes[c] & ctype_space) != 0 |
(md->ctypes[c] & ctype_space) != 0 |
2268 |
} |
} |
2269 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2270 |
if ( |
if ( |
2271 |
#ifdef SUPPORT_UTF8 |
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2272 |
c >= 256 || |
c > 255 || |
2273 |
#endif |
#endif |
2274 |
(md->ctypes[c] & ctype_space) == 0 |
(md->ctypes[c] & ctype_space) == 0 |
2275 |
) |
) |
2285 |
} |
} |
2286 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2287 |
if ( |
if ( |
2288 |
#ifdef SUPPORT_UTF8 |
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2289 |
c < 256 && |
c < 256 && |
2290 |
#endif |
#endif |
2291 |
(md->ctypes[c] & ctype_word) != 0 |
(md->ctypes[c] & ctype_word) != 0 |
2302 |
} |
} |
2303 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
2304 |
if ( |
if ( |
2305 |
#ifdef SUPPORT_UTF8 |
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
2306 |
c >= 256 || |
c > 255 || |
2307 |
#endif |
#endif |
2308 |
(md->ctypes[c] & ctype_word) == 0 |
(md->ctypes[c] & ctype_word) == 0 |
2309 |
) |
) |
3037 |
/* Match a single character, casefully */ |
/* Match a single character, casefully */ |
3038 |
|
|
3039 |
case OP_CHAR: |
case OP_CHAR: |
3040 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3041 |
if (utf) |
if (utf) |
3042 |
{ |
{ |
3043 |
length = 1; |
length = 1; |
3109 |
} |
} |
3110 |
} |
} |
3111 |
else |
else |
3112 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
3113 |
|
|
3114 |
/* Not UTF mode */ |
/* Not UTF mode */ |
3115 |
{ |
{ |
3118 |
SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ |
SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ |
3119 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
3120 |
} |
} |
3121 |
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); |
if (TABLE_GET(ecode[1], md->lcc, ecode[1]) |
3122 |
|
!= TABLE_GET(*eptr, md->lcc, *eptr)) MRRETURN(MATCH_NOMATCH); |
3123 |
|
eptr++; |
3124 |
ecode += 2; |
ecode += 2; |
3125 |
} |
} |
3126 |
break; |
break; |
3193 |
/* Common code for all repeated single-character matches. */ |
/* Common code for all repeated single-character matches. */ |
3194 |
|
|
3195 |
REPEATCHAR: |
REPEATCHAR: |
3196 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3197 |
if (utf) |
if (utf) |
3198 |
{ |
{ |
3199 |
length = 1; |
length = 1; |
3217 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3218 |
{ |
{ |
3219 |
if (eptr <= md->end_subject - length && |
if (eptr <= md->end_subject - length && |
3220 |
memcmp(eptr, charptr, length) == 0) eptr += length; |
memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3221 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
3222 |
else if (oclength > 0 && |
else if (oclength > 0 && |
3223 |
eptr <= md->end_subject - oclength && |
eptr <= md->end_subject - oclength && |
3240 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
3241 |
if (fi >= max) MRRETURN(MATCH_NOMATCH); |
if (fi >= max) MRRETURN(MATCH_NOMATCH); |
3242 |
if (eptr <= md->end_subject - length && |
if (eptr <= md->end_subject - length && |
3243 |
memcmp(eptr, charptr, length) == 0) eptr += length; |
memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3244 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
3245 |
else if (oclength > 0 && |
else if (oclength > 0 && |
3246 |
eptr <= md->end_subject - oclength && |
eptr <= md->end_subject - oclength && |
3261 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3262 |
{ |
{ |
3263 |
if (eptr <= md->end_subject - length && |
if (eptr <= md->end_subject - length && |
3264 |
memcmp(eptr, charptr, length) == 0) eptr += length; |
memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; |
3265 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
3266 |
else if (oclength > 0 && |
else if (oclength > 0 && |
3267 |
eptr <= md->end_subject - oclength && |
eptr <= md->end_subject - oclength && |
3297 |
value of fc will always be < 128. */ |
value of fc will always be < 128. */ |
3298 |
} |
} |
3299 |
else |
else |
3300 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
3301 |
|
/* When not in UTF-8 mode, load a single-byte character. */ |
3302 |
|
fc = *ecode++; |
3303 |
|
|
3304 |
/* When not in UTF-8 mode, load a single-byte character. */ |
/* The value of fc at this point is always one character, though we may |
3305 |
|
or may not be in UTF mode. The code is duplicated for the caseless and |
|
fc = *ecode++; |
|
|
|
|
|
/* The value of fc at this point is always less than 256, though we may or |
|
|
may not be in UTF-8 mode. The code is duplicated for the caseless and |
|
3306 |
caseful cases, for speed, since matching characters is likely to be quite |
caseful cases, for speed, since matching characters is likely to be quite |
3307 |
common. First, ensure the minimum number of matches are present. If min = |
common. First, ensure the minimum number of matches are present. If min = |
3308 |
max, continue at the same level without recursing. Otherwise, if |
max, continue at the same level without recursing. Otherwise, if |
3315 |
|
|
3316 |
if (op >= OP_STARI) /* Caseless */ |
if (op >= OP_STARI) /* Caseless */ |
3317 |
{ |
{ |
3318 |
fc = md->lcc[fc]; |
#ifdef COMPILE_PCRE8 |
3319 |
|
/* fc must be < 128 */ |
3320 |
|
foc = md->fcc[fc]; |
3321 |
|
#else |
3322 |
|
#ifdef SUPPORT_UTF |
3323 |
|
#ifdef SUPPORT_UCP |
3324 |
|
if (utf && fc > 127) |
3325 |
|
foc = UCD_OTHERCASE(fc); |
3326 |
|
#else |
3327 |
|
if (utf && fc > 127) |
3328 |
|
foc = fc; |
3329 |
|
#endif /* SUPPORT_UCP */ |
3330 |
|
else |
3331 |
|
#endif /* SUPPORT_UTF */ |
3332 |
|
foc = TABLE_GET(fc, md->fcc, fc); |
3333 |
|
#endif /* COMPILE_PCRE8 */ |
3334 |
|
|
3335 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3336 |
{ |
{ |
3337 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
3339 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
3340 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
3341 |
} |
} |
3342 |
if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); |
if (fc != *eptr && foc != *eptr) MRRETURN(MATCH_NOMATCH); |
3343 |
|
eptr++; |
3344 |
} |
} |
3345 |
if (min == max) continue; |
if (min == max) continue; |
3346 |
if (minimize) |
if (minimize) |
3355 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
3356 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
3357 |
} |
} |
3358 |
if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); |
if (fc != *eptr && foc != *eptr) MRRETURN(MATCH_NOMATCH); |
3359 |
|
eptr++; |
3360 |
} |
} |
3361 |
/* Control never gets here */ |
/* Control never gets here */ |
3362 |
} |
} |
3370 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
3371 |
break; |
break; |
3372 |
} |
} |
3373 |
if (fc != md->lcc[*eptr]) break; |
if (fc != *eptr && foc != *eptr) break; |
3374 |
eptr++; |
eptr++; |
3375 |
} |
} |
3376 |
|
|
3459 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
3460 |
if (op == OP_NOTI) /* The caseless case */ |
if (op == OP_NOTI) /* The caseless case */ |
3461 |
{ |
{ |
3462 |
#ifdef SUPPORT_UTF8 |
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) |
3463 |
if (c < 256) |
if (c < 256) |
3464 |
#endif |
#endif |
3465 |
c = md->lcc[c]; |
c = md->lcc[c]; |
3466 |
if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH); |
if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH); |
3467 |
} |
} |
3468 |
else /* Caseful */ |
else /* Caseful */ |
3562 |
|
|
3563 |
if (op >= OP_NOTSTARI) /* Caseless */ |
if (op >= OP_NOTSTARI) /* Caseless */ |
3564 |
{ |
{ |
3565 |
fc = md->lcc[fc]; |
fc = TABLE_GET(fc, md->lcc, fc); |
3566 |
|
|
3567 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3568 |
if (utf) |
if (utf) |
3569 |
{ |
{ |
3570 |
register unsigned int d; |
register unsigned int d; |
3599 |
|
|
3600 |
if (minimize) |
if (minimize) |
3601 |
{ |
{ |
3602 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3603 |
if (utf) |
if (utf) |
3604 |
{ |
{ |
3605 |
register unsigned int d; |
register unsigned int d; |
3644 |
{ |
{ |
3645 |
pp = eptr; |
pp = eptr; |
3646 |
|
|
3647 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3648 |
if (utf) |
if (utf) |
3649 |
{ |
{ |
3650 |
register unsigned int d; |
register unsigned int d; |
3702 |
|
|
3703 |
else |
else |
3704 |
{ |
{ |
3705 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3706 |
if (utf) |
if (utf) |
3707 |
{ |
{ |
3708 |
register unsigned int d; |
register unsigned int d; |
3736 |
|
|
3737 |
if (minimize) |
if (minimize) |
3738 |
{ |
{ |
3739 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3740 |
if (utf) |
if (utf) |
3741 |
{ |
{ |
3742 |
register unsigned int d; |
register unsigned int d; |
3780 |
{ |
{ |
3781 |
pp = eptr; |
pp = eptr; |
3782 |
|
|
3783 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3784 |
if (utf) |
if (utf) |
3785 |
{ |
{ |
3786 |
register unsigned int d; |
register unsigned int d; |
4372 |
} /* End switch(ctype) */ |
} /* End switch(ctype) */ |
4373 |
|
|
4374 |
else |
else |
4375 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
4376 |
|
|
4377 |
/* Code for the non-UTF-8 case for minimum matching of operators other |
/* Code for the non-UTF-8 case for minimum matching of operators other |
4378 |
than OP_PROP and OP_NOTPROP. */ |
than OP_PROP and OP_NOTPROP. */ |
4815 |
else |
else |
4816 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
4817 |
|
|
4818 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
4819 |
if (utf) |
if (utf) |
4820 |
{ |
{ |
4821 |
for (fi = min;; fi++) |
for (fi = min;; fi++) |
5615 |
} |
} |
5616 |
} |
} |
5617 |
else |
else |
5618 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
5619 |
/* Not UTF mode */ |
/* Not UTF mode */ |
5620 |
{ |
{ |
5621 |
switch(ctype) |
switch(ctype) |
5863 |
LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52) |
LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52) |
5864 |
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64) |
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64) |
5865 |
LBL(65) LBL(66) |
LBL(65) LBL(66) |
5866 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
5867 |
LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30) |
LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30) |
5868 |
LBL(32) LBL(34) LBL(42) LBL(46) |
LBL(32) LBL(34) LBL(42) LBL(46) |
5869 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
5870 |
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45) |
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45) |
5871 |
LBL(59) LBL(60) LBL(61) LBL(62) |
LBL(59) LBL(60) LBL(61) LBL(62) |
5872 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
5873 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
5874 |
default: |
default: |
5875 |
DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere)); |
DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere)); |
5876 |
return PCRE_ERROR_INTERNAL; |
return PCRE_ERROR_INTERNAL; |
6021 |
/* Check a UTF-8 string if required. Pass back the character offset and error |
/* Check a UTF-8 string if required. Pass back the character offset and error |
6022 |
code for an invalid string if a results vector is available. */ |
code for an invalid string if a results vector is available. */ |
6023 |
|
|
6024 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
6025 |
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) |
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) |
6026 |
{ |
{ |
6027 |
int erroroffset; |
int erroroffset; |
6157 |
md->hasthen = (re->flags & PCRE_HASTHEN) != 0; |
md->hasthen = (re->flags & PCRE_HASTHEN) != 0; |
6158 |
|
|
6159 |
md->lcc = tables + lcc_offset; |
md->lcc = tables + lcc_offset; |
6160 |
|
md->fcc = tables + fcc_offset; |
6161 |
md->ctypes = tables + ctypes_offset; |
md->ctypes = tables + ctypes_offset; |
6162 |
|
|
6163 |
/* Handle different \R options. */ |
/* Handle different \R options. */ |
6285 |
first_char = first_char2 = re->first_char; |
first_char = first_char2 = re->first_char; |
6286 |
if ((re->flags & PCRE_FCH_CASELESS) != 0) |
if ((re->flags & PCRE_FCH_CASELESS) != 0) |
6287 |
{ |
{ |
6288 |
first_char2 = TABLE_GET(first_char, tables + fcc_offset, first_char); |
first_char2 = TABLE_GET(first_char, md->fcc, first_char); |
6289 |
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
6290 |
if (utf && first_char > 127) |
if (utf && first_char > 127) |
6291 |
first_char2 = UCD_OTHERCASE(first_char); |
first_char2 = UCD_OTHERCASE(first_char); |
6307 |
req_char = req_char2 = re->req_char; |
req_char = req_char2 = re->req_char; |
6308 |
if ((re->flags & PCRE_RCH_CASELESS) != 0) |
if ((re->flags & PCRE_RCH_CASELESS) != 0) |
6309 |
{ |
{ |
6310 |
req_char2 = TABLE_GET(req_char, tables + fcc_offset, req_char); |
req_char2 = TABLE_GET(req_char, md->fcc, req_char); |
6311 |
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) |
6312 |
if (utf && req_char > 127) |
if (utf && req_char > 127) |
6313 |
req_char2 = UCD_OTHERCASE(req_char); |
req_char2 = UCD_OTHERCASE(req_char); |