414 |
const pcre_uchar *start_code = md->start_code; |
const pcre_uchar *start_code = md->start_code; |
415 |
|
|
416 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
417 |
BOOL utf8 = (md->poptions & PCRE_UTF8) != 0; |
BOOL utf = (md->poptions & PCRE_UTF8) != 0; |
418 |
#else |
#else |
419 |
BOOL utf8 = FALSE; |
BOOL utf = FALSE; |
420 |
#endif |
#endif |
421 |
|
|
422 |
rlevel++; |
rlevel++; |
474 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
475 |
/* In character mode we have to step back character by character */ |
/* In character mode we have to step back character by character */ |
476 |
|
|
477 |
if (utf8) |
if (utf) |
478 |
{ |
{ |
479 |
for (gone_back = 0; gone_back < max_back; gone_back++) |
for (gone_back = 0; gone_back < max_back; gone_back++) |
480 |
{ |
{ |
606 |
{ |
{ |
607 |
clen = 1; /* Number of bytes in the character */ |
clen = 1; /* Number of bytes in the character */ |
608 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
609 |
if (utf8) { GETCHARLEN(c, ptr, clen); } else |
if (utf) { GETCHARLEN(c, ptr, clen); } else |
610 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF8 */ |
611 |
c = *ptr; |
c = *ptr; |
612 |
} |
} |
695 |
{ |
{ |
696 |
dlen = 1; |
dlen = 1; |
697 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
698 |
if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else |
if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else |
699 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF8 */ |
700 |
d = code[coptable[codevalue]]; |
d = code[coptable[codevalue]]; |
701 |
if (codevalue >= OP_TYPESTAR) |
if (codevalue >= OP_TYPESTAR) |
960 |
const pcre_uchar *temp = ptr - 1; |
const pcre_uchar *temp = ptr - 1; |
961 |
if (temp < md->start_used_ptr) md->start_used_ptr = temp; |
if (temp < md->start_used_ptr) md->start_used_ptr = temp; |
962 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
963 |
if (utf8) BACKCHAR(temp); |
if (utf) BACKCHAR(temp); |
964 |
#endif |
#endif |
965 |
GETCHARTEST(d, temp); |
GETCHARTEST(d, temp); |
966 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
1986 |
if (clen == 0) break; |
if (clen == 0) break; |
1987 |
|
|
1988 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1989 |
if (utf8) |
if (utf) |
1990 |
{ |
{ |
1991 |
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else |
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else |
1992 |
{ |
{ |
2007 |
} |
} |
2008 |
else |
else |
2009 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF8 */ |
2010 |
|
/* Not UTF mode */ |
|
/* Non-UTF-8 mode */ |
|
2011 |
{ |
{ |
2012 |
if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); } |
if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); } |
2013 |
} |
} |
2210 |
if (caseless) |
if (caseless) |
2211 |
{ |
{ |
2212 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
2213 |
if (utf8 && d >= 128) |
if (utf && d >= 128) |
2214 |
{ |
{ |
2215 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2216 |
otherd = UCD_OTHERCASE(d); |
otherd = UCD_OTHERCASE(d); |
2257 |
if (caseless) |
if (caseless) |
2258 |
{ |
{ |
2259 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
2260 |
if (utf8 && d >= 128) |
if (utf && d >= 128) |
2261 |
{ |
{ |
2262 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2263 |
otherd = UCD_OTHERCASE(d); |
otherd = UCD_OTHERCASE(d); |
2302 |
if (caseless) |
if (caseless) |
2303 |
{ |
{ |
2304 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
2305 |
if (utf8 && d >= 128) |
if (utf && d >= 128) |
2306 |
{ |
{ |
2307 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2308 |
otherd = UCD_OTHERCASE(d); |
otherd = UCD_OTHERCASE(d); |
2339 |
if (caseless) |
if (caseless) |
2340 |
{ |
{ |
2341 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
2342 |
if (utf8 && d >= 128) |
if (utf && d >= 128) |
2343 |
{ |
{ |
2344 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2345 |
otherd = UCD_OTHERCASE(d); |
otherd = UCD_OTHERCASE(d); |
2383 |
if (caseless) |
if (caseless) |
2384 |
{ |
{ |
2385 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
2386 |
if (utf8 && d >= 128) |
if (utf && d >= 128) |
2387 |
{ |
{ |
2388 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
2389 |
otherd = UCD_OTHERCASE(d); |
otherd = UCD_OTHERCASE(d); |
3004 |
real_pcre *re = (real_pcre *)argument_re; |
real_pcre *re = (real_pcre *)argument_re; |
3005 |
dfa_match_data match_block; |
dfa_match_data match_block; |
3006 |
dfa_match_data *md = &match_block; |
dfa_match_data *md = &match_block; |
3007 |
BOOL utf8, anchored, startline, firstline; |
BOOL utf, anchored, startline, firstline; |
3008 |
const pcre_uchar *current_subject, *end_subject; |
const pcre_uchar *current_subject, *end_subject; |
3009 |
const pcre_uint8 *lcc; |
const pcre_uint8 *lcc; |
3010 |
|
|
3072 |
req_char_ptr = current_subject - 1; |
req_char_ptr = current_subject - 1; |
3073 |
|
|
3074 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
3075 |
utf8 = (re->options & PCRE_UTF8) != 0; |
/* PCRE_UTF16 has the same value as PCRE_UTF8. */ |
3076 |
|
utf = (re->options & PCRE_UTF8) != 0; |
3077 |
#else |
#else |
3078 |
utf8 = FALSE; |
utf = FALSE; |
3079 |
#endif |
#endif |
3080 |
|
|
3081 |
anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 || |
anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 || |
3147 |
back the character offset. */ |
back the character offset. */ |
3148 |
|
|
3149 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
3150 |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) |
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) |
3151 |
{ |
{ |
3152 |
int erroroffset; |
int erroroffset; |
3153 |
int errorcode = PRIV(valid_utf8)((pcre_uchar *)subject, length, &erroroffset); |
int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset); |
3154 |
if (errorcode != 0) |
if (errorcode != 0) |
3155 |
{ |
{ |
3156 |
if (offsetcount >= 2) |
if (offsetcount >= 2) |
3235 |
{ |
{ |
3236 |
PCRE_PUCHAR t = current_subject; |
PCRE_PUCHAR t = current_subject; |
3237 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
3238 |
if (utf8) |
if (utf) |
3239 |
{ |
{ |
3240 |
while (t < md->end_subject && !IS_NEWLINE(t)) |
while (t < md->end_subject && !IS_NEWLINE(t)) |
3241 |
{ |
{ |
3278 |
if (current_subject > md->start_subject + start_offset) |
if (current_subject > md->start_subject + start_offset) |
3279 |
{ |
{ |
3280 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
3281 |
if (utf8) |
if (utf) |
3282 |
{ |
{ |
3283 |
while (current_subject < end_subject && |
while (current_subject < end_subject && |
3284 |
!WAS_NEWLINE(current_subject)) |
!WAS_NEWLINE(current_subject)) |
3317 |
{ |
{ |
3318 |
current_subject++; |
current_subject++; |
3319 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
3320 |
if (utf8) |
if (utf) |
3321 |
while(current_subject < end_subject && |
while(current_subject < end_subject && |
3322 |
(*current_subject & 0xc0) == 0x80) current_subject++; |
(*current_subject & 0xc0) == 0x80) current_subject++; |
3323 |
#endif |
#endif |
3426 |
|
|
3427 |
if (firstline && IS_NEWLINE(current_subject)) break; |
if (firstline && IS_NEWLINE(current_subject)) break; |
3428 |
current_subject++; |
current_subject++; |
3429 |
if (utf8) |
if (utf) |
3430 |
{ |
{ |
3431 |
while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80) |
while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80) |
3432 |
current_subject++; |
current_subject++; |