1466 |
{ |
{ |
1467 |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
1468 |
ptr++; |
ptr++; |
1469 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
1470 |
if (utf) while ((*ptr & 0xc0) == 0x80) ptr++; |
if (utf) FORWARDCHAR(ptr); |
1471 |
#endif |
#endif |
1472 |
} |
} |
1473 |
if (*ptr == 0) goto FAIL_EXIT; |
if (*ptr == 0) goto FAIL_EXIT; |
1759 |
case OP_NOTI: |
case OP_NOTI: |
1760 |
branchlength++; |
branchlength++; |
1761 |
cc += 2; |
cc += 2; |
1762 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
1763 |
if (utf && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f]; |
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
1764 |
#endif |
#endif |
1765 |
break; |
break; |
1766 |
|
|
1773 |
case OP_NOTEXACTI: |
case OP_NOTEXACTI: |
1774 |
branchlength += GET2(cc,1); |
branchlength += GET2(cc,1); |
1775 |
cc += 2 + IMM2_SIZE; |
cc += 2 + IMM2_SIZE; |
1776 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
1777 |
if (utf && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f]; |
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
1778 |
#endif |
#endif |
1779 |
break; |
break; |
1780 |
|
|
2041 |
a multi-byte character. The length in the table is a minimum, so we have to |
a multi-byte character. The length in the table is a minimum, so we have to |
2042 |
arrange to skip the extra bytes. */ |
arrange to skip the extra bytes. */ |
2043 |
|
|
2044 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
2045 |
if (utf) switch(c) |
if (utf) switch(c) |
2046 |
{ |
{ |
2047 |
case OP_CHAR: |
case OP_CHAR: |
2072 |
case OP_MINQUERYI: |
case OP_MINQUERYI: |
2073 |
case OP_POSQUERY: |
case OP_POSQUERY: |
2074 |
case OP_POSQUERYI: |
case OP_POSQUERYI: |
2075 |
if (code[-1] >= 0xc0) code += PRIV(utf8_table4)[code[-1] & 0x3f]; |
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); |
2076 |
break; |
break; |
2077 |
} |
} |
2078 |
#else |
#else |
2161 |
by a multi-byte character. The length in the table is a minimum, so we have |
by a multi-byte character. The length in the table is a minimum, so we have |
2162 |
to arrange to skip the extra bytes. */ |
to arrange to skip the extra bytes. */ |
2163 |
|
|
2164 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
2165 |
if (utf) switch(c) |
if (utf) switch(c) |
2166 |
{ |
{ |
2167 |
case OP_CHAR: |
case OP_CHAR: |
2192 |
case OP_MINQUERYI: |
case OP_MINQUERYI: |
2193 |
case OP_POSQUERY: |
case OP_POSQUERY: |
2194 |
case OP_POSQUERYI: |
case OP_POSQUERYI: |
2195 |
if (code[-1] >= 0xc0) code += PRIV(utf8_table4)[code[-1] & 0x3f]; |
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); |
2196 |
break; |
break; |
2197 |
} |
} |
2198 |
#else |
#else |
2452 |
/* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO, |
/* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO, |
2453 |
MINUPTO, and POSUPTO may be followed by a multibyte character */ |
MINUPTO, and POSUPTO may be followed by a multibyte character */ |
2454 |
|
|
2455 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
2456 |
case OP_STAR: |
case OP_STAR: |
2457 |
case OP_STARI: |
case OP_STARI: |
2458 |
case OP_MINSTAR: |
case OP_MINSTAR: |
2465 |
case OP_MINQUERYI: |
case OP_MINQUERYI: |
2466 |
case OP_POSQUERY: |
case OP_POSQUERY: |
2467 |
case OP_POSQUERYI: |
case OP_POSQUERYI: |
2468 |
if (utf && code[1] >= 0xc0) code += PRIV(utf8_table4)[code[1] & 0x3f]; |
if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]); |
2469 |
break; |
break; |
2470 |
|
|
2471 |
case OP_UPTO: |
case OP_UPTO: |
2474 |
case OP_MINUPTOI: |
case OP_MINUPTOI: |
2475 |
case OP_POSUPTO: |
case OP_POSUPTO: |
2476 |
case OP_POSUPTOI: |
case OP_POSUPTOI: |
2477 |
if (utf && code[1 + IMM2_SIZE] >= 0xc0) code += PRIV(utf8_table4)[code[1 + IMM2_SIZE] & 0x3f]; |
if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]); |
2478 |
break; |
break; |
2479 |
#endif |
#endif |
2480 |
|
|
2913 |
{ |
{ |
2914 |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
2915 |
ptr++; |
ptr++; |
2916 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
2917 |
if (utf) while ((*ptr & 0xc0) == 0x80) ptr++; |
if (utf) FORWARDCHAR(ptr); |
2918 |
#endif |
#endif |
2919 |
} |
} |
2920 |
} |
} |
2957 |
{ |
{ |
2958 |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
2959 |
ptr++; |
ptr++; |
2960 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
2961 |
if (utf) while ((*ptr & 0xc0) == 0x80) ptr++; |
if (utf) FORWARDCHAR(ptr); |
2962 |
#endif |
#endif |
2963 |
} |
} |
2964 |
} |
} |
3424 |
int tempbracount; |
int tempbracount; |
3425 |
pcre_uchar mcbuffer[8]; |
pcre_uchar mcbuffer[8]; |
3426 |
|
|
3427 |
/* Get next byte in the pattern */ |
/* Get next character in the pattern */ |
3428 |
|
|
3429 |
c = *ptr; |
c = *ptr; |
3430 |
|
|
3556 |
{ |
{ |
3557 |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
3558 |
ptr++; |
ptr++; |
3559 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
3560 |
if (utf) while ((*ptr & 0xc0) == 0x80) ptr++; |
if (utf) FORWARDCHAR(ptr); |
3561 |
#endif |
#endif |
3562 |
} |
} |
3563 |
if (*ptr != 0) continue; |
if (*ptr != 0) continue; |
4601 |
{ |
{ |
4602 |
op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR; |
op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR; |
4603 |
|
|
4604 |
/* Deal with UTF-8 characters that take up more than one byte. It's |
/* Deal with UTF characters that take up more than one code unit. It's |
4605 |
easier to write this out separately than try to macrify it. Use c to |
easier to write this out separately than try to macrify it. Use c to |
4606 |
hold the length of the character in bytes, plus 0x80 to flag that it's a |
hold the length of the character in bytes, plus 0x80 to flag that it's a |
4607 |
length rather than a small character. */ |
length rather than a small character. */ |
4610 |
if (utf && (code[-1] & 0x80) != 0) |
if (utf && (code[-1] & 0x80) != 0) |
4611 |
{ |
{ |
4612 |
pcre_uchar *lastchar = code - 1; |
pcre_uchar *lastchar = code - 1; |
4613 |
while((*lastchar & 0xc0) == 0x80) lastchar--; |
BACKCHAR(lastchar); |
4614 |
c = code - lastchar; /* Length of UTF-8 character */ |
c = code - lastchar; /* Length of UTF-8 character */ |
4615 |
memcpy(utf_chars, lastchar, c); /* Save the char */ |
memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */ |
4616 |
c |= 0x80; /* Flag c as a length */ |
c |= 0x80; /* Flag c as a length */ |
4617 |
} |
} |
4618 |
else |
else |
4619 |
#endif |
#endif |
4620 |
|
|
4621 |
/* Handle the case of a single byte - either with no UTF8 support, or |
/* Handle the case of a single character - either with no UTF support, or |
4622 |
with UTF-8 disabled, or for a UTF-8 character < 128. */ |
with UTF disabled, or for a UTF character that is a single code unit. */ |
4623 |
|
|
4624 |
{ |
{ |
4625 |
c = code[-1]; |
c = code[-1]; |
5273 |
else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT) |
else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT) |
5274 |
{ |
{ |
5275 |
tempcode += PRIV(OP_lengths)[*tempcode]; |
tempcode += PRIV(OP_lengths)[*tempcode]; |
5276 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
5277 |
if (utf && tempcode[-1] >= 0xc0) |
if (utf && HAS_EXTRALEN(tempcode[-1])) |
5278 |
tempcode += PRIV(utf8_table4)[tempcode[-1] & 0x3f]; |
tempcode += GET_EXTRALEN(tempcode[-1]); |
5279 |
#endif |
#endif |
5280 |
} |
} |
5281 |
|
|
6659 |
mclength = 1; |
mclength = 1; |
6660 |
mcbuffer[0] = c; |
mcbuffer[0] = c; |
6661 |
|
|
6662 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
6663 |
if (utf && c >= 0xc0) |
if (utf && HAS_EXTRALEN(c)) |
6664 |
{ |
{ |
6665 |
while ((ptr[1] & 0xc0) == 0x80) |
INTERNALCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr)); |
|
mcbuffer[mclength++] = *(++ptr); |
|
6666 |
} |
} |
6667 |
#endif |
#endif |
6668 |
|
|