1397 |
|
|
1398 |
/* This is called by several functions that scan a compiled expression looking |
/* This is called by several functions that scan a compiled expression looking |
1399 |
for a fixed first character, or an anchoring op code etc. It skips over things |
for a fixed first character, or an anchoring op code etc. It skips over things |
1400 |
that do not influence this. For some calls, a change of option is important. |
that do not influence this. For some calls, it makes sense to skip negative |
1401 |
For some calls, it makes sense to skip negative forward and all backward |
forward and all backward assertions, and also the \b assertion; for others it |
1402 |
assertions, and also the \b assertion; for others it does not. |
does not. |
1403 |
|
|
1404 |
Arguments: |
Arguments: |
1405 |
code pointer to the start of the group |
code pointer to the start of the group |
1419 |
{ |
{ |
1420 |
switch ((int)*code) |
switch ((int)*code) |
1421 |
{ |
{ |
|
case OP_OPT: |
|
|
if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit)) |
|
|
*options = (int)code[1]; |
|
|
code += 2; |
|
|
break; |
|
|
|
|
1422 |
case OP_ASSERT_NOT: |
case OP_ASSERT_NOT: |
1423 |
case OP_ASSERTBACK: |
case OP_ASSERTBACK: |
1424 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
1555 |
case OP_RREF: |
case OP_RREF: |
1556 |
case OP_NRREF: |
case OP_NRREF: |
1557 |
case OP_DEF: |
case OP_DEF: |
|
case OP_OPT: |
|
1558 |
case OP_CALLOUT: |
case OP_CALLOUT: |
1559 |
case OP_SOD: |
case OP_SOD: |
1560 |
case OP_SOM: |
case OP_SOM: |
1562 |
case OP_EOD: |
case OP_EOD: |
1563 |
case OP_EODN: |
case OP_EODN: |
1564 |
case OP_CIRC: |
case OP_CIRC: |
1565 |
|
case OP_CIRCM: |
1566 |
case OP_DOLL: |
case OP_DOLL: |
1567 |
|
case OP_DOLLM: |
1568 |
case OP_NOT_WORD_BOUNDARY: |
case OP_NOT_WORD_BOUNDARY: |
1569 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
1570 |
cc += _pcre_OP_lengths[*cc]; |
cc += _pcre_OP_lengths[*cc]; |
1573 |
/* Handle literal characters */ |
/* Handle literal characters */ |
1574 |
|
|
1575 |
case OP_CHAR: |
case OP_CHAR: |
1576 |
case OP_CHARNC: |
case OP_CHARI: |
1577 |
case OP_NOT: |
case OP_NOT: |
1578 |
|
case OP_NOTI: |
1579 |
branchlength++; |
branchlength++; |
1580 |
cc += 2; |
cc += 2; |
1581 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1770 |
if (utf8) switch(c) |
if (utf8) switch(c) |
1771 |
{ |
{ |
1772 |
case OP_CHAR: |
case OP_CHAR: |
1773 |
case OP_CHARNC: |
case OP_CHARI: |
1774 |
case OP_EXACT: |
case OP_EXACT: |
1775 |
|
case OP_EXACTI: |
1776 |
case OP_UPTO: |
case OP_UPTO: |
1777 |
|
case OP_UPTOI: |
1778 |
case OP_MINUPTO: |
case OP_MINUPTO: |
1779 |
|
case OP_MINUPTOI: |
1780 |
case OP_POSUPTO: |
case OP_POSUPTO: |
1781 |
|
case OP_POSUPTOI: |
1782 |
case OP_STAR: |
case OP_STAR: |
1783 |
|
case OP_STARI: |
1784 |
case OP_MINSTAR: |
case OP_MINSTAR: |
1785 |
|
case OP_MINSTARI: |
1786 |
case OP_POSSTAR: |
case OP_POSSTAR: |
1787 |
|
case OP_POSSTARI: |
1788 |
case OP_PLUS: |
case OP_PLUS: |
1789 |
|
case OP_PLUSI: |
1790 |
case OP_MINPLUS: |
case OP_MINPLUS: |
1791 |
|
case OP_MINPLUSI: |
1792 |
case OP_POSPLUS: |
case OP_POSPLUS: |
1793 |
|
case OP_POSPLUSI: |
1794 |
case OP_QUERY: |
case OP_QUERY: |
1795 |
|
case OP_QUERYI: |
1796 |
case OP_MINQUERY: |
case OP_MINQUERY: |
1797 |
|
case OP_MINQUERYI: |
1798 |
case OP_POSQUERY: |
case OP_POSQUERY: |
1799 |
|
case OP_POSQUERYI: |
1800 |
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; |
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; |
1801 |
break; |
break; |
1802 |
} |
} |
1889 |
if (utf8) switch(c) |
if (utf8) switch(c) |
1890 |
{ |
{ |
1891 |
case OP_CHAR: |
case OP_CHAR: |
1892 |
case OP_CHARNC: |
case OP_CHARI: |
1893 |
case OP_EXACT: |
case OP_EXACT: |
1894 |
|
case OP_EXACTI: |
1895 |
case OP_UPTO: |
case OP_UPTO: |
1896 |
|
case OP_UPTOI: |
1897 |
case OP_MINUPTO: |
case OP_MINUPTO: |
1898 |
|
case OP_MINUPTOI: |
1899 |
case OP_POSUPTO: |
case OP_POSUPTO: |
1900 |
|
case OP_POSUPTOI: |
1901 |
case OP_STAR: |
case OP_STAR: |
1902 |
|
case OP_STARI: |
1903 |
case OP_MINSTAR: |
case OP_MINSTAR: |
1904 |
|
case OP_MINSTARI: |
1905 |
case OP_POSSTAR: |
case OP_POSSTAR: |
1906 |
|
case OP_POSSTARI: |
1907 |
case OP_PLUS: |
case OP_PLUS: |
1908 |
|
case OP_PLUSI: |
1909 |
case OP_MINPLUS: |
case OP_MINPLUS: |
1910 |
|
case OP_MINPLUSI: |
1911 |
case OP_POSPLUS: |
case OP_POSPLUS: |
1912 |
|
case OP_POSPLUSI: |
1913 |
case OP_QUERY: |
case OP_QUERY: |
1914 |
|
case OP_QUERYI: |
1915 |
case OP_MINQUERY: |
case OP_MINQUERY: |
1916 |
|
case OP_MINQUERYI: |
1917 |
case OP_POSQUERY: |
case OP_POSQUERY: |
1918 |
|
case OP_POSQUERYI: |
1919 |
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; |
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; |
1920 |
break; |
break; |
1921 |
} |
} |
2093 |
case OP_ALLANY: |
case OP_ALLANY: |
2094 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
2095 |
case OP_CHAR: |
case OP_CHAR: |
2096 |
case OP_CHARNC: |
case OP_CHARI: |
2097 |
case OP_NOT: |
case OP_NOT: |
2098 |
|
case OP_NOTI: |
2099 |
case OP_PLUS: |
case OP_PLUS: |
2100 |
case OP_MINPLUS: |
case OP_MINPLUS: |
2101 |
case OP_POSPLUS: |
case OP_POSPLUS: |
2143 |
|
|
2144 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
2145 |
case OP_STAR: |
case OP_STAR: |
2146 |
|
case OP_STARI: |
2147 |
case OP_MINSTAR: |
case OP_MINSTAR: |
2148 |
|
case OP_MINSTARI: |
2149 |
case OP_POSSTAR: |
case OP_POSSTAR: |
2150 |
|
case OP_POSSTARI: |
2151 |
case OP_QUERY: |
case OP_QUERY: |
2152 |
|
case OP_QUERYI: |
2153 |
case OP_MINQUERY: |
case OP_MINQUERY: |
2154 |
|
case OP_MINQUERYI: |
2155 |
case OP_POSQUERY: |
case OP_POSQUERY: |
2156 |
|
case OP_POSQUERYI: |
2157 |
if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f]; |
if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f]; |
2158 |
break; |
break; |
2159 |
|
|
2160 |
case OP_UPTO: |
case OP_UPTO: |
2161 |
|
case OP_UPTOI: |
2162 |
case OP_MINUPTO: |
case OP_MINUPTO: |
2163 |
|
case OP_MINUPTOI: |
2164 |
case OP_POSUPTO: |
case OP_POSUPTO: |
2165 |
|
case OP_POSUPTOI: |
2166 |
if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f]; |
if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f]; |
2167 |
break; |
break; |
2168 |
#endif |
#endif |
2654 |
#endif |
#endif |
2655 |
return c != next; |
return c != next; |
2656 |
|
|
2657 |
/* For CHARNC (caseless character) we must check the other case. If we have |
/* For CHARI (caseless character) we must check the other case. If we have |
2658 |
Unicode property support, we can use it to test the other case of |
Unicode property support, we can use it to test the other case of |
2659 |
high-valued characters. */ |
high-valued characters. */ |
2660 |
|
|
2661 |
case OP_CHARNC: |
case OP_CHARI: |
2662 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
2663 |
GETCHARTEST(c, previous); |
GETCHARTEST(c, previous); |
2664 |
#else |
#else |
2681 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF8 */ |
2682 |
return (c != cd->fcc[next]); /* Non-UTF-8 mode */ |
return (c != cd->fcc[next]); /* Non-UTF-8 mode */ |
2683 |
|
|
2684 |
/* For OP_NOT, its data is always a single-byte character. */ |
/* For OP_NOT and OP_NOTI, the data is always a single-byte character. These |
2685 |
|
opcodes are not used for multi-byte characters, because they are coded using |
2686 |
|
an XCLASS instead. */ |
2687 |
|
|
2688 |
case OP_NOT: |
case OP_NOT: |
2689 |
|
return (c = *previous) == next; |
2690 |
|
|
2691 |
|
case OP_NOTI: |
2692 |
if ((c = *previous) == next) return TRUE; |
if ((c = *previous) == next) return TRUE; |
|
if ((options & PCRE_CASELESS) == 0) return FALSE; |
|
2693 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
2694 |
if (utf8) |
if (utf8) |
2695 |
{ |
{ |
2794 |
switch(op_code) |
switch(op_code) |
2795 |
{ |
{ |
2796 |
case OP_CHAR: |
case OP_CHAR: |
2797 |
case OP_CHARNC: |
case OP_CHARI: |
2798 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
2799 |
GETCHARTEST(c, previous); |
GETCHARTEST(c, previous); |
2800 |
#else |
#else |
3258 |
the setting of any following char as a first character. */ |
the setting of any following char as a first character. */ |
3259 |
|
|
3260 |
case CHAR_CIRCUMFLEX_ACCENT: |
case CHAR_CIRCUMFLEX_ACCENT: |
3261 |
|
previous = NULL; |
3262 |
if ((options & PCRE_MULTILINE) != 0) |
if ((options & PCRE_MULTILINE) != 0) |
3263 |
{ |
{ |
3264 |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
3265 |
|
*code++ = OP_CIRCM; |
3266 |
} |
} |
3267 |
previous = NULL; |
else *code++ = OP_CIRC; |
|
*code++ = OP_CIRC; |
|
3268 |
break; |
break; |
3269 |
|
|
3270 |
case CHAR_DOLLAR_SIGN: |
case CHAR_DOLLAR_SIGN: |
3271 |
previous = NULL; |
previous = NULL; |
3272 |
*code++ = OP_DOLL; |
*code++ = ((options & PCRE_MULTILINE) != 0)? OP_DOLLM : OP_DOLL; |
3273 |
break; |
break; |
3274 |
|
|
3275 |
/* There can never be a first char if '.' is first, whatever happens about |
/* There can never be a first char if '.' is first, whatever happens about |
4015 |
|
|
4016 |
In UTF-8 mode, we can optimize the negative case only if there were no |
In UTF-8 mode, we can optimize the negative case only if there were no |
4017 |
characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR |
characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR |
4018 |
operate on single-bytes only. This is an historical hangover. Maybe one day |
operate on single-byte characters only. This is an historical hangover. |
4019 |
we can tidy these opcodes to handle multi-byte characters. |
Maybe one day we can tidy these opcodes to handle multi-byte characters. |
4020 |
|
|
4021 |
The optimization throws away the bit map. We turn the item into a |
The optimization throws away the bit map. We turn the item into a |
4022 |
1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note |
1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative. |
4023 |
that OP_NOT does not support multibyte characters. In the positive case, it |
Note that OP_NOT[I] does not support multibyte characters. In the positive |
4024 |
can cause firstbyte to be set. Otherwise, there can be no first char if |
case, it can cause firstbyte to be set. Otherwise, there can be no first |
4025 |
this item is first, whatever repeat count may follow. In the case of |
char if this item is first, whatever repeat count may follow. In the case |
4026 |
reqbyte, save the previous value for reinstating. */ |
of reqbyte, save the previous value for reinstating. */ |
4027 |
|
|
4028 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
4029 |
if (class_charcount == 1 && !class_utf8 && |
if (class_charcount == 1 && !class_utf8 && |
4034 |
{ |
{ |
4035 |
zeroreqbyte = reqbyte; |
zeroreqbyte = reqbyte; |
4036 |
|
|
4037 |
/* The OP_NOT opcode works on one-byte characters only. */ |
/* The OP_NOT[I] opcodes work on one-byte characters only. */ |
4038 |
|
|
4039 |
if (negate_class) |
if (negate_class) |
4040 |
{ |
{ |
4041 |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
4042 |
zerofirstbyte = firstbyte; |
zerofirstbyte = firstbyte; |
4043 |
*code++ = OP_NOT; |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT; |
4044 |
*code++ = class_lastchar; |
*code++ = class_lastchar; |
4045 |
break; |
break; |
4046 |
} |
} |
4198 |
the first thing in a branch because the x will have gone into firstbyte |
the first thing in a branch because the x will have gone into firstbyte |
4199 |
instead. */ |
instead. */ |
4200 |
|
|
4201 |
if (*previous == OP_CHAR || *previous == OP_CHARNC) |
if (*previous == OP_CHAR || *previous == OP_CHARI) |
4202 |
{ |
{ |
4203 |
|
op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR; |
4204 |
|
|
4205 |
/* Deal with UTF-8 characters that take up more than one byte. It's |
/* Deal with UTF-8 characters that take up more than one byte. It's |
4206 |
easier to write this out separately than try to macrify it. Use c to |
easier to write this out separately than try to macrify it. Use c to |
4207 |
hold the length of the character in bytes, plus 0x80 to flag that it's a |
hold the length of the character in bytes, plus 0x80 to flag that it's a |
4246 |
/* If previous was a single negated character ([^a] or similar), we use |
/* If previous was a single negated character ([^a] or similar), we use |
4247 |
one of the special opcodes, replacing it. The code is shared with single- |
one of the special opcodes, replacing it. The code is shared with single- |
4248 |
character repeats by setting op_type to add a suitable offset into |
character repeats by setting op_type to add a suitable offset into |
4249 |
repeat_type. We can also test for auto-possessification. OP_NOT is |
repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI |
4250 |
currently used only for single-byte chars. */ |
are currently used only for single-byte chars. */ |
4251 |
|
|
4252 |
else if (*previous == OP_NOT) |
else if (*previous == OP_NOT || *previous == OP_NOTI) |
4253 |
{ |
{ |
4254 |
op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */ |
op_type = ((*previous == OP_NOT)? OP_NOTSTAR : OP_NOTSTARI) - OP_STAR; |
4255 |
c = previous[1]; |
c = previous[1]; |
4256 |
if (!possessive_quantifier && |
if (!possessive_quantifier && |
4257 |
repeat_max < 0 && |
repeat_max < 0 && |
4448 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
4449 |
*previous == OP_XCLASS || |
*previous == OP_XCLASS || |
4450 |
#endif |
#endif |
4451 |
*previous == OP_REF) |
*previous == OP_REF || |
4452 |
|
*previous == OP_REFI) |
4453 |
{ |
{ |
4454 |
if (repeat_max == 0) |
if (repeat_max == 0) |
4455 |
{ |
{ |
4503 |
|
|
4504 |
/* If the maximum repeat count is unlimited, find the end of the bracket |
/* If the maximum repeat count is unlimited, find the end of the bracket |
4505 |
by scanning through from the start, and compute the offset back to it |
by scanning through from the start, and compute the offset back to it |
4506 |
from the current code pointer. There may be an OP_OPT setting following |
from the current code pointer. */ |
|
the final KET, so we can't find the end just by going back from the code |
|
|
pointer. */ |
|
4507 |
|
|
4508 |
if (repeat_max == -1) |
if (repeat_max == -1) |
4509 |
{ |
{ |
4803 |
case OP_QUERY: *tempcode = OP_POSQUERY; break; |
case OP_QUERY: *tempcode = OP_POSQUERY; break; |
4804 |
case OP_UPTO: *tempcode = OP_POSUPTO; break; |
case OP_UPTO: *tempcode = OP_POSUPTO; break; |
4805 |
|
|
4806 |
case OP_TYPESTAR: *tempcode = OP_TYPEPOSSTAR; break; |
case OP_STARI: *tempcode = OP_POSSTARI; break; |
4807 |
case OP_TYPEPLUS: *tempcode = OP_TYPEPOSPLUS; break; |
case OP_PLUSI: *tempcode = OP_POSPLUSI; break; |
4808 |
case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break; |
case OP_QUERYI: *tempcode = OP_POSQUERYI; break; |
4809 |
case OP_TYPEUPTO: *tempcode = OP_TYPEPOSUPTO; break; |
case OP_UPTOI: *tempcode = OP_POSUPTOI; break; |
4810 |
|
|
4811 |
case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break; |
case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break; |
4812 |
case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break; |
case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break; |
4813 |
case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break; |
case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break; |
4814 |
case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break; |
case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break; |
4815 |
|
|
4816 |
|
case OP_NOTSTARI: *tempcode = OP_NOTPOSSTARI; break; |
4817 |
|
case OP_NOTPLUSI: *tempcode = OP_NOTPOSPLUSI; break; |
4818 |
|
case OP_NOTQUERYI: *tempcode = OP_NOTPOSQUERYI; break; |
4819 |
|
case OP_NOTUPTOI: *tempcode = OP_NOTPOSUPTOI; break; |
4820 |
|
|
4821 |
|
case OP_TYPESTAR: *tempcode = OP_TYPEPOSSTAR; break; |
4822 |
|
case OP_TYPEPLUS: *tempcode = OP_TYPEPOSPLUS; break; |
4823 |
|
case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break; |
4824 |
|
case OP_TYPEUPTO: *tempcode = OP_TYPEPOSUPTO; break; |
4825 |
|
|
4826 |
/* Because we are moving code along, we must ensure that any |
/* Because we are moving code along, we must ensure that any |
4827 |
pending recursive references are updated. */ |
pending recursive references are updated. */ |
4828 |
|
|
5727 |
} |
} |
5728 |
else |
else |
5729 |
{ |
{ |
|
if ((options & PCRE_IMS) != (newoptions & PCRE_IMS)) |
|
|
{ |
|
|
*code++ = OP_OPT; |
|
|
*code++ = newoptions & PCRE_IMS; |
|
|
} |
|
5730 |
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); |
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); |
5731 |
greedy_non_default = greedy_default ^ 1; |
greedy_non_default = greedy_default ^ 1; |
5732 |
req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0; |
req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0; |
6064 |
HANDLE_REFERENCE: /* Come here from named backref handling */ |
HANDLE_REFERENCE: /* Come here from named backref handling */ |
6065 |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
6066 |
previous = code; |
previous = code; |
6067 |
*code++ = OP_REF; |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF; |
6068 |
PUT2INC(code, 0, recno); |
PUT2INC(code, 0, recno); |
6069 |
cd->backref_map |= (recno < 32)? (1 << recno) : 1; |
cd->backref_map |= (recno < 32)? (1 << recno) : 1; |
6070 |
if (recno > cd->top_backref) cd->top_backref = recno; |
if (recno > cd->top_backref) cd->top_backref = recno; |
6172 |
|
|
6173 |
ONE_CHAR: |
ONE_CHAR: |
6174 |
previous = code; |
previous = code; |
6175 |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR; |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR; |
6176 |
for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; |
for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; |
6177 |
|
|
6178 |
/* Remember if \r or \n were seen */ |
/* Remember if \r or \n were seen */ |
6236 |
/* On entry, ptr is pointing past the bracket character, but on return it |
/* On entry, ptr is pointing past the bracket character, but on return it |
6237 |
points to the closing bracket, or vertical bar, or end of string. The code |
points to the closing bracket, or vertical bar, or end of string. The code |
6238 |
variable is pointing at the byte into which the BRA operator has been stored. |
variable is pointing at the byte into which the BRA operator has been stored. |
|
If the ims options are changed at the start (for a (?ims: group) or during any |
|
|
branch, we need to insert an OP_OPT item at the start of every following branch |
|
|
to ensure they get set correctly at run time, and also pass the new options |
|
|
into every subsequent branch compile. |
|
|
|
|
6239 |
This function is used during the pre-compile phase when we are trying to find |
This function is used during the pre-compile phase when we are trying to find |
6240 |
out the amount of memory needed, as well as during the real compile phase. The |
out the amount of memory needed, as well as during the real compile phase. The |
6241 |
value of lengthptr distinguishes the two phases. |
value of lengthptr distinguishes the two phases. |
6327 |
|
|
6328 |
if (reset_bracount) cd->bracount = orig_bracount; |
if (reset_bracount) cd->bracount = orig_bracount; |
6329 |
|
|
|
/* Handle a change of ims options at the start of the branch */ |
|
|
|
|
|
if ((options & PCRE_IMS) != oldims) |
|
|
{ |
|
|
*code++ = OP_OPT; |
|
|
*code++ = options & PCRE_IMS; |
|
|
length += 2; |
|
|
} |
|
|
|
|
6330 |
/* Set up dummy OP_REVERSE if lookbehind assertion */ |
/* Set up dummy OP_REVERSE if lookbehind assertion */ |
6331 |
|
|
6332 |
if (lookbehind) |
if (lookbehind) |
6483 |
cd->open_caps = cd->open_caps->next; |
cd->open_caps = cd->open_caps->next; |
6484 |
} |
} |
6485 |
|
|
|
/* Reset options if needed. */ |
|
|
|
|
|
if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS) |
|
|
{ |
|
|
*code++ = OP_OPT; |
|
|
*code++ = oldims; |
|
|
length += 2; |
|
|
} |
|
|
|
|
6486 |
/* Retain the highest bracket number, in case resetting was used. */ |
/* Retain the highest bracket number, in case resetting was used. */ |
6487 |
|
|
6488 |
cd->bracount = max_bracount; |
cd->bracount = max_bracount; |
6542 |
/* Try to find out if this is an anchored regular expression. Consider each |
/* Try to find out if this is an anchored regular expression. Consider each |
6543 |
alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket |
alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket |
6544 |
all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then |
all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then |
6545 |
it's anchored. However, if this is a multiline pattern, then only OP_SOD |
it's anchored. However, if this is a multiline pattern, then only OP_SOD will |
6546 |
counts, since OP_CIRC can match in the middle. |
be found, because ^ generates OP_CIRCM in that mode. |
6547 |
|
|
6548 |
We can also consider a regex to be anchored if OP_SOM starts all its branches. |
We can also consider a regex to be anchored if OP_SOM starts all its branches. |
6549 |
This is the code for \G, which means "match at start of match position, taking |
This is the code for \G, which means "match at start of match position, taking |
6617 |
|
|
6618 |
/* Check for explicit anchoring */ |
/* Check for explicit anchoring */ |
6619 |
|
|
6620 |
else if (op != OP_SOD && op != OP_SOM && |
else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE; |
|
((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) |
|
|
return FALSE; |
|
6621 |
code += GET(code, 1); |
code += GET(code, 1); |
6622 |
} |
} |
6623 |
while (*code == OP_ALT); /* Loop for each alternative */ |
while (*code == OP_ALT); /* Loop for each alternative */ |
6717 |
|
|
6718 |
/* Check for explicit circumflex */ |
/* Check for explicit circumflex */ |
6719 |
|
|
6720 |
else if (op != OP_CIRC) return FALSE; |
else if (op != OP_CIRC && op != OP_CIRCM) return FALSE; |
6721 |
|
|
6722 |
/* Move on to the next alternative */ |
/* Move on to the next alternative */ |
6723 |
|
|
6778 |
scode += 2; |
scode += 2; |
6779 |
|
|
6780 |
case OP_CHAR: |
case OP_CHAR: |
6781 |
case OP_CHARNC: |
case OP_CHARI: |
6782 |
case OP_PLUS: |
case OP_PLUS: |
6783 |
case OP_MINPLUS: |
case OP_MINPLUS: |
6784 |
case OP_POSPLUS: |
case OP_POSPLUS: |