1790 |
code += 2; |
code += 2; |
1791 |
break; |
break; |
1792 |
|
|
1793 |
|
case OP_WORD_BOUNDARY: |
1794 |
|
case OP_NOT_WORD_BOUNDARY: |
1795 |
|
code++; |
1796 |
|
break; |
1797 |
|
|
1798 |
case OP_ASSERT_NOT: |
case OP_ASSERT_NOT: |
1799 |
case OP_ASSERTBACK: |
case OP_ASSERTBACK: |
1800 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
1822 |
it's anchored. However, if this is a multiline pattern, then only OP_SOD |
it's anchored. However, if this is a multiline pattern, then only OP_SOD |
1823 |
counts, since OP_CIRC can match in the middle. |
counts, since OP_CIRC can match in the middle. |
1824 |
|
|
1825 |
A branch is also implicitly anchored if it starts with .* because that will try |
A branch is also implicitly anchored if it starts with .* and DOTALL is set, |
1826 |
the rest of the pattern at all possible matching points, so there is no point |
because that will try the rest of the pattern at all possible matching points, |
1827 |
trying them again. |
so there is no point trying them again. |
1828 |
|
|
1829 |
Arguments: |
Arguments: |
1830 |
code points to start of expression (the bracket) |
code points to start of expression (the bracket) |
1842 |
register int op = *scode; |
register int op = *scode; |
1843 |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
1844 |
{ if (!is_anchored(scode, options)) return FALSE; } |
{ if (!is_anchored(scode, options)) return FALSE; } |
1845 |
else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) |
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) && |
1846 |
|
(*options & PCRE_DOTALL) != 0) |
1847 |
{ if (scode[1] != OP_ANY) return FALSE; } |
{ if (scode[1] != OP_ANY) return FALSE; } |
1848 |
else if (op != OP_SOD && |
else if (op != OP_SOD && |
1849 |
((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) |
((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) |
1857 |
|
|
1858 |
|
|
1859 |
/************************************************* |
/************************************************* |
1860 |
* Check for start with \n line expression * |
* Check for starting with ^ or .* * |
1861 |
*************************************************/ |
*************************************************/ |
1862 |
|
|
1863 |
/* This is called for multiline expressions to try to find out if every branch |
/* This is called to find out if every branch starts with ^ or .* so that |
1864 |
starts with ^ so that "first char" processing can be done to speed things up. |
"first char" processing can be done to speed things up in multiline |
1865 |
|
matching and for non-DOTALL patterns that start with .* (which must start at |
1866 |
|
the beginning or after \n). |
1867 |
|
|
1868 |
Argument: points to start of expression (the bracket) |
Argument: points to start of expression (the bracket) |
1869 |
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
1877 |
register int op = *scode; |
register int op = *scode; |
1878 |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
1879 |
{ if (!is_startline(scode)) return FALSE; } |
{ if (!is_startline(scode)) return FALSE; } |
1880 |
|
else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) |
1881 |
|
{ if (scode[1] != OP_ANY) return FALSE; } |
1882 |
else if (op != OP_CIRC) return FALSE; |
else if (op != OP_CIRC) return FALSE; |
1883 |
code += (code[1] << 8) + code[2]; |
code += (code[1] << 8) + code[2]; |
1884 |
} |
} |
2556 |
return NULL; |
return NULL; |
2557 |
} |
} |
2558 |
|
|
2559 |
/* If the anchored option was not passed, set flag if we can determine that it |
/* If the anchored option was not passed, set flag if we can determine that the |
2560 |
is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if |
pattern is anchored by virtue of ^ characters or \A or anything else (such as |
2561 |
we can determine what the first character has to be, because that speeds up |
starting with .* when DOTALL is set). |
2562 |
unanchored matches no end. In the case of multiline matches, an alternative is |
|
2563 |
to set the PCRE_STARTLINE flag if all branches start with ^. */ |
Otherwise, see if we can determine what the first character has to be, because |
2564 |
|
that speeds up unanchored matches no end. If not, see if we can set the |
2565 |
|
PCRE_STARTLINE flag. This is helpful for multiline matches when all branches |
2566 |
|
start with ^, and also when all branches start with .* for non-DOTALL matches. |
2567 |
|
*/ |
2568 |
|
|
2569 |
if ((options & PCRE_ANCHORED) == 0) |
if ((options & PCRE_ANCHORED) == 0) |
2570 |
{ |
{ |
4118 |
external_extra points to "hints" from pcre_study() or is NULL |
external_extra points to "hints" from pcre_study() or is NULL |
4119 |
subject points to the subject string |
subject points to the subject string |
4120 |
length length of subject string (may contain binary zeros) |
length length of subject string (may contain binary zeros) |
4121 |
|
start_offset where to start in the subject string |
4122 |
options option bits |
options option bits |
4123 |
offsets points to a vector of ints to be filled in with offsets |
offsets points to a vector of ints to be filled in with offsets |
4124 |
offsetcount the number of elements in the vector |
offsetcount the number of elements in the vector |
4131 |
|
|
4132 |
int |
int |
4133 |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
4134 |
const char *subject, int length, int options, int *offsets, int offsetcount) |
const char *subject, int length, int start_offset, int options, int *offsets, |
4135 |
|
int offsetcount) |
4136 |
{ |
{ |
4137 |
int resetcount, ocount; |
int resetcount, ocount; |
4138 |
int first_char = -1; |
int first_char = -1; |
4139 |
int ims = 0; |
int ims = 0; |
4140 |
match_data match_block; |
match_data match_block; |
4141 |
const uschar *start_bits = NULL; |
const uschar *start_bits = NULL; |
4142 |
const uschar *start_match = (const uschar *)subject; |
const uschar *start_match = (const uschar *)subject + start_offset; |
4143 |
const uschar *end_subject; |
const uschar *end_subject; |
4144 |
const real_pcre *re = (const real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
4145 |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
4231 |
start_bits = extra->start_bits; |
start_bits = extra->start_bits; |
4232 |
} |
} |
4233 |
|
|
4234 |
/* Loop for unanchored matches; for anchored regexps the loop runs just once. */ |
/* Loop for unanchored matches; for anchored regexes the loop runs just once. */ |
4235 |
|
|
4236 |
do |
do |
4237 |
{ |
{ |