25 |
|
|
26 |
3. Altered versions must be plainly marked as such, and must not be |
3. Altered versions must be plainly marked as such, and must not be |
27 |
misrepresented as being the original software. |
misrepresented as being the original software. |
28 |
|
|
29 |
|
4. If PCRE is embedded in any software that is released under the GNU |
30 |
|
General Purpose Licence (GPL), then the terms of that licence shall |
31 |
|
supersede any condition above with which it is incompatible. |
32 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
33 |
*/ |
*/ |
34 |
|
|
1091 |
else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE || |
else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE || |
1092 |
(int)*previous == OP_COND) |
(int)*previous == OP_COND) |
1093 |
{ |
{ |
1094 |
int i, ketoffset = 0; |
register int i; |
1095 |
|
int ketoffset = 0; |
1096 |
int len = code - previous; |
int len = code - previous; |
1097 |
|
uschar *bralink = NULL; |
1098 |
|
|
1099 |
/* If the maximum repeat count is unlimited, find the end of the bracket |
/* If the maximum repeat count is unlimited, find the end of the bracket |
1100 |
by scanning through from the start, and compute the offset back to it |
by scanning through from the start, and compute the offset back to it |
1109 |
ketoffset = code - ket; |
ketoffset = code - ket; |
1110 |
} |
} |
1111 |
|
|
1112 |
|
/* The case of a zero minimum is special because of the need to stick |
1113 |
|
OP_BRAZERO in front of it, and because the group appears once in the |
1114 |
|
data, whereas in other cases it appears the minimum number of times. For |
1115 |
|
this reason, it is simplest to treat this case separately, as otherwise |
1116 |
|
the code gets far too messy. There are several special subcases when the |
1117 |
|
minimum is zero. */ |
1118 |
|
|
1119 |
|
if (repeat_min == 0) |
1120 |
|
{ |
1121 |
|
/* If the maximum is also zero, we just omit the group from the output |
1122 |
|
altogether. */ |
1123 |
|
|
1124 |
|
if (repeat_max == 0) |
1125 |
|
{ |
1126 |
|
code = previous; |
1127 |
|
previous = NULL; |
1128 |
|
break; |
1129 |
|
} |
1130 |
|
|
1131 |
|
/* If the maximum is 1 or unlimited, we just have to stick in the |
1132 |
|
BRAZERO and do no more at this point. */ |
1133 |
|
|
1134 |
|
if (repeat_max <= 1) |
1135 |
|
{ |
1136 |
|
memmove(previous+1, previous, len); |
1137 |
|
code++; |
1138 |
|
*previous++ = OP_BRAZERO + repeat_type; |
1139 |
|
} |
1140 |
|
|
1141 |
|
/* If the maximum is greater than 1 and limited, we have to replicate |
1142 |
|
in a nested fashion, sticking OP_BRAZERO before each set of brackets. |
1143 |
|
The first one has to be handled carefully because it's the original |
1144 |
|
copy, which has to be moved up. The remainder can be handled by code |
1145 |
|
that is common with the non-zero minimum case below. We just have to |
1146 |
|
adjust the value of repeat_max, since one less copy is required. */ |
1147 |
|
|
1148 |
|
else |
1149 |
|
{ |
1150 |
|
int offset; |
1151 |
|
memmove(previous+4, previous, len); |
1152 |
|
code += 4; |
1153 |
|
*previous++ = OP_BRAZERO + repeat_type; |
1154 |
|
*previous++ = OP_BRA; |
1155 |
|
|
1156 |
|
/* We chain together the bracket offset fields that have to be |
1157 |
|
filled in later when the ends of the brackets are reached. */ |
1158 |
|
|
1159 |
|
offset = (bralink == NULL)? 0 : previous - bralink; |
1160 |
|
bralink = previous; |
1161 |
|
*previous++ = offset >> 8; |
1162 |
|
*previous++ = offset & 255; |
1163 |
|
} |
1164 |
|
|
1165 |
|
repeat_max--; |
1166 |
|
} |
1167 |
|
|
1168 |
|
/* If the minimum is greater than zero, replicate the group as many |
1169 |
|
times as necessary, and adjust the maximum to the number of subsequent |
1170 |
|
copies that we need. */ |
1171 |
|
|
1172 |
|
else |
1173 |
|
{ |
1174 |
|
for (i = 1; i < repeat_min; i++) |
1175 |
|
{ |
1176 |
|
memcpy(code, previous, len); |
1177 |
|
code += len; |
1178 |
|
} |
1179 |
|
if (repeat_max > 0) repeat_max -= repeat_min; |
1180 |
|
} |
1181 |
|
|
1182 |
|
/* This code is common to both the zero and non-zero minimum cases. If |
1183 |
|
the maximum is limited, it replicates the group in a nested fashion, |
1184 |
|
remembering the bracket starts on a stack. In the case of a zero minimum, |
1185 |
|
the first one was set up above. In all cases the repeat_max now specifies |
1186 |
|
the number of additional copies needed. */ |
1187 |
|
|
1188 |
|
if (repeat_max >= 0) |
1189 |
|
{ |
1190 |
|
for (i = repeat_max - 1; i >= 0; i--) |
1191 |
|
{ |
1192 |
|
*code++ = OP_BRAZERO + repeat_type; |
1193 |
|
|
1194 |
|
/* All but the final copy start a new nesting, maintaining the |
1195 |
|
chain of brackets outstanding. */ |
1196 |
|
|
1197 |
|
if (i != 0) |
1198 |
|
{ |
1199 |
|
int offset; |
1200 |
|
*code++ = OP_BRA; |
1201 |
|
offset = (bralink == NULL)? 0 : code - bralink; |
1202 |
|
bralink = code; |
1203 |
|
*code++ = offset >> 8; |
1204 |
|
*code++ = offset & 255; |
1205 |
|
} |
1206 |
|
|
1207 |
|
memcpy(code, previous, len); |
1208 |
|
code += len; |
1209 |
|
} |
1210 |
|
|
1211 |
|
/* Now chain through the pending brackets, and fill in their length |
1212 |
|
fields (which are holding the chain links pro tem). */ |
1213 |
|
|
1214 |
|
while (bralink != NULL) |
1215 |
|
{ |
1216 |
|
int oldlinkoffset; |
1217 |
|
int offset = code - bralink + 1; |
1218 |
|
uschar *bra = code - offset; |
1219 |
|
oldlinkoffset = (bra[1] << 8) + bra[2]; |
1220 |
|
bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; |
1221 |
|
*code++ = OP_KET; |
1222 |
|
*code++ = bra[1] = offset >> 8; |
1223 |
|
*code++ = bra[2] = (offset & 255); |
1224 |
|
} |
1225 |
|
} |
1226 |
|
|
1227 |
|
/* If the maximum is unlimited, set a repeater in the final copy. We |
1228 |
|
can't just offset backwards from the current code point, because we |
1229 |
|
don't know if there's been an options resetting after the ket. The |
1230 |
|
correct offset was computed above. */ |
1231 |
|
|
1232 |
|
else code[-ketoffset] = OP_KETRMAX + repeat_type; |
1233 |
|
|
1234 |
|
|
1235 |
|
#ifdef NEVER |
1236 |
/* If the minimum is greater than zero, and the maximum is unlimited or |
/* If the minimum is greater than zero, and the maximum is unlimited or |
1237 |
equal to the minimum, the first copy remains where it is, and is |
equal to the minimum, the first copy remains where it is, and is |
1238 |
replicated up to the minimum number of times. This case includes the + |
replicated up to the minimum number of times. This case includes the + |
1280 |
correct offset was computed above. */ |
correct offset was computed above. */ |
1281 |
|
|
1282 |
if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type; |
if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type; |
1283 |
|
#endif |
1284 |
|
|
1285 |
|
|
1286 |
} |
} |
1287 |
|
|
1288 |
/* Else there's some kind of shambles */ |
/* Else there's some kind of shambles */ |
1790 |
code += 2; |
code += 2; |
1791 |
break; |
break; |
1792 |
|
|
1793 |
|
case OP_WORD_BOUNDARY: |
1794 |
|
case OP_NOT_WORD_BOUNDARY: |
1795 |
|
code++; |
1796 |
|
break; |
1797 |
|
|
1798 |
case OP_ASSERT_NOT: |
case OP_ASSERT_NOT: |
1799 |
case OP_ASSERTBACK: |
case OP_ASSERTBACK: |
1800 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
1822 |
it's anchored. However, if this is a multiline pattern, then only OP_SOD |
it's anchored. However, if this is a multiline pattern, then only OP_SOD |
1823 |
counts, since OP_CIRC can match in the middle. |
counts, since OP_CIRC can match in the middle. |
1824 |
|
|
1825 |
A branch is also implicitly anchored if it starts with .* because that will try |
A branch is also implicitly anchored if it starts with .* and DOTALL is set, |
1826 |
the rest of the pattern at all possible matching points, so there is no point |
because that will try the rest of the pattern at all possible matching points, |
1827 |
trying them again. |
so there is no point trying them again. |
1828 |
|
|
1829 |
Arguments: |
Arguments: |
1830 |
code points to start of expression (the bracket) |
code points to start of expression (the bracket) |
1842 |
register int op = *scode; |
register int op = *scode; |
1843 |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
1844 |
{ if (!is_anchored(scode, options)) return FALSE; } |
{ if (!is_anchored(scode, options)) return FALSE; } |
1845 |
else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) |
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) && |
1846 |
|
(*options & PCRE_DOTALL) != 0) |
1847 |
{ if (scode[1] != OP_ANY) return FALSE; } |
{ if (scode[1] != OP_ANY) return FALSE; } |
1848 |
else if (op != OP_SOD && |
else if (op != OP_SOD && |
1849 |
((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) |
((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) |
1857 |
|
|
1858 |
|
|
1859 |
/************************************************* |
/************************************************* |
1860 |
* Check for start with \n line expression * |
* Check for starting with ^ or .* * |
1861 |
*************************************************/ |
*************************************************/ |
1862 |
|
|
1863 |
/* This is called for multiline expressions to try to find out if every branch |
/* This is called to find out if every branch starts with ^ or .* so that |
1864 |
starts with ^ so that "first char" processing can be done to speed things up. |
"first char" processing can be done to speed things up in multiline |
1865 |
|
matching and for non-DOTALL patterns that start with .* (which must start at |
1866 |
|
the beginning or after \n). |
1867 |
|
|
1868 |
Argument: points to start of expression (the bracket) |
Argument: points to start of expression (the bracket) |
1869 |
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
1877 |
register int op = *scode; |
register int op = *scode; |
1878 |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
1879 |
{ if (!is_startline(scode)) return FALSE; } |
{ if (!is_startline(scode)) return FALSE; } |
1880 |
|
else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) |
1881 |
|
{ if (scode[1] != OP_ANY) return FALSE; } |
1882 |
else if (op != OP_CIRC) return FALSE; |
else if (op != OP_CIRC) return FALSE; |
1883 |
code += (code[1] << 8) + code[2]; |
code += (code[1] << 8) + code[2]; |
1884 |
} |
} |
2411 |
else if (c == '+') { maxval = -1; ptr++; } |
else if (c == '+') { maxval = -1; ptr++; } |
2412 |
else if (c == '?') { minval = 0; ptr++; } |
else if (c == '?') { minval = 0; ptr++; } |
2413 |
|
|
2414 |
/* If there is a minimum > 1 we have to replicate up to minval-1 times; |
/* If the minimum is zero, we have to allow for an OP_BRAZERO before the |
2415 |
if there is a limited maximum we have to replicate up to maxval-1 times |
group, and if the maximum is greater than zero, we have to replicate |
2416 |
and allow for a BRAZERO item before each optional copy, as we also have |
maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting |
2417 |
to do before the first copy if the minimum is zero. */ |
bracket set - hence the 7. */ |
2418 |
|
|
2419 |
if (minval == 0) length++; |
if (minval == 0) |
2420 |
else if (minval > 1) length += (minval - 1) * duplength; |
{ |
2421 |
if (maxval > minval) length += (maxval - minval) * (duplength + 1); |
length++; |
2422 |
|
if (maxval > 0) length += (maxval - 1) * (duplength + 7); |
2423 |
|
} |
2424 |
|
|
2425 |
|
/* When the minimum is greater than zero, we have to replicate up to |
2426 |
|
minval-1 times, with no additions required in the copies. Then, if |
2427 |
|
there is a limited maximum we have to replicate up to maxval-1 times |
2428 |
|
allowing for a BRAZERO item before each optional copy and nesting |
2429 |
|
brackets for all but one of the optional copies. */ |
2430 |
|
|
2431 |
|
else |
2432 |
|
{ |
2433 |
|
length += (minval - 1) * duplength; |
2434 |
|
if (maxval > minval) /* Need this test as maxval=-1 means no limit */ |
2435 |
|
length += (maxval - minval) * (duplength + 7) - 6; |
2436 |
|
} |
2437 |
} |
} |
2438 |
continue; |
continue; |
2439 |
|
|
2556 |
return NULL; |
return NULL; |
2557 |
} |
} |
2558 |
|
|
2559 |
/* If the anchored option was not passed, set flag if we can determine that it |
/* If the anchored option was not passed, set flag if we can determine that the |
2560 |
is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if |
pattern is anchored by virtue of ^ characters or \A or anything else (such as |
2561 |
we can determine what the first character has to be, because that speeds up |
starting with .* when DOTALL is set). |
2562 |
unanchored matches no end. In the case of multiline matches, an alternative is |
|
2563 |
to set the PCRE_STARTLINE flag if all branches start with ^. */ |
Otherwise, see if we can determine what the first character has to be, because |
2564 |
|
that speeds up unanchored matches no end. If not, see if we can set the |
2565 |
|
PCRE_STARTLINE flag. This is helpful for multiline matches when all branches |
2566 |
|
start with ^, and also when all branches start with .* for non-DOTALL matches. |
2567 |
|
*/ |
2568 |
|
|
2569 |
if ((options & PCRE_ANCHORED) == 0) |
if ((options & PCRE_ANCHORED) == 0) |
2570 |
{ |
{ |
2933 |
int number = op - OP_BRA; |
int number = op - OP_BRA; |
2934 |
int offset = number << 1; |
int offset = number << 1; |
2935 |
|
|
2936 |
DPRINTF(("start bracket %d\n", number)); |
#ifdef DEBUG |
2937 |
|
printf("start bracket %d subject=", number); |
2938 |
|
pchars(eptr, 16, TRUE, md); |
2939 |
|
printf("\n"); |
2940 |
|
#endif |
2941 |
|
|
2942 |
if (offset < md->offset_max) |
if (offset < md->offset_max) |
2943 |
{ |
{ |
4118 |
external_extra points to "hints" from pcre_study() or is NULL |
external_extra points to "hints" from pcre_study() or is NULL |
4119 |
subject points to the subject string |
subject points to the subject string |
4120 |
length length of subject string (may contain binary zeros) |
length length of subject string (may contain binary zeros) |
4121 |
|
start_offset where to start in the subject string |
4122 |
options option bits |
options option bits |
4123 |
offsets points to a vector of ints to be filled in with offsets |
offsets points to a vector of ints to be filled in with offsets |
4124 |
offsetcount the number of elements in the vector |
offsetcount the number of elements in the vector |
4131 |
|
|
4132 |
int |
int |
4133 |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
4134 |
const char *subject, int length, int options, int *offsets, int offsetcount) |
const char *subject, int length, int start_offset, int options, int *offsets, |
4135 |
|
int offsetcount) |
4136 |
{ |
{ |
4137 |
int resetcount, ocount; |
int resetcount, ocount; |
4138 |
int first_char = -1; |
int first_char = -1; |
4139 |
int ims = 0; |
int ims = 0; |
4140 |
match_data match_block; |
match_data match_block; |
4141 |
const uschar *start_bits = NULL; |
const uschar *start_bits = NULL; |
4142 |
const uschar *start_match = (const uschar *)subject; |
const uschar *start_match = (const uschar *)subject + start_offset; |
4143 |
const uschar *end_subject; |
const uschar *end_subject; |
4144 |
const real_pcre *re = (const real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
4145 |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
4231 |
start_bits = extra->start_bits; |
start_bits = extra->start_bits; |
4232 |
} |
} |
4233 |
|
|
4234 |
/* Loop for unanchored matches; for anchored regexps the loop runs just once. */ |
/* Loop for unanchored matches; for anchored regexs the loop runs just once. */ |
4235 |
|
|
4236 |
do |
do |
4237 |
{ |
{ |