9 |
|
|
10 |
Written by: Philip Hazel <ph10@cam.ac.uk> |
Written by: Philip Hazel <ph10@cam.ac.uk> |
11 |
|
|
12 |
Copyright (c) 1998 University of Cambridge |
Copyright (c) 1997-1999 University of Cambridge |
13 |
|
|
14 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
15 |
Permission is granted to anyone to use this software for any purpose on any |
Permission is granted to anyone to use this software for any purpose on any |
25 |
|
|
26 |
3. Altered versions must be plainly marked as such, and must not be |
3. Altered versions must be plainly marked as such, and must not be |
27 |
misrepresented as being the original software. |
misrepresented as being the original software. |
28 |
|
|
29 |
|
4. If PCRE is embedded in any software that is released under the GNU |
30 |
|
General Purpose Licence (GPL), then the terms of that licence shall |
31 |
|
supersede any condition above with which it is incompatible. |
32 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
33 |
*/ |
*/ |
34 |
|
|
111 |
|
|
112 |
static BOOL |
static BOOL |
113 |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
114 |
BOOL, int); |
BOOL, int, compile_data *); |
|
|
|
|
/* Structure for passing "static" information around between the functions |
|
|
doing the matching, so that they are thread-safe. */ |
|
|
|
|
|
typedef struct match_data { |
|
|
int errorcode; /* As it says */ |
|
|
int *offset_vector; /* Offset vector */ |
|
|
int offset_end; /* One past the end */ |
|
|
int offset_max; /* The maximum usable for return data */ |
|
|
BOOL offset_overflow; /* Set if too many extractions */ |
|
|
BOOL notbol; /* NOTBOL flag */ |
|
|
BOOL noteol; /* NOTEOL flag */ |
|
|
BOOL endonly; /* Dollar not before final \n */ |
|
|
const uschar *start_subject; /* Start of the subject string */ |
|
|
const uschar *end_subject; /* End of the subject string */ |
|
|
const uschar *end_match_ptr; /* Subject position at end match */ |
|
|
int end_offset_top; /* Highwater mark at end of match */ |
|
|
} match_data; |
|
115 |
|
|
116 |
|
|
117 |
|
|
131 |
|
|
132 |
|
|
133 |
/************************************************* |
/************************************************* |
134 |
|
* Default character tables * |
135 |
|
*************************************************/ |
136 |
|
|
137 |
|
/* A default set of character tables is included in the PCRE binary. Its source |
138 |
|
is built by the maketables auxiliary program, which uses the default C ctypes |
139 |
|
functions, and put in the file chartables.c. These tables are used by PCRE |
140 |
|
whenever the caller of pcre_compile() does not provide an alternate set of |
141 |
|
tables. */ |
142 |
|
|
143 |
|
#include "chartables.c" |
144 |
|
|
145 |
|
|
146 |
|
|
147 |
|
/************************************************* |
148 |
* Return version string * |
* Return version string * |
149 |
*************************************************/ |
*************************************************/ |
150 |
|
|
237 |
bracount number of previous extracting brackets |
bracount number of previous extracting brackets |
238 |
options the options bits |
options the options bits |
239 |
isclass TRUE if inside a character class |
isclass TRUE if inside a character class |
240 |
|
cd pointer to char tables block |
241 |
|
|
242 |
Returns: zero or positive => a data character |
Returns: zero or positive => a data character |
243 |
negative => a special escape sequence |
negative => a special escape sequence |
246 |
|
|
247 |
static int |
static int |
248 |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
249 |
int options, BOOL isclass) |
int options, BOOL isclass, compile_data *cd) |
250 |
{ |
{ |
251 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
252 |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
289 |
{ |
{ |
290 |
oldptr = ptr; |
oldptr = ptr; |
291 |
c -= '0'; |
c -= '0'; |
292 |
while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0) |
while ((cd->ctypes[ptr[1]] & ctype_digit) != 0) |
293 |
c = c * 10 + *(++ptr) - '0'; |
c = c * 10 + *(++ptr) - '0'; |
294 |
if (c < 10 || c <= bracount) |
if (c < 10 || c <= bracount) |
295 |
{ |
{ |
315 |
|
|
316 |
case '0': |
case '0': |
317 |
c -= '0'; |
c -= '0'; |
318 |
while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 && |
while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 && |
319 |
ptr[1] != '8' && ptr[1] != '9') |
ptr[1] != '8' && ptr[1] != '9') |
320 |
c = c * 8 + *(++ptr) - '0'; |
c = c * 8 + *(++ptr) - '0'; |
321 |
break; |
break; |
324 |
|
|
325 |
case 'x': |
case 'x': |
326 |
c = 0; |
c = 0; |
327 |
while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0) |
while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0) |
328 |
{ |
{ |
329 |
ptr++; |
ptr++; |
330 |
c = c * 16 + pcre_lcc[*ptr] - |
c = c * 16 + cd->lcc[*ptr] - |
331 |
(((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W'); |
(((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W'); |
332 |
} |
} |
333 |
break; |
break; |
334 |
|
|
342 |
|
|
343 |
/* A letter is upper-cased; then the 0x40 bit is flipped */ |
/* A letter is upper-cased; then the 0x40 bit is flipped */ |
344 |
|
|
345 |
if (c >= 'a' && c <= 'z') c = pcre_fcc[c]; |
if (c >= 'a' && c <= 'z') c = cd->fcc[c]; |
346 |
c ^= 0x40; |
c ^= 0x40; |
347 |
break; |
break; |
348 |
|
|
349 |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
350 |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
351 |
for Perl compatibility, it is a literal. */ |
for Perl compatibility, it is a literal. This code looks a bit odd, but |
352 |
|
there used to be some cases other than the default, and there may be again |
353 |
|
in future, so I haven't "optimized" it. */ |
354 |
|
|
355 |
default: |
default: |
356 |
if ((options & PCRE_EXTRA) != 0) switch(c) |
if ((options & PCRE_EXTRA) != 0) switch(c) |
380 |
|
|
381 |
Arguments: |
Arguments: |
382 |
p pointer to the first char after '{' |
p pointer to the first char after '{' |
383 |
|
cd pointer to char tables block |
384 |
|
|
385 |
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
386 |
*/ |
*/ |
387 |
|
|
388 |
static BOOL |
static BOOL |
389 |
is_counted_repeat(const uschar *p) |
is_counted_repeat(const uschar *p, compile_data *cd) |
390 |
{ |
{ |
391 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; |
392 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((cd->ctypes[*p] & ctype_digit) != 0) p++; |
393 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
394 |
|
|
395 |
if (*p++ != ',') return FALSE; |
if (*p++ != ',') return FALSE; |
396 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
397 |
|
|
398 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; |
399 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((cd->ctypes[*p] & ctype_digit) != 0) p++; |
400 |
return (*p == '}'); |
return (*p == '}'); |
401 |
} |
} |
402 |
|
|
416 |
maxp pointer to int for max |
maxp pointer to int for max |
417 |
returned as -1 if no max |
returned as -1 if no max |
418 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
419 |
|
cd pointer to character tables block |
420 |
|
|
421 |
Returns: pointer to '}' on success; |
Returns: pointer to '}' on success; |
422 |
current ptr on error, with errorptr set |
current ptr on error, with errorptr set |
423 |
*/ |
*/ |
424 |
|
|
425 |
static const uschar * |
static const uschar * |
426 |
read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr) |
read_repeat_counts(const uschar *p, int *minp, int *maxp, |
427 |
|
const char **errorptr, compile_data *cd) |
428 |
{ |
{ |
429 |
int min = 0; |
int min = 0; |
430 |
int max = -1; |
int max = -1; |
431 |
|
|
432 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; |
while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; |
433 |
|
|
434 |
if (*p == '}') max = min; else |
if (*p == '}') max = min; else |
435 |
{ |
{ |
436 |
if (*(++p) != '}') |
if (*(++p) != '}') |
437 |
{ |
{ |
438 |
max = 0; |
max = 0; |
439 |
while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; |
while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; |
440 |
if (max < min) |
if (max < min) |
441 |
{ |
{ |
442 |
*errorptr = ERR4; |
*errorptr = ERR4; |
621 |
/* Scan the pattern, compiling it into the code vector. |
/* Scan the pattern, compiling it into the code vector. |
622 |
|
|
623 |
Arguments: |
Arguments: |
624 |
options the option bits |
options the option bits |
625 |
brackets points to number of brackets used |
brackets points to number of brackets used |
626 |
code points to the pointer to the current code point |
code points to the pointer to the current code point |
627 |
ptrptr points to the current pattern pointer |
ptrptr points to the current pattern pointer |
628 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
629 |
optchanged set to the value of the last OP_OPT item compiled |
optchanged set to the value of the last OP_OPT item compiled |
630 |
|
cd contains pointers to tables |
631 |
|
|
632 |
Returns: TRUE on success |
Returns: TRUE on success |
633 |
FALSE, with *errorptr set on error |
FALSE, with *errorptr set on error |
634 |
*/ |
*/ |
635 |
|
|
636 |
static BOOL |
static BOOL |
637 |
compile_branch(int options, int *brackets, uschar **codeptr, |
compile_branch(int options, int *brackets, uschar **codeptr, |
638 |
const uschar **ptrptr, const char **errorptr, int *optchanged) |
const uschar **ptrptr, const char **errorptr, int *optchanged, |
639 |
|
compile_data *cd) |
640 |
{ |
{ |
641 |
int repeat_type, op_type; |
int repeat_type, op_type; |
642 |
int repeat_min, repeat_max; |
int repeat_min, repeat_max; |
668 |
c = *ptr; |
c = *ptr; |
669 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
670 |
{ |
{ |
671 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
672 |
if (c == '#') |
if (c == '#') |
673 |
{ |
{ |
674 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
756 |
|
|
757 |
if (c == '\\') |
if (c == '\\') |
758 |
{ |
{ |
759 |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
760 |
if (-c == ESC_b) c = '\b'; |
if (-c == ESC_b) c = '\b'; |
761 |
else if (c < 0) |
else if (c < 0) |
762 |
{ |
{ |
763 |
|
register const uschar *cbits = cd->cbits; |
764 |
class_charcount = 10; |
class_charcount = 10; |
765 |
switch (-c) |
switch (-c) |
766 |
{ |
{ |
767 |
case ESC_d: |
case ESC_d: |
768 |
for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit]; |
769 |
continue; |
continue; |
770 |
|
|
771 |
case ESC_D: |
case ESC_D: |
772 |
for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit]; |
773 |
continue; |
continue; |
774 |
|
|
775 |
case ESC_w: |
case ESC_w: |
776 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) |
777 |
class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]); |
class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]); |
778 |
continue; |
continue; |
779 |
|
|
780 |
case ESC_W: |
case ESC_W: |
781 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) |
782 |
class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]); |
class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]); |
783 |
continue; |
continue; |
784 |
|
|
785 |
case ESC_s: |
case ESC_s: |
786 |
for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space]; |
787 |
continue; |
continue; |
788 |
|
|
789 |
case ESC_S: |
case ESC_S: |
790 |
for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space]; |
791 |
continue; |
continue; |
792 |
|
|
793 |
default: |
default: |
819 |
|
|
820 |
if (d == '\\') |
if (d == '\\') |
821 |
{ |
{ |
822 |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
823 |
if (d < 0) |
if (d < 0) |
824 |
{ |
{ |
825 |
if (d == -ESC_b) d = '\b'; else |
if (d == -ESC_b) d = '\b'; else |
841 |
class[c/8] |= (1 << (c&7)); |
class[c/8] |= (1 << (c&7)); |
842 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
843 |
{ |
{ |
844 |
int uc = pcre_fcc[c]; /* flip case */ |
int uc = cd->fcc[c]; /* flip case */ |
845 |
class[uc/8] |= (1 << (uc&7)); |
class[uc/8] |= (1 << (uc&7)); |
846 |
} |
} |
847 |
class_charcount++; /* in case a one-char range */ |
class_charcount++; /* in case a one-char range */ |
856 |
class [c/8] |= (1 << (c&7)); |
class [c/8] |= (1 << (c&7)); |
857 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
858 |
{ |
{ |
859 |
c = pcre_fcc[c]; /* flip case */ |
c = cd->fcc[c]; /* flip case */ |
860 |
class[c/8] |= (1 << (c&7)); |
class[c/8] |= (1 << (c&7)); |
861 |
} |
} |
862 |
class_charcount++; |
class_charcount++; |
903 |
/* Various kinds of repeat */ |
/* Various kinds of repeat */ |
904 |
|
|
905 |
case '{': |
case '{': |
906 |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR; |
907 |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr); |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd); |
908 |
if (*errorptr != NULL) goto FAILED; |
if (*errorptr != NULL) goto FAILED; |
909 |
goto REPEAT; |
goto REPEAT; |
910 |
|
|
1091 |
else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE || |
else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE || |
1092 |
(int)*previous == OP_COND) |
(int)*previous == OP_COND) |
1093 |
{ |
{ |
1094 |
int i, ketoffset = 0; |
register int i; |
1095 |
|
int ketoffset = 0; |
1096 |
int len = code - previous; |
int len = code - previous; |
1097 |
|
uschar *bralink = NULL; |
1098 |
|
|
1099 |
/* If the maximum repeat count is unlimited, find the end of the bracket |
/* If the maximum repeat count is unlimited, find the end of the bracket |
1100 |
by scanning through from the start, and compute the offset back to it |
by scanning through from the start, and compute the offset back to it |
1109 |
ketoffset = code - ket; |
ketoffset = code - ket; |
1110 |
} |
} |
1111 |
|
|
1112 |
|
/* The case of a zero minimum is special because of the need to stick |
1113 |
|
OP_BRAZERO in front of it, and because the group appears once in the |
1114 |
|
data, whereas in other cases it appears the minimum number of times. For |
1115 |
|
this reason, it is simplest to treat this case separately, as otherwise |
1116 |
|
the code gets far too messy. There are several special subcases when the |
1117 |
|
minimum is zero. */ |
1118 |
|
|
1119 |
|
if (repeat_min == 0) |
1120 |
|
{ |
1121 |
|
/* If the maximum is also zero, we just omit the group from the output |
1122 |
|
altogether. */ |
1123 |
|
|
1124 |
|
if (repeat_max == 0) |
1125 |
|
{ |
1126 |
|
code = previous; |
1127 |
|
previous = NULL; |
1128 |
|
break; |
1129 |
|
} |
1130 |
|
|
1131 |
|
/* If the maximum is 1 or unlimited, we just have to stick in the |
1132 |
|
BRAZERO and do no more at this point. */ |
1133 |
|
|
1134 |
|
if (repeat_max <= 1) |
1135 |
|
{ |
1136 |
|
memmove(previous+1, previous, len); |
1137 |
|
code++; |
1138 |
|
*previous++ = OP_BRAZERO + repeat_type; |
1139 |
|
} |
1140 |
|
|
1141 |
|
/* If the maximum is greater than 1 and limited, we have to replicate |
1142 |
|
in a nested fashion, sticking OP_BRAZERO before each set of brackets. |
1143 |
|
The first one has to be handled carefully because it's the original |
1144 |
|
copy, which has to be moved up. The remainder can be handled by code |
1145 |
|
that is common with the non-zero minimum case below. We just have to |
1146 |
|
adjust the value of repeat_max, since one less copy is required. */ |
1147 |
|
|
1148 |
|
else |
1149 |
|
{ |
1150 |
|
int offset; |
1151 |
|
memmove(previous+4, previous, len); |
1152 |
|
code += 4; |
1153 |
|
*previous++ = OP_BRAZERO + repeat_type; |
1154 |
|
*previous++ = OP_BRA; |
1155 |
|
|
1156 |
|
/* We chain together the bracket offset fields that have to be |
1157 |
|
filled in later when the ends of the brackets are reached. */ |
1158 |
|
|
1159 |
|
offset = (bralink == NULL)? 0 : previous - bralink; |
1160 |
|
bralink = previous; |
1161 |
|
*previous++ = offset >> 8; |
1162 |
|
*previous++ = offset & 255; |
1163 |
|
} |
1164 |
|
|
1165 |
|
repeat_max--; |
1166 |
|
} |
1167 |
|
|
1168 |
|
/* If the minimum is greater than zero, replicate the group as many |
1169 |
|
times as necessary, and adjust the maximum to the number of subsequent |
1170 |
|
copies that we need. */ |
1171 |
|
|
1172 |
|
else |
1173 |
|
{ |
1174 |
|
for (i = 1; i < repeat_min; i++) |
1175 |
|
{ |
1176 |
|
memcpy(code, previous, len); |
1177 |
|
code += len; |
1178 |
|
} |
1179 |
|
if (repeat_max > 0) repeat_max -= repeat_min; |
1180 |
|
} |
1181 |
|
|
1182 |
|
/* This code is common to both the zero and non-zero minimum cases. If |
1183 |
|
the maximum is limited, it replicates the group in a nested fashion, |
1184 |
|
remembering the bracket starts on a stack. In the case of a zero minimum, |
1185 |
|
the first one was set up above. In all cases the repeat_max now specifies |
1186 |
|
the number of additional copies needed. */ |
1187 |
|
|
1188 |
|
if (repeat_max >= 0) |
1189 |
|
{ |
1190 |
|
for (i = repeat_max - 1; i >= 0; i--) |
1191 |
|
{ |
1192 |
|
*code++ = OP_BRAZERO + repeat_type; |
1193 |
|
|
1194 |
|
/* All but the final copy start a new nesting, maintaining the |
1195 |
|
chain of brackets outstanding. */ |
1196 |
|
|
1197 |
|
if (i != 0) |
1198 |
|
{ |
1199 |
|
int offset; |
1200 |
|
*code++ = OP_BRA; |
1201 |
|
offset = (bralink == NULL)? 0 : code - bralink; |
1202 |
|
bralink = code; |
1203 |
|
*code++ = offset >> 8; |
1204 |
|
*code++ = offset & 255; |
1205 |
|
} |
1206 |
|
|
1207 |
|
memcpy(code, previous, len); |
1208 |
|
code += len; |
1209 |
|
} |
1210 |
|
|
1211 |
|
/* Now chain through the pending brackets, and fill in their length |
1212 |
|
fields (which are holding the chain links pro tem). */ |
1213 |
|
|
1214 |
|
while (bralink != NULL) |
1215 |
|
{ |
1216 |
|
int oldlinkoffset; |
1217 |
|
int offset = code - bralink + 1; |
1218 |
|
uschar *bra = code - offset; |
1219 |
|
oldlinkoffset = (bra[1] << 8) + bra[2]; |
1220 |
|
bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; |
1221 |
|
*code++ = OP_KET; |
1222 |
|
*code++ = bra[1] = offset >> 8; |
1223 |
|
*code++ = bra[2] = (offset & 255); |
1224 |
|
} |
1225 |
|
} |
1226 |
|
|
1227 |
|
/* If the maximum is unlimited, set a repeater in the final copy. We |
1228 |
|
can't just offset backwards from the current code point, because we |
1229 |
|
don't know if there's been an options resetting after the ket. The |
1230 |
|
correct offset was computed above. */ |
1231 |
|
|
1232 |
|
else code[-ketoffset] = OP_KETRMAX + repeat_type; |
1233 |
|
|
1234 |
|
|
1235 |
|
#ifdef NEVER |
1236 |
/* If the minimum is greater than zero, and the maximum is unlimited or |
/* If the minimum is greater than zero, and the maximum is unlimited or |
1237 |
equal to the minimum, the first copy remains where it is, and is |
equal to the minimum, the first copy remains where it is, and is |
1238 |
replicated up to the minimum number of times. This case includes the + |
replicated up to the minimum number of times. This case includes the + |
1280 |
correct offset was computed above. */ |
correct offset was computed above. */ |
1281 |
|
|
1282 |
if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type; |
if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type; |
1283 |
|
#endif |
1284 |
|
|
1285 |
|
|
1286 |
} |
} |
1287 |
|
|
1288 |
/* Else there's some kind of shambles */ |
/* Else there's some kind of shambles */ |
1329 |
|
|
1330 |
case '(': |
case '(': |
1331 |
bravalue = OP_COND; /* Conditional group */ |
bravalue = OP_COND; /* Conditional group */ |
1332 |
if ((pcre_ctypes[*(++ptr)] & ctype_digit) != 0) |
if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) |
1333 |
{ |
{ |
1334 |
condref = *ptr - '0'; |
condref = *ptr - '0'; |
1335 |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
1462 |
errorptr, /* Where to put an error message */ |
errorptr, /* Where to put an error message */ |
1463 |
(bravalue == OP_ASSERTBACK || |
(bravalue == OP_ASSERTBACK || |
1464 |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
1465 |
condref)) /* Condition reference number */ |
condref, /* Condition reference number */ |
1466 |
|
cd)) /* Tables block */ |
1467 |
goto FAILED; |
goto FAILED; |
1468 |
|
|
1469 |
/* At the end of compiling, code is still pointing to the start of the |
/* At the end of compiling, code is still pointing to the start of the |
1511 |
|
|
1512 |
case '\\': |
case '\\': |
1513 |
tempptr = ptr; |
tempptr = ptr; |
1514 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
1515 |
|
|
1516 |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
1517 |
are arranged to be the negation of the corresponding OP_values. For the |
are arranged to be the negation of the corresponding OP_values. For the |
1556 |
{ |
{ |
1557 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
1558 |
{ |
{ |
1559 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
1560 |
if (c == '#') |
if (c == '#') |
1561 |
{ |
{ |
1562 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
1572 |
if (c == '\\') |
if (c == '\\') |
1573 |
{ |
{ |
1574 |
tempptr = ptr; |
tempptr = ptr; |
1575 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
1576 |
if (c < 0) { ptr = tempptr; break; } |
if (c < 0) { ptr = tempptr; break; } |
1577 |
} |
} |
1578 |
|
|
1584 |
|
|
1585 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
1586 |
|
|
1587 |
while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0); |
while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0); |
1588 |
|
|
1589 |
/* Compute the length and set it in the data vector, and advance to |
/* Compute the length and set it in the data vector, and advance to |
1590 |
the next state. */ |
the next state. */ |
1629 |
errorptr -> pointer to error message |
errorptr -> pointer to error message |
1630 |
lookbehind TRUE if this is a lookbehind assertion |
lookbehind TRUE if this is a lookbehind assertion |
1631 |
condref > 0 for OPT_CREF setting at start of conditional group |
condref > 0 for OPT_CREF setting at start of conditional group |
1632 |
|
cd points to the data block with tables pointers |
1633 |
|
|
1634 |
Returns: TRUE on success |
Returns: TRUE on success |
1635 |
*/ |
*/ |
1636 |
|
|
1637 |
static BOOL |
static BOOL |
1638 |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
1639 |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref) |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref, |
1640 |
|
compile_data *cd) |
1641 |
{ |
{ |
1642 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
1643 |
uschar *code = *codeptr; |
uschar *code = *codeptr; |
1684 |
|
|
1685 |
/* Now compile the branch */ |
/* Now compile the branch */ |
1686 |
|
|
1687 |
if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged)) |
if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd)) |
1688 |
{ |
{ |
1689 |
*ptrptr = ptr; |
*ptrptr = ptr; |
1690 |
return FALSE; |
return FALSE; |
1790 |
code += 2; |
code += 2; |
1791 |
break; |
break; |
1792 |
|
|
1793 |
|
case OP_WORD_BOUNDARY: |
1794 |
|
case OP_NOT_WORD_BOUNDARY: |
1795 |
|
code++; |
1796 |
|
break; |
1797 |
|
|
1798 |
case OP_ASSERT_NOT: |
case OP_ASSERT_NOT: |
1799 |
case OP_ASSERTBACK: |
case OP_ASSERTBACK: |
1800 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
1822 |
it's anchored. However, if this is a multiline pattern, then only OP_SOD |
it's anchored. However, if this is a multiline pattern, then only OP_SOD |
1823 |
counts, since OP_CIRC can match in the middle. |
counts, since OP_CIRC can match in the middle. |
1824 |
|
|
1825 |
A branch is also implicitly anchored if it starts with .* because that will try |
A branch is also implicitly anchored if it starts with .* and DOTALL is set, |
1826 |
the rest of the pattern at all possible matching points, so there is no point |
because that will try the rest of the pattern at all possible matching points, |
1827 |
trying them again. |
so there is no point trying them again. |
1828 |
|
|
1829 |
Arguments: |
Arguments: |
1830 |
code points to start of expression (the bracket) |
code points to start of expression (the bracket) |
1842 |
register int op = *scode; |
register int op = *scode; |
1843 |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
1844 |
{ if (!is_anchored(scode, options)) return FALSE; } |
{ if (!is_anchored(scode, options)) return FALSE; } |
1845 |
else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) |
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) && |
1846 |
|
(*options & PCRE_DOTALL) != 0) |
1847 |
{ if (scode[1] != OP_ANY) return FALSE; } |
{ if (scode[1] != OP_ANY) return FALSE; } |
1848 |
else if (op != OP_SOD && |
else if (op != OP_SOD && |
1849 |
((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) |
((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) |
1857 |
|
|
1858 |
|
|
1859 |
/************************************************* |
/************************************************* |
1860 |
* Check for start with \n line expression * |
* Check for starting with ^ or .* * |
1861 |
*************************************************/ |
*************************************************/ |
1862 |
|
|
1863 |
/* This is called for multiline expressions to try to find out if every branch |
/* This is called to find out if every branch starts with ^ or .* so that |
1864 |
starts with ^ so that "first char" processing can be done to speed things up. |
"first char" processing can be done to speed things up in multiline |
1865 |
|
matching and for non-DOTALL patterns that start with .* (which must start at |
1866 |
|
the beginning or after \n). |
1867 |
|
|
1868 |
Argument: points to start of expression (the bracket) |
Argument: points to start of expression (the bracket) |
1869 |
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
1877 |
register int op = *scode; |
register int op = *scode; |
1878 |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
1879 |
{ if (!is_startline(scode)) return FALSE; } |
{ if (!is_startline(scode)) return FALSE; } |
1880 |
|
else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) |
1881 |
|
{ if (scode[1] != OP_ANY) return FALSE; } |
1882 |
else if (op != OP_CIRC) return FALSE; |
else if (op != OP_CIRC) return FALSE; |
1883 |
code += (code[1] << 8) + code[2]; |
code += (code[1] << 8) + code[2]; |
1884 |
} |
} |
1964 |
options various option bits |
options various option bits |
1965 |
errorptr pointer to pointer to error text |
errorptr pointer to pointer to error text |
1966 |
erroroffset ptr offset in pattern where error was detected |
erroroffset ptr offset in pattern where error was detected |
1967 |
|
tables pointer to character tables or NULL |
1968 |
|
|
1969 |
Returns: pointer to compiled data block, or NULL on error, |
Returns: pointer to compiled data block, or NULL on error, |
1970 |
with errorptr and erroroffset set |
with errorptr and erroroffset set |
1972 |
|
|
1973 |
pcre * |
pcre * |
1974 |
pcre_compile(const char *pattern, int options, const char **errorptr, |
pcre_compile(const char *pattern, int options, const char **errorptr, |
1975 |
int *erroroffset) |
int *erroroffset, const unsigned char *tables) |
1976 |
{ |
{ |
1977 |
real_pcre *re; |
real_pcre *re; |
1978 |
int length = 3; /* For initial BRA plus length */ |
int length = 3; /* For initial BRA plus length */ |
1985 |
unsigned int brastackptr = 0; |
unsigned int brastackptr = 0; |
1986 |
uschar *code; |
uschar *code; |
1987 |
const uschar *ptr; |
const uschar *ptr; |
1988 |
|
compile_data compile_block; |
1989 |
int brastack[BRASTACK_SIZE]; |
int brastack[BRASTACK_SIZE]; |
1990 |
uschar bralenstack[BRASTACK_SIZE]; |
uschar bralenstack[BRASTACK_SIZE]; |
1991 |
|
|
2014 |
return NULL; |
return NULL; |
2015 |
} |
} |
2016 |
|
|
2017 |
|
/* Set up pointers to the individual character tables */ |
2018 |
|
|
2019 |
|
if (tables == NULL) tables = pcre_default_tables; |
2020 |
|
compile_block.lcc = tables + lcc_offset; |
2021 |
|
compile_block.fcc = tables + fcc_offset; |
2022 |
|
compile_block.cbits = tables + cbits_offset; |
2023 |
|
compile_block.ctypes = tables + ctypes_offset; |
2024 |
|
|
2025 |
|
/* Reflect pattern for debugging output */ |
2026 |
|
|
2027 |
DPRINTF(("------------------------------------------------------------------\n")); |
DPRINTF(("------------------------------------------------------------------\n")); |
2028 |
DPRINTF(("%s\n", pattern)); |
DPRINTF(("%s\n", pattern)); |
2029 |
|
|
2042 |
|
|
2043 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
2044 |
{ |
{ |
2045 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
2046 |
if (c == '#') |
if (c == '#') |
2047 |
{ |
{ |
2048 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
2060 |
case '\\': |
case '\\': |
2061 |
{ |
{ |
2062 |
const uschar *save_ptr = ptr; |
const uschar *save_ptr = ptr; |
2063 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block); |
2064 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2065 |
if (c >= 0) |
if (c >= 0) |
2066 |
{ |
{ |
2080 |
int refnum = -c - ESC_REF; |
int refnum = -c - ESC_REF; |
2081 |
if (refnum > top_backref) top_backref = refnum; |
if (refnum > top_backref) top_backref = refnum; |
2082 |
length++; /* For single back reference */ |
length++; /* For single back reference */ |
2083 |
if (ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
2084 |
{ |
{ |
2085 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
2086 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2087 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
2088 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
2106 |
or back reference. */ |
or back reference. */ |
2107 |
|
|
2108 |
case '{': |
case '{': |
2109 |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR; |
2110 |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block); |
2111 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2112 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
2113 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
2142 |
{ |
{ |
2143 |
if (*ptr == '\\') |
if (*ptr == '\\') |
2144 |
{ |
{ |
2145 |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE); |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE, |
2146 |
|
&compile_block); |
2147 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2148 |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
2149 |
} |
} |
2160 |
|
|
2161 |
/* A repeat needs either 1 or 5 bytes. */ |
/* A repeat needs either 1 or 5 bytes. */ |
2162 |
|
|
2163 |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
2164 |
{ |
{ |
2165 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
2166 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2167 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
2168 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
2228 |
group. */ |
group. */ |
2229 |
|
|
2230 |
case '(': |
case '(': |
2231 |
if ((pcre_ctypes[ptr[3]] & ctype_digit) != 0) |
if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0) |
2232 |
{ |
{ |
2233 |
ptr += 4; |
ptr += 4; |
2234 |
length += 2; |
length += 2; |
2235 |
while ((pcre_ctypes[*ptr] & ctype_digit) != 0) ptr++; |
while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++; |
2236 |
if (*ptr != ')') |
if (*ptr != ')') |
2237 |
{ |
{ |
2238 |
*errorptr = ERR26; |
*errorptr = ERR26; |
2401 |
/* Leave ptr at the final char; for read_repeat_counts this happens |
/* Leave ptr at the final char; for read_repeat_counts this happens |
2402 |
automatically; for the others we need an increment. */ |
automatically; for the others we need an increment. */ |
2403 |
|
|
2404 |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block)) |
2405 |
{ |
{ |
2406 |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr); |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr, |
2407 |
|
&compile_block); |
2408 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2409 |
} |
} |
2410 |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
2411 |
else if (c == '+') { maxval = -1; ptr++; } |
else if (c == '+') { maxval = -1; ptr++; } |
2412 |
else if (c == '?') { minval = 0; ptr++; } |
else if (c == '?') { minval = 0; ptr++; } |
2413 |
|
|
2414 |
/* If there is a minimum > 1 we have to replicate up to minval-1 times; |
/* If the minimum is zero, we have to allow for an OP_BRAZERO before the |
2415 |
if there is a limited maximum we have to replicate up to maxval-1 times |
group, and if the maximum is greater than zero, we have to replicate |
2416 |
and allow for a BRAZERO item before each optional copy, as we also have |
maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting |
2417 |
to do before the first copy if the minimum is zero. */ |
bracket set - hence the 7. */ |
2418 |
|
|
2419 |
if (minval == 0) length++; |
if (minval == 0) |
2420 |
else if (minval > 1) length += (minval - 1) * duplength; |
{ |
2421 |
if (maxval > minval) length += (maxval - minval) * (duplength + 1); |
length++; |
2422 |
|
if (maxval > 0) length += (maxval - 1) * (duplength + 7); |
2423 |
|
} |
2424 |
|
|
2425 |
|
/* When the minimum is greater than zero, we have to replicate up to |
2426 |
|
minval-1 times, with no additions required in the copies. Then, if |
2427 |
|
there is a limited maximum we have to replicate up to maxval-1 times |
2428 |
|
allowing for a BRAZERO item before each optional copy and nesting |
2429 |
|
brackets for all but one of the optional copies. */ |
2430 |
|
|
2431 |
|
else |
2432 |
|
{ |
2433 |
|
length += (minval - 1) * duplength; |
2434 |
|
if (maxval > minval) /* Need this test as maxval=-1 means no limit */ |
2435 |
|
length += (maxval - minval) * (duplength + 7) - 6; |
2436 |
|
} |
2437 |
} |
} |
2438 |
continue; |
continue; |
2439 |
|
|
2450 |
{ |
{ |
2451 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
2452 |
{ |
{ |
2453 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
2454 |
if (c == '#') |
if (c == '#') |
2455 |
{ |
{ |
2456 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
2464 |
if (c == '\\') |
if (c == '\\') |
2465 |
{ |
{ |
2466 |
const uschar *saveptr = ptr; |
const uschar *saveptr = ptr; |
2467 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, |
2468 |
|
&compile_block); |
2469 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2470 |
if (c < 0) { ptr = saveptr; break; } |
if (c < 0) { ptr = saveptr; break; } |
2471 |
} |
} |
2477 |
|
|
2478 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
2479 |
|
|
2480 |
while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0); |
while (runlength < 255 && |
2481 |
|
(compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0); |
2482 |
|
|
2483 |
ptr--; |
ptr--; |
2484 |
length += runlength; |
length += runlength; |
2513 |
|
|
2514 |
re->magic_number = MAGIC_NUMBER; |
re->magic_number = MAGIC_NUMBER; |
2515 |
re->options = options; |
re->options = options; |
2516 |
|
re->tables = tables; |
2517 |
|
|
2518 |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
2519 |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
2523 |
code = re->code; |
code = re->code; |
2524 |
*code = OP_BRA; |
*code = OP_BRA; |
2525 |
bracount = 0; |
bracount = 0; |
2526 |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1); |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1, |
2527 |
|
&compile_block); |
2528 |
re->top_bracket = bracount; |
re->top_bracket = bracount; |
2529 |
re->top_backref = top_backref; |
re->top_backref = top_backref; |
2530 |
|
|
2556 |
return NULL; |
return NULL; |
2557 |
} |
} |
2558 |
|
|
2559 |
/* If the anchored option was not passed, set flag if we can determine that it |
/* If the anchored option was not passed, set flag if we can determine that the |
2560 |
is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if |
pattern is anchored by virtue of ^ characters or \A or anything else (such as |
2561 |
we can determine what the first character has to be, because that speeds up |
starting with .* when DOTALL is set). |
2562 |
unanchored matches no end. In the case of multiline matches, an alternative is |
|
2563 |
to set the PCRE_STARTLINE flag if all branches start with ^. */ |
Otherwise, see if we can determine what the first character has to be, because |
2564 |
|
that speeds up unanchored matches no end. If not, see if we can set the |
2565 |
|
PCRE_STARTLINE flag. This is helpful for multiline matches when all branches |
2566 |
|
start with ^, and also when all branches start with .* for non-DOTALL matches. |
2567 |
|
*/ |
2568 |
|
|
2569 |
if ((options & PCRE_ANCHORED) == 0) |
if ((options & PCRE_ANCHORED) == 0) |
2570 |
{ |
{ |
2825 |
|
|
2826 |
|
|
2827 |
/************************************************* |
/************************************************* |
|
* Match a character type * |
|
|
*************************************************/ |
|
|
|
|
|
/* Not used in all the places it might be as it's sometimes faster |
|
|
to put the code inline. |
|
|
|
|
|
Arguments: |
|
|
type the character type |
|
|
c the character |
|
|
dotall the dotall flag |
|
|
|
|
|
Returns: TRUE if character is of the type |
|
|
*/ |
|
|
|
|
|
static BOOL |
|
|
match_type(int type, int c, BOOL dotall) |
|
|
{ |
|
|
|
|
|
#ifdef DEBUG |
|
|
if (isprint(c)) printf("matching subject %c against ", c); |
|
|
else printf("matching subject \\x%02x against ", c); |
|
|
printf("%s\n", OP_names[type]); |
|
|
#endif |
|
|
|
|
|
switch(type) |
|
|
{ |
|
|
case OP_ANY: return dotall || c != '\n'; |
|
|
case OP_NOT_DIGIT: return (pcre_ctypes[c] & ctype_digit) == 0; |
|
|
case OP_DIGIT: return (pcre_ctypes[c] & ctype_digit) != 0; |
|
|
case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0; |
|
|
case OP_WHITESPACE: return (pcre_ctypes[c] & ctype_space) != 0; |
|
|
case OP_NOT_WORDCHAR: return (pcre_ctypes[c] & ctype_word) == 0; |
|
|
case OP_WORDCHAR: return (pcre_ctypes[c] & ctype_word) != 0; |
|
|
} |
|
|
return FALSE; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/************************************************* |
|
2828 |
* Match a back-reference * |
* Match a back-reference * |
2829 |
*************************************************/ |
*************************************************/ |
2830 |
|
|
2867 |
/* Separate the caseless case for speed */ |
/* Separate the caseless case for speed */ |
2868 |
|
|
2869 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
2870 |
{ while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; } |
{ |
2871 |
|
while (length-- > 0) |
2872 |
|
if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; |
2873 |
|
} |
2874 |
else |
else |
2875 |
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; } |
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; } |
2876 |
|
|
2933 |
int number = op - OP_BRA; |
int number = op - OP_BRA; |
2934 |
int offset = number << 1; |
int offset = number << 1; |
2935 |
|
|
2936 |
DPRINTF(("start bracket %d\n", number)); |
#ifdef DEBUG |
2937 |
|
printf("start bracket %d subject=", number); |
2938 |
|
pchars(eptr, 16, TRUE, md); |
2939 |
|
printf("\n"); |
2940 |
|
#endif |
2941 |
|
|
2942 |
if (offset < md->offset_max) |
if (offset < md->offset_max) |
2943 |
{ |
{ |
3327 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
3328 |
{ |
{ |
3329 |
BOOL prev_is_word = (eptr != md->start_subject) && |
BOOL prev_is_word = (eptr != md->start_subject) && |
3330 |
((pcre_ctypes[eptr[-1]] & ctype_word) != 0); |
((md->ctypes[eptr[-1]] & ctype_word) != 0); |
3331 |
BOOL cur_is_word = (eptr < md->end_subject) && |
BOOL cur_is_word = (eptr < md->end_subject) && |
3332 |
((pcre_ctypes[*eptr] & ctype_word) != 0); |
((md->ctypes[*eptr] & ctype_word) != 0); |
3333 |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
3334 |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
3335 |
return FALSE; |
return FALSE; |
3346 |
break; |
break; |
3347 |
|
|
3348 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
3349 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0) |
if (eptr >= md->end_subject || |
3350 |
|
(md->ctypes[*eptr++] & ctype_digit) != 0) |
3351 |
return FALSE; |
return FALSE; |
3352 |
ecode++; |
ecode++; |
3353 |
break; |
break; |
3354 |
|
|
3355 |
case OP_DIGIT: |
case OP_DIGIT: |
3356 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0) |
if (eptr >= md->end_subject || |
3357 |
|
(md->ctypes[*eptr++] & ctype_digit) == 0) |
3358 |
return FALSE; |
return FALSE; |
3359 |
ecode++; |
ecode++; |
3360 |
break; |
break; |
3361 |
|
|
3362 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
3363 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0) |
if (eptr >= md->end_subject || |
3364 |
|
(md->ctypes[*eptr++] & ctype_space) != 0) |
3365 |
return FALSE; |
return FALSE; |
3366 |
ecode++; |
ecode++; |
3367 |
break; |
break; |
3368 |
|
|
3369 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
3370 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0) |
if (eptr >= md->end_subject || |
3371 |
|
(md->ctypes[*eptr++] & ctype_space) == 0) |
3372 |
return FALSE; |
return FALSE; |
3373 |
ecode++; |
ecode++; |
3374 |
break; |
break; |
3375 |
|
|
3376 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
3377 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0) |
if (eptr >= md->end_subject || |
3378 |
|
(md->ctypes[*eptr++] & ctype_word) != 0) |
3379 |
return FALSE; |
return FALSE; |
3380 |
ecode++; |
ecode++; |
3381 |
break; |
break; |
3382 |
|
|
3383 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
3384 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0) |
if (eptr >= md->end_subject || |
3385 |
|
(md->ctypes[*eptr++] & ctype_word) == 0) |
3386 |
return FALSE; |
return FALSE; |
3387 |
ecode++; |
ecode++; |
3388 |
break; |
break; |
3614 |
if (length > md->end_subject - eptr) return FALSE; |
if (length > md->end_subject - eptr) return FALSE; |
3615 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3616 |
{ |
{ |
3617 |
while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE; |
while (length-- > 0) |
3618 |
|
if (md->lcc[*ecode++] != md->lcc[*eptr++]) |
3619 |
|
return FALSE; |
3620 |
} |
} |
3621 |
else |
else |
3622 |
{ |
{ |
3673 |
|
|
3674 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3675 |
{ |
{ |
3676 |
c = pcre_lcc[c]; |
c = md->lcc[c]; |
3677 |
for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE; |
for (i = 1; i <= min; i++) |
3678 |
|
if (c != md->lcc[*eptr++]) return FALSE; |
3679 |
if (min == max) continue; |
if (min == max) continue; |
3680 |
if (minimize) |
if (minimize) |
3681 |
{ |
{ |
3683 |
{ |
{ |
3684 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
3685 |
return TRUE; |
return TRUE; |
3686 |
if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++]) |
if (i >= max || eptr >= md->end_subject || |
3687 |
|
c != md->lcc[*eptr++]) |
3688 |
return FALSE; |
return FALSE; |
3689 |
} |
} |
3690 |
/* Control never gets here */ |
/* Control never gets here */ |
3694 |
const uschar *pp = eptr; |
const uschar *pp = eptr; |
3695 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3696 |
{ |
{ |
3697 |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c != md->lcc[*eptr]) break; |
3698 |
eptr++; |
eptr++; |
3699 |
} |
} |
3700 |
while (eptr >= pp) |
while (eptr >= pp) |
3744 |
ecode++; |
ecode++; |
3745 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3746 |
{ |
{ |
3747 |
if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE; |
if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE; |
3748 |
} |
} |
3749 |
else |
else |
3750 |
{ |
{ |
3804 |
|
|
3805 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3806 |
{ |
{ |
3807 |
c = pcre_lcc[c]; |
c = md->lcc[c]; |
3808 |
for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE; |
for (i = 1; i <= min; i++) |
3809 |
|
if (c == md->lcc[*eptr++]) return FALSE; |
3810 |
if (min == max) continue; |
if (min == max) continue; |
3811 |
if (minimize) |
if (minimize) |
3812 |
{ |
{ |
3814 |
{ |
{ |
3815 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
3816 |
return TRUE; |
return TRUE; |
3817 |
if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++]) |
if (i >= max || eptr >= md->end_subject || |
3818 |
|
c == md->lcc[*eptr++]) |
3819 |
return FALSE; |
return FALSE; |
3820 |
} |
} |
3821 |
/* Control never gets here */ |
/* Control never gets here */ |
3825 |
const uschar *pp = eptr; |
const uschar *pp = eptr; |
3826 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3827 |
{ |
{ |
3828 |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c == md->lcc[*eptr]) break; |
3829 |
eptr++; |
eptr++; |
3830 |
} |
} |
3831 |
while (eptr >= pp) |
while (eptr >= pp) |
3919 |
|
|
3920 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
3921 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3922 |
if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE; |
3923 |
break; |
break; |
3924 |
|
|
3925 |
case OP_DIGIT: |
case OP_DIGIT: |
3926 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3927 |
if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE; |
3928 |
break; |
break; |
3929 |
|
|
3930 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
3931 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3932 |
if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE; |
3933 |
break; |
break; |
3934 |
|
|
3935 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
3936 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3937 |
if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE; |
3938 |
break; |
break; |
3939 |
|
|
3940 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
3941 |
for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0) |
for (i = 1; i <= min; i++) |
3942 |
return FALSE; |
if ((md->ctypes[*eptr++] & ctype_word) != 0) |
3943 |
|
return FALSE; |
3944 |
break; |
break; |
3945 |
|
|
3946 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
3947 |
for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0) |
for (i = 1; i <= min; i++) |
3948 |
return FALSE; |
if ((md->ctypes[*eptr++] & ctype_word) == 0) |
3949 |
|
return FALSE; |
3950 |
break; |
break; |
3951 |
} |
} |
3952 |
|
|
3955 |
if (min == max) continue; |
if (min == max) continue; |
3956 |
|
|
3957 |
/* If minimizing, we have to test the rest of the pattern before each |
/* If minimizing, we have to test the rest of the pattern before each |
3958 |
subsequent match, so inlining isn't much help; just use the function. */ |
subsequent match. */ |
3959 |
|
|
3960 |
if (minimize) |
if (minimize) |
3961 |
{ |
{ |
3962 |
for (i = min;; i++) |
for (i = min;; i++) |
3963 |
{ |
{ |
3964 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE; |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE; |
3965 |
if (i >= max || eptr >= md->end_subject || |
if (i >= max || eptr >= md->end_subject) return FALSE; |
3966 |
!match_type(ctype, *eptr++, (ims & PCRE_DOTALL) != 0)) |
|
3967 |
return FALSE; |
c = *eptr++; |
3968 |
|
switch(ctype) |
3969 |
|
{ |
3970 |
|
case OP_ANY: |
3971 |
|
if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE; |
3972 |
|
break; |
3973 |
|
|
3974 |
|
case OP_NOT_DIGIT: |
3975 |
|
if ((md->ctypes[c] & ctype_digit) != 0) return FALSE; |
3976 |
|
break; |
3977 |
|
|
3978 |
|
case OP_DIGIT: |
3979 |
|
if ((md->ctypes[c] & ctype_digit) == 0) return FALSE; |
3980 |
|
break; |
3981 |
|
|
3982 |
|
case OP_NOT_WHITESPACE: |
3983 |
|
if ((md->ctypes[c] & ctype_space) != 0) return FALSE; |
3984 |
|
break; |
3985 |
|
|
3986 |
|
case OP_WHITESPACE: |
3987 |
|
if ((md->ctypes[c] & ctype_space) == 0) return FALSE; |
3988 |
|
break; |
3989 |
|
|
3990 |
|
case OP_NOT_WORDCHAR: |
3991 |
|
if ((md->ctypes[c] & ctype_word) != 0) return FALSE; |
3992 |
|
break; |
3993 |
|
|
3994 |
|
case OP_WORDCHAR: |
3995 |
|
if ((md->ctypes[c] & ctype_word) == 0) return FALSE; |
3996 |
|
break; |
3997 |
|
} |
3998 |
} |
} |
3999 |
/* Control never gets here */ |
/* Control never gets here */ |
4000 |
} |
} |
4027 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
4028 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4029 |
{ |
{ |
4030 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) |
4031 |
break; |
break; |
4032 |
eptr++; |
eptr++; |
4033 |
} |
} |
4036 |
case OP_DIGIT: |
case OP_DIGIT: |
4037 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4038 |
{ |
{ |
4039 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) |
4040 |
break; |
break; |
4041 |
eptr++; |
eptr++; |
4042 |
} |
} |
4045 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
4046 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4047 |
{ |
{ |
4048 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) |
4049 |
break; |
break; |
4050 |
eptr++; |
eptr++; |
4051 |
} |
} |
4054 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
4055 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4056 |
{ |
{ |
4057 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) |
4058 |
break; |
break; |
4059 |
eptr++; |
eptr++; |
4060 |
} |
} |
4063 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
4064 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4065 |
{ |
{ |
4066 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) |
4067 |
break; |
break; |
4068 |
eptr++; |
eptr++; |
4069 |
} |
} |
4072 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
4073 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4074 |
{ |
{ |
4075 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) |
4076 |
break; |
break; |
4077 |
eptr++; |
eptr++; |
4078 |
} |
} |
4118 |
external_extra points to "hints" from pcre_study() or is NULL |
external_extra points to "hints" from pcre_study() or is NULL |
4119 |
subject points to the subject string |
subject points to the subject string |
4120 |
length length of subject string (may contain binary zeros) |
length length of subject string (may contain binary zeros) |
4121 |
|
start_offset where to start in the subject string |
4122 |
options option bits |
options option bits |
4123 |
offsets points to a vector of ints to be filled in with offsets |
offsets points to a vector of ints to be filled in with offsets |
4124 |
offsetcount the number of elements in the vector |
offsetcount the number of elements in the vector |
4131 |
|
|
4132 |
int |
int |
4133 |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
4134 |
const char *subject, int length, int options, int *offsets, int offsetcount) |
const char *subject, int length, int start_offset, int options, int *offsets, |
4135 |
|
int offsetcount) |
4136 |
{ |
{ |
4137 |
int resetcount, ocount; |
int resetcount, ocount; |
4138 |
int first_char = -1; |
int first_char = -1; |
4139 |
int ims = 0; |
int ims = 0; |
4140 |
match_data match_block; |
match_data match_block; |
4141 |
const uschar *start_bits = NULL; |
const uschar *start_bits = NULL; |
4142 |
const uschar *start_match = (const uschar *)subject; |
const uschar *start_match = (const uschar *)subject + start_offset; |
4143 |
const uschar *end_subject; |
const uschar *end_subject; |
4144 |
const real_pcre *re = (const real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
4145 |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
4164 |
|
|
4165 |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
4166 |
|
|
4167 |
|
match_block.lcc = re->tables + lcc_offset; |
4168 |
|
match_block.ctypes = re->tables + ctypes_offset; |
4169 |
|
|
4170 |
/* The ims options can vary during the matching as a result of the presence |
/* The ims options can vary during the matching as a result of the presence |
4171 |
of (?ims) items in the pattern. They are kept in a local variable so that |
of (?ims) items in the pattern. They are kept in a local variable so that |
4172 |
restoring at the exit of a group is easy. */ |
restoring at the exit of a group is easy. */ |
4201 |
resetcount = 2 + re->top_bracket * 2; |
resetcount = 2 + re->top_bracket * 2; |
4202 |
if (resetcount > offsetcount) resetcount = ocount; |
if (resetcount > offsetcount) resetcount = ocount; |
4203 |
|
|
4204 |
|
/* Reset the working variable associated with each extraction. These should |
4205 |
|
never be used unless previously set, but they get saved and restored, and so we |
4206 |
|
initialize them to avoid reading uninitialized locations. */ |
4207 |
|
|
4208 |
|
if (match_block.offset_vector != NULL) |
4209 |
|
{ |
4210 |
|
register int *iptr = match_block.offset_vector + ocount; |
4211 |
|
register int *iend = iptr - resetcount/2 + 1; |
4212 |
|
while (--iptr >= iend) *iptr = -1; |
4213 |
|
} |
4214 |
|
|
4215 |
/* Set up the first character to match, if available. The first_char value is |
/* Set up the first character to match, if available. The first_char value is |
4216 |
never set for an anchored regular expression, but the anchoring may be forced |
never set for an anchored regular expression, but the anchoring may be forced |
4217 |
at run time, so we have to test for anchoring. The first char may be unset for |
at run time, so we have to test for anchoring. The first char may be unset for |
4223 |
if ((re->options & PCRE_FIRSTSET) != 0) |
if ((re->options & PCRE_FIRSTSET) != 0) |
4224 |
{ |
{ |
4225 |
first_char = re->first_char; |
first_char = re->first_char; |
4226 |
if ((ims & PCRE_CASELESS) != 0) first_char = pcre_lcc[first_char]; |
if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char]; |
4227 |
} |
} |
4228 |
else |
else |
4229 |
if (!startline && extra != NULL && |
if (!startline && extra != NULL && |
4231 |
start_bits = extra->start_bits; |
start_bits = extra->start_bits; |
4232 |
} |
} |
4233 |
|
|
4234 |
/* Loop for unanchored matches; for anchored regexps the loop runs just once. */ |
/* Loop for unanchored matches; for anchored regexs the loop runs just once. */ |
4235 |
|
|
4236 |
do |
do |
4237 |
{ |
{ |
4248 |
if (first_char >= 0) |
if (first_char >= 0) |
4249 |
{ |
{ |
4250 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
4251 |
while (start_match < end_subject && pcre_lcc[*start_match] != first_char) |
while (start_match < end_subject && |
4252 |
|
match_block.lcc[*start_match] != first_char) |
4253 |
start_match++; |
start_match++; |
4254 |
else |
else |
4255 |
while (start_match < end_subject && *start_match != first_char) |
while (start_match < end_subject && *start_match != first_char) |
4322 |
DPRINTF((">>>> returning %d\n", rc)); |
DPRINTF((">>>> returning %d\n", rc)); |
4323 |
return rc; |
return rc; |
4324 |
} |
} |
4325 |
|
|
4326 |
|
/* This "while" is the end of the "do" above */ |
4327 |
|
|
4328 |
while (!anchored && |
while (!anchored && |
4329 |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
4330 |
start_match++ < end_subject); |
start_match++ < end_subject); |