9 |
|
|
10 |
Written by: Philip Hazel <ph10@cam.ac.uk> |
Written by: Philip Hazel <ph10@cam.ac.uk> |
11 |
|
|
12 |
Copyright (c) 1998 University of Cambridge |
Copyright (c) 1997-1999 University of Cambridge |
13 |
|
|
14 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
15 |
Permission is granted to anyone to use this software for any purpose on any |
Permission is granted to anyone to use this software for any purpose on any |
25 |
|
|
26 |
3. Altered versions must be plainly marked as such, and must not be |
3. Altered versions must be plainly marked as such, and must not be |
27 |
misrepresented as being the original software. |
misrepresented as being the original software. |
28 |
|
|
29 |
|
4. If PCRE is embedded in any software that is released under the GNU |
30 |
|
General Purpose Licence (GPL), then the terms of that licence shall |
31 |
|
supersede any condition above with which it is incompatible. |
32 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
33 |
*/ |
*/ |
34 |
|
|
111 |
|
|
112 |
static BOOL |
static BOOL |
113 |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
114 |
BOOL, int); |
BOOL, int, compile_data *); |
|
|
|
|
/* Structure for passing "static" information around between the functions |
|
|
doing the matching, so that they are thread-safe. */ |
|
|
|
|
|
typedef struct match_data { |
|
|
int errorcode; /* As it says */ |
|
|
int *offset_vector; /* Offset vector */ |
|
|
int offset_end; /* One past the end */ |
|
|
int offset_max; /* The maximum usable for return data */ |
|
|
BOOL offset_overflow; /* Set if too many extractions */ |
|
|
BOOL notbol; /* NOTBOL flag */ |
|
|
BOOL noteol; /* NOTEOL flag */ |
|
|
BOOL endonly; /* Dollar not before final \n */ |
|
|
const uschar *start_subject; /* Start of the subject string */ |
|
|
const uschar *end_subject; /* End of the subject string */ |
|
|
const uschar *end_match_ptr; /* Subject position at end match */ |
|
|
int end_offset_top; /* Highwater mark at end of match */ |
|
|
} match_data; |
|
115 |
|
|
116 |
|
|
117 |
|
|
131 |
|
|
132 |
|
|
133 |
/************************************************* |
/************************************************* |
134 |
|
* Default character tables * |
135 |
|
*************************************************/ |
136 |
|
|
137 |
|
/* A default set of character tables is included in the PCRE binary. Its source |
138 |
|
is built by the maketables auxiliary program, which uses the default C ctypes |
139 |
|
functions, and put in the file chartables.c. These tables are used by PCRE |
140 |
|
whenever the caller of pcre_compile() does not provide an alternate set of |
141 |
|
tables. */ |
142 |
|
|
143 |
|
#include "chartables.c" |
144 |
|
|
145 |
|
|
146 |
|
|
147 |
|
/************************************************* |
148 |
* Return version string * |
* Return version string * |
149 |
*************************************************/ |
*************************************************/ |
150 |
|
|
237 |
bracount number of previous extracting brackets |
bracount number of previous extracting brackets |
238 |
options the options bits |
options the options bits |
239 |
isclass TRUE if inside a character class |
isclass TRUE if inside a character class |
240 |
|
cd pointer to char tables block |
241 |
|
|
242 |
Returns: zero or positive => a data character |
Returns: zero or positive => a data character |
243 |
negative => a special escape sequence |
negative => a special escape sequence |
246 |
|
|
247 |
static int |
static int |
248 |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
249 |
int options, BOOL isclass) |
int options, BOOL isclass, compile_data *cd) |
250 |
{ |
{ |
251 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
252 |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
289 |
{ |
{ |
290 |
oldptr = ptr; |
oldptr = ptr; |
291 |
c -= '0'; |
c -= '0'; |
292 |
while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0) |
while ((cd->ctypes[ptr[1]] & ctype_digit) != 0) |
293 |
c = c * 10 + *(++ptr) - '0'; |
c = c * 10 + *(++ptr) - '0'; |
294 |
if (c < 10 || c <= bracount) |
if (c < 10 || c <= bracount) |
295 |
{ |
{ |
315 |
|
|
316 |
case '0': |
case '0': |
317 |
c -= '0'; |
c -= '0'; |
318 |
while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 && |
while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 && |
319 |
ptr[1] != '8' && ptr[1] != '9') |
ptr[1] != '8' && ptr[1] != '9') |
320 |
c = c * 8 + *(++ptr) - '0'; |
c = c * 8 + *(++ptr) - '0'; |
321 |
break; |
break; |
324 |
|
|
325 |
case 'x': |
case 'x': |
326 |
c = 0; |
c = 0; |
327 |
while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0) |
while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0) |
328 |
{ |
{ |
329 |
ptr++; |
ptr++; |
330 |
c = c * 16 + pcre_lcc[*ptr] - |
c = c * 16 + cd->lcc[*ptr] - |
331 |
(((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W'); |
(((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W'); |
332 |
} |
} |
333 |
break; |
break; |
334 |
|
|
342 |
|
|
343 |
/* A letter is upper-cased; then the 0x40 bit is flipped */ |
/* A letter is upper-cased; then the 0x40 bit is flipped */ |
344 |
|
|
345 |
if (c >= 'a' && c <= 'z') c = pcre_fcc[c]; |
if (c >= 'a' && c <= 'z') c = cd->fcc[c]; |
346 |
c ^= 0x40; |
c ^= 0x40; |
347 |
break; |
break; |
348 |
|
|
349 |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
350 |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
351 |
for Perl compatibility, it is a literal. */ |
for Perl compatibility, it is a literal. This code looks a bit odd, but |
352 |
|
there used to be some cases other than the default, and there may be again |
353 |
|
in future, so I haven't "optimized" it. */ |
354 |
|
|
355 |
default: |
default: |
356 |
if ((options & PCRE_EXTRA) != 0) switch(c) |
if ((options & PCRE_EXTRA) != 0) switch(c) |
380 |
|
|
381 |
Arguments: |
Arguments: |
382 |
p pointer to the first char after '{' |
p pointer to the first char after '{' |
383 |
|
cd pointer to char tables block |
384 |
|
|
385 |
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
386 |
*/ |
*/ |
387 |
|
|
388 |
static BOOL |
static BOOL |
389 |
is_counted_repeat(const uschar *p) |
is_counted_repeat(const uschar *p, compile_data *cd) |
390 |
{ |
{ |
391 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; |
392 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((cd->ctypes[*p] & ctype_digit) != 0) p++; |
393 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
394 |
|
|
395 |
if (*p++ != ',') return FALSE; |
if (*p++ != ',') return FALSE; |
396 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
397 |
|
|
398 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; |
399 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((cd->ctypes[*p] & ctype_digit) != 0) p++; |
400 |
return (*p == '}'); |
return (*p == '}'); |
401 |
} |
} |
402 |
|
|
416 |
maxp pointer to int for max |
maxp pointer to int for max |
417 |
returned as -1 if no max |
returned as -1 if no max |
418 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
419 |
|
cd pointer to character tables block |
420 |
|
|
421 |
Returns: pointer to '}' on success; |
Returns: pointer to '}' on success; |
422 |
current ptr on error, with errorptr set |
current ptr on error, with errorptr set |
423 |
*/ |
*/ |
424 |
|
|
425 |
static const uschar * |
static const uschar * |
426 |
read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr) |
read_repeat_counts(const uschar *p, int *minp, int *maxp, |
427 |
|
const char **errorptr, compile_data *cd) |
428 |
{ |
{ |
429 |
int min = 0; |
int min = 0; |
430 |
int max = -1; |
int max = -1; |
431 |
|
|
432 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; |
while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; |
433 |
|
|
434 |
if (*p == '}') max = min; else |
if (*p == '}') max = min; else |
435 |
{ |
{ |
436 |
if (*(++p) != '}') |
if (*(++p) != '}') |
437 |
{ |
{ |
438 |
max = 0; |
max = 0; |
439 |
while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; |
while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; |
440 |
if (max < min) |
if (max < min) |
441 |
{ |
{ |
442 |
*errorptr = ERR4; |
*errorptr = ERR4; |
621 |
/* Scan the pattern, compiling it into the code vector. |
/* Scan the pattern, compiling it into the code vector. |
622 |
|
|
623 |
Arguments: |
Arguments: |
624 |
options the option bits |
options the option bits |
625 |
brackets points to number of brackets used |
brackets points to number of brackets used |
626 |
code points to the pointer to the current code point |
code points to the pointer to the current code point |
627 |
ptrptr points to the current pattern pointer |
ptrptr points to the current pattern pointer |
628 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
629 |
optchanged set to the value of the last OP_OPT item compiled |
optchanged set to the value of the last OP_OPT item compiled |
630 |
|
cd contains pointers to tables |
631 |
|
|
632 |
Returns: TRUE on success |
Returns: TRUE on success |
633 |
FALSE, with *errorptr set on error |
FALSE, with *errorptr set on error |
634 |
*/ |
*/ |
635 |
|
|
636 |
static BOOL |
static BOOL |
637 |
compile_branch(int options, int *brackets, uschar **codeptr, |
compile_branch(int options, int *brackets, uschar **codeptr, |
638 |
const uschar **ptrptr, const char **errorptr, int *optchanged) |
const uschar **ptrptr, const char **errorptr, int *optchanged, |
639 |
|
compile_data *cd) |
640 |
{ |
{ |
641 |
int repeat_type, op_type; |
int repeat_type, op_type; |
642 |
int repeat_min, repeat_max; |
int repeat_min, repeat_max; |
668 |
c = *ptr; |
c = *ptr; |
669 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
670 |
{ |
{ |
671 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
672 |
if (c == '#') |
if (c == '#') |
673 |
{ |
{ |
674 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
756 |
|
|
757 |
if (c == '\\') |
if (c == '\\') |
758 |
{ |
{ |
759 |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
760 |
if (-c == ESC_b) c = '\b'; |
if (-c == ESC_b) c = '\b'; |
761 |
else if (c < 0) |
else if (c < 0) |
762 |
{ |
{ |
763 |
|
register const uschar *cbits = cd->cbits; |
764 |
class_charcount = 10; |
class_charcount = 10; |
765 |
switch (-c) |
switch (-c) |
766 |
{ |
{ |
767 |
case ESC_d: |
case ESC_d: |
768 |
for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit]; |
769 |
continue; |
continue; |
770 |
|
|
771 |
case ESC_D: |
case ESC_D: |
772 |
for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit]; |
773 |
continue; |
continue; |
774 |
|
|
775 |
case ESC_w: |
case ESC_w: |
776 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) |
777 |
class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]); |
class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]); |
778 |
continue; |
continue; |
779 |
|
|
780 |
case ESC_W: |
case ESC_W: |
781 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) |
782 |
class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]); |
class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]); |
783 |
continue; |
continue; |
784 |
|
|
785 |
case ESC_s: |
case ESC_s: |
786 |
for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space]; |
787 |
continue; |
continue; |
788 |
|
|
789 |
case ESC_S: |
case ESC_S: |
790 |
for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space]; |
791 |
continue; |
continue; |
792 |
|
|
793 |
default: |
default: |
819 |
|
|
820 |
if (d == '\\') |
if (d == '\\') |
821 |
{ |
{ |
822 |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
823 |
if (d < 0) |
if (d < 0) |
824 |
{ |
{ |
825 |
if (d == -ESC_b) d = '\b'; else |
if (d == -ESC_b) d = '\b'; else |
841 |
class[c/8] |= (1 << (c&7)); |
class[c/8] |= (1 << (c&7)); |
842 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
843 |
{ |
{ |
844 |
int uc = pcre_fcc[c]; /* flip case */ |
int uc = cd->fcc[c]; /* flip case */ |
845 |
class[uc/8] |= (1 << (uc&7)); |
class[uc/8] |= (1 << (uc&7)); |
846 |
} |
} |
847 |
class_charcount++; /* in case a one-char range */ |
class_charcount++; /* in case a one-char range */ |
856 |
class [c/8] |= (1 << (c&7)); |
class [c/8] |= (1 << (c&7)); |
857 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
858 |
{ |
{ |
859 |
c = pcre_fcc[c]; /* flip case */ |
c = cd->fcc[c]; /* flip case */ |
860 |
class[c/8] |= (1 << (c&7)); |
class[c/8] |= (1 << (c&7)); |
861 |
} |
} |
862 |
class_charcount++; |
class_charcount++; |
903 |
/* Various kinds of repeat */ |
/* Various kinds of repeat */ |
904 |
|
|
905 |
case '{': |
case '{': |
906 |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR; |
907 |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr); |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd); |
908 |
if (*errorptr != NULL) goto FAILED; |
if (*errorptr != NULL) goto FAILED; |
909 |
goto REPEAT; |
goto REPEAT; |
910 |
|
|
1091 |
else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE || |
else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE || |
1092 |
(int)*previous == OP_COND) |
(int)*previous == OP_COND) |
1093 |
{ |
{ |
1094 |
int i, ketoffset = 0; |
register int i; |
1095 |
|
int ketoffset = 0; |
1096 |
int len = code - previous; |
int len = code - previous; |
1097 |
|
uschar *bralink = NULL; |
1098 |
|
|
1099 |
/* If the maximum repeat count is unlimited, find the end of the bracket |
/* If the maximum repeat count is unlimited, find the end of the bracket |
1100 |
by scanning through from the start, and compute the offset back to it |
by scanning through from the start, and compute the offset back to it |
1109 |
ketoffset = code - ket; |
ketoffset = code - ket; |
1110 |
} |
} |
1111 |
|
|
1112 |
|
/* The case of a zero minimum is special because of the need to stick |
1113 |
|
OP_BRAZERO in front of it, and because the group appears once in the |
1114 |
|
data, whereas in other cases it appears the minimum number of times. For |
1115 |
|
this reason, it is simplest to treat this case separately, as otherwise |
1116 |
|
the code gets far too messy. There are several special subcases when the |
1117 |
|
minimum is zero. */ |
1118 |
|
|
1119 |
|
if (repeat_min == 0) |
1120 |
|
{ |
1121 |
|
/* If the maximum is also zero, we just omit the group from the output |
1122 |
|
altogether. */ |
1123 |
|
|
1124 |
|
if (repeat_max == 0) |
1125 |
|
{ |
1126 |
|
code = previous; |
1127 |
|
previous = NULL; |
1128 |
|
break; |
1129 |
|
} |
1130 |
|
|
1131 |
|
/* If the maximum is 1 or unlimited, we just have to stick in the |
1132 |
|
BRAZERO and do no more at this point. */ |
1133 |
|
|
1134 |
|
if (repeat_max <= 1) |
1135 |
|
{ |
1136 |
|
memmove(previous+1, previous, len); |
1137 |
|
code++; |
1138 |
|
*previous++ = OP_BRAZERO + repeat_type; |
1139 |
|
} |
1140 |
|
|
1141 |
|
/* If the maximum is greater than 1 and limited, we have to replicate |
1142 |
|
in a nested fashion, sticking OP_BRAZERO before each set of brackets. |
1143 |
|
The first one has to be handled carefully because it's the original |
1144 |
|
copy, which has to be moved up. The remainder can be handled by code |
1145 |
|
that is common with the non-zero minimum case below. We just have to |
1146 |
|
adjust the value of repeat_max, since one less copy is required. */ |
1147 |
|
|
1148 |
|
else |
1149 |
|
{ |
1150 |
|
int offset; |
1151 |
|
memmove(previous+4, previous, len); |
1152 |
|
code += 4; |
1153 |
|
*previous++ = OP_BRAZERO + repeat_type; |
1154 |
|
*previous++ = OP_BRA; |
1155 |
|
|
1156 |
|
/* We chain together the bracket offset fields that have to be |
1157 |
|
filled in later when the ends of the brackets are reached. */ |
1158 |
|
|
1159 |
|
offset = (bralink == NULL)? 0 : previous - bralink; |
1160 |
|
bralink = previous; |
1161 |
|
*previous++ = offset >> 8; |
1162 |
|
*previous++ = offset & 255; |
1163 |
|
} |
1164 |
|
|
1165 |
|
repeat_max--; |
1166 |
|
} |
1167 |
|
|
1168 |
|
/* If the minimum is greater than zero, replicate the group as many |
1169 |
|
times as necessary, and adjust the maximum to the number of subsequent |
1170 |
|
copies that we need. */ |
1171 |
|
|
1172 |
|
else |
1173 |
|
{ |
1174 |
|
for (i = 1; i < repeat_min; i++) |
1175 |
|
{ |
1176 |
|
memcpy(code, previous, len); |
1177 |
|
code += len; |
1178 |
|
} |
1179 |
|
if (repeat_max > 0) repeat_max -= repeat_min; |
1180 |
|
} |
1181 |
|
|
1182 |
|
/* This code is common to both the zero and non-zero minimum cases. If |
1183 |
|
the maximum is limited, it replicates the group in a nested fashion, |
1184 |
|
remembering the bracket starts on a stack. In the case of a zero minimum, |
1185 |
|
the first one was set up above. In all cases the repeat_max now specifies |
1186 |
|
the number of additional copies needed. */ |
1187 |
|
|
1188 |
|
if (repeat_max >= 0) |
1189 |
|
{ |
1190 |
|
for (i = repeat_max - 1; i >= 0; i--) |
1191 |
|
{ |
1192 |
|
*code++ = OP_BRAZERO + repeat_type; |
1193 |
|
|
1194 |
|
/* All but the final copy start a new nesting, maintaining the |
1195 |
|
chain of brackets outstanding. */ |
1196 |
|
|
1197 |
|
if (i != 0) |
1198 |
|
{ |
1199 |
|
int offset; |
1200 |
|
*code++ = OP_BRA; |
1201 |
|
offset = (bralink == NULL)? 0 : code - bralink; |
1202 |
|
bralink = code; |
1203 |
|
*code++ = offset >> 8; |
1204 |
|
*code++ = offset & 255; |
1205 |
|
} |
1206 |
|
|
1207 |
|
memcpy(code, previous, len); |
1208 |
|
code += len; |
1209 |
|
} |
1210 |
|
|
1211 |
|
/* Now chain through the pending brackets, and fill in their length |
1212 |
|
fields (which are holding the chain links pro tem). */ |
1213 |
|
|
1214 |
|
while (bralink != NULL) |
1215 |
|
{ |
1216 |
|
int oldlinkoffset; |
1217 |
|
int offset = code - bralink + 1; |
1218 |
|
uschar *bra = code - offset; |
1219 |
|
oldlinkoffset = (bra[1] << 8) + bra[2]; |
1220 |
|
bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; |
1221 |
|
*code++ = OP_KET; |
1222 |
|
*code++ = bra[1] = offset >> 8; |
1223 |
|
*code++ = bra[2] = (offset & 255); |
1224 |
|
} |
1225 |
|
} |
1226 |
|
|
1227 |
|
/* If the maximum is unlimited, set a repeater in the final copy. We |
1228 |
|
can't just offset backwards from the current code point, because we |
1229 |
|
don't know if there's been an options resetting after the ket. The |
1230 |
|
correct offset was computed above. */ |
1231 |
|
|
1232 |
|
else code[-ketoffset] = OP_KETRMAX + repeat_type; |
1233 |
|
|
1234 |
|
|
1235 |
|
#ifdef NEVER |
1236 |
/* If the minimum is greater than zero, and the maximum is unlimited or |
/* If the minimum is greater than zero, and the maximum is unlimited or |
1237 |
equal to the minimum, the first copy remains where it is, and is |
equal to the minimum, the first copy remains where it is, and is |
1238 |
replicated up to the minimum number of times. This case includes the + |
replicated up to the minimum number of times. This case includes the + |
1280 |
correct offset was computed above. */ |
correct offset was computed above. */ |
1281 |
|
|
1282 |
if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type; |
if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type; |
1283 |
|
#endif |
1284 |
|
|
1285 |
|
|
1286 |
} |
} |
1287 |
|
|
1288 |
/* Else there's some kind of shambles */ |
/* Else there's some kind of shambles */ |
1329 |
|
|
1330 |
case '(': |
case '(': |
1331 |
bravalue = OP_COND; /* Conditional group */ |
bravalue = OP_COND; /* Conditional group */ |
1332 |
if ((pcre_ctypes[*(++ptr)] & ctype_digit) != 0) |
if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) |
1333 |
{ |
{ |
1334 |
condref = *ptr - '0'; |
condref = *ptr - '0'; |
1335 |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
1462 |
errorptr, /* Where to put an error message */ |
errorptr, /* Where to put an error message */ |
1463 |
(bravalue == OP_ASSERTBACK || |
(bravalue == OP_ASSERTBACK || |
1464 |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
1465 |
condref)) /* Condition reference number */ |
condref, /* Condition reference number */ |
1466 |
|
cd)) /* Tables block */ |
1467 |
goto FAILED; |
goto FAILED; |
1468 |
|
|
1469 |
/* At the end of compiling, code is still pointing to the start of the |
/* At the end of compiling, code is still pointing to the start of the |
1511 |
|
|
1512 |
case '\\': |
case '\\': |
1513 |
tempptr = ptr; |
tempptr = ptr; |
1514 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
1515 |
|
|
1516 |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
1517 |
are arranged to be the negation of the corresponding OP_values. For the |
are arranged to be the negation of the corresponding OP_values. For the |
1556 |
{ |
{ |
1557 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
1558 |
{ |
{ |
1559 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
1560 |
if (c == '#') |
if (c == '#') |
1561 |
{ |
{ |
1562 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
1572 |
if (c == '\\') |
if (c == '\\') |
1573 |
{ |
{ |
1574 |
tempptr = ptr; |
tempptr = ptr; |
1575 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
1576 |
if (c < 0) { ptr = tempptr; break; } |
if (c < 0) { ptr = tempptr; break; } |
1577 |
} |
} |
1578 |
|
|
1584 |
|
|
1585 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
1586 |
|
|
1587 |
while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0); |
while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0); |
1588 |
|
|
1589 |
/* Compute the length and set it in the data vector, and advance to |
/* Compute the length and set it in the data vector, and advance to |
1590 |
the next state. */ |
the next state. */ |
1629 |
errorptr -> pointer to error message |
errorptr -> pointer to error message |
1630 |
lookbehind TRUE if this is a lookbehind assertion |
lookbehind TRUE if this is a lookbehind assertion |
1631 |
condref > 0 for OPT_CREF setting at start of conditional group |
condref > 0 for OPT_CREF setting at start of conditional group |
1632 |
|
cd points to the data block with tables pointers |
1633 |
|
|
1634 |
Returns: TRUE on success |
Returns: TRUE on success |
1635 |
*/ |
*/ |
1636 |
|
|
1637 |
static BOOL |
static BOOL |
1638 |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
1639 |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref) |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref, |
1640 |
|
compile_data *cd) |
1641 |
{ |
{ |
1642 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
1643 |
uschar *code = *codeptr; |
uschar *code = *codeptr; |
1684 |
|
|
1685 |
/* Now compile the branch */ |
/* Now compile the branch */ |
1686 |
|
|
1687 |
if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged)) |
if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd)) |
1688 |
{ |
{ |
1689 |
*ptrptr = ptr; |
*ptrptr = ptr; |
1690 |
return FALSE; |
return FALSE; |
1954 |
options various option bits |
options various option bits |
1955 |
errorptr pointer to pointer to error text |
errorptr pointer to pointer to error text |
1956 |
erroroffset ptr offset in pattern where error was detected |
erroroffset ptr offset in pattern where error was detected |
1957 |
|
tables pointer to character tables or NULL |
1958 |
|
|
1959 |
Returns: pointer to compiled data block, or NULL on error, |
Returns: pointer to compiled data block, or NULL on error, |
1960 |
with errorptr and erroroffset set |
with errorptr and erroroffset set |
1962 |
|
|
1963 |
pcre * |
pcre * |
1964 |
pcre_compile(const char *pattern, int options, const char **errorptr, |
pcre_compile(const char *pattern, int options, const char **errorptr, |
1965 |
int *erroroffset) |
int *erroroffset, const unsigned char *tables) |
1966 |
{ |
{ |
1967 |
real_pcre *re; |
real_pcre *re; |
1968 |
int length = 3; /* For initial BRA plus length */ |
int length = 3; /* For initial BRA plus length */ |
1975 |
unsigned int brastackptr = 0; |
unsigned int brastackptr = 0; |
1976 |
uschar *code; |
uschar *code; |
1977 |
const uschar *ptr; |
const uschar *ptr; |
1978 |
|
compile_data compile_block; |
1979 |
int brastack[BRASTACK_SIZE]; |
int brastack[BRASTACK_SIZE]; |
1980 |
uschar bralenstack[BRASTACK_SIZE]; |
uschar bralenstack[BRASTACK_SIZE]; |
1981 |
|
|
2004 |
return NULL; |
return NULL; |
2005 |
} |
} |
2006 |
|
|
2007 |
|
/* Set up pointers to the individual character tables */ |
2008 |
|
|
2009 |
|
if (tables == NULL) tables = pcre_default_tables; |
2010 |
|
compile_block.lcc = tables + lcc_offset; |
2011 |
|
compile_block.fcc = tables + fcc_offset; |
2012 |
|
compile_block.cbits = tables + cbits_offset; |
2013 |
|
compile_block.ctypes = tables + ctypes_offset; |
2014 |
|
|
2015 |
|
/* Reflect pattern for debugging output */ |
2016 |
|
|
2017 |
DPRINTF(("------------------------------------------------------------------\n")); |
DPRINTF(("------------------------------------------------------------------\n")); |
2018 |
DPRINTF(("%s\n", pattern)); |
DPRINTF(("%s\n", pattern)); |
2019 |
|
|
2032 |
|
|
2033 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
2034 |
{ |
{ |
2035 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
2036 |
if (c == '#') |
if (c == '#') |
2037 |
{ |
{ |
2038 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
2050 |
case '\\': |
case '\\': |
2051 |
{ |
{ |
2052 |
const uschar *save_ptr = ptr; |
const uschar *save_ptr = ptr; |
2053 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block); |
2054 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2055 |
if (c >= 0) |
if (c >= 0) |
2056 |
{ |
{ |
2070 |
int refnum = -c - ESC_REF; |
int refnum = -c - ESC_REF; |
2071 |
if (refnum > top_backref) top_backref = refnum; |
if (refnum > top_backref) top_backref = refnum; |
2072 |
length++; /* For single back reference */ |
length++; /* For single back reference */ |
2073 |
if (ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
2074 |
{ |
{ |
2075 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
2076 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2077 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
2078 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
2096 |
or back reference. */ |
or back reference. */ |
2097 |
|
|
2098 |
case '{': |
case '{': |
2099 |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR; |
2100 |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block); |
2101 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2102 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
2103 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
2132 |
{ |
{ |
2133 |
if (*ptr == '\\') |
if (*ptr == '\\') |
2134 |
{ |
{ |
2135 |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE); |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE, |
2136 |
|
&compile_block); |
2137 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2138 |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
2139 |
} |
} |
2150 |
|
|
2151 |
/* A repeat needs either 1 or 5 bytes. */ |
/* A repeat needs either 1 or 5 bytes. */ |
2152 |
|
|
2153 |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
2154 |
{ |
{ |
2155 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
2156 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2157 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
2158 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
2218 |
group. */ |
group. */ |
2219 |
|
|
2220 |
case '(': |
case '(': |
2221 |
if ((pcre_ctypes[ptr[3]] & ctype_digit) != 0) |
if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0) |
2222 |
{ |
{ |
2223 |
ptr += 4; |
ptr += 4; |
2224 |
length += 2; |
length += 2; |
2225 |
while ((pcre_ctypes[*ptr] & ctype_digit) != 0) ptr++; |
while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++; |
2226 |
if (*ptr != ')') |
if (*ptr != ')') |
2227 |
{ |
{ |
2228 |
*errorptr = ERR26; |
*errorptr = ERR26; |
2391 |
/* Leave ptr at the final char; for read_repeat_counts this happens |
/* Leave ptr at the final char; for read_repeat_counts this happens |
2392 |
automatically; for the others we need an increment. */ |
automatically; for the others we need an increment. */ |
2393 |
|
|
2394 |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block)) |
2395 |
{ |
{ |
2396 |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr); |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr, |
2397 |
|
&compile_block); |
2398 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2399 |
} |
} |
2400 |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
2401 |
else if (c == '+') { maxval = -1; ptr++; } |
else if (c == '+') { maxval = -1; ptr++; } |
2402 |
else if (c == '?') { minval = 0; ptr++; } |
else if (c == '?') { minval = 0; ptr++; } |
2403 |
|
|
2404 |
/* If there is a minimum > 1 we have to replicate up to minval-1 times; |
/* If the minimum is zero, we have to allow for an OP_BRAZERO before the |
2405 |
if there is a limited maximum we have to replicate up to maxval-1 times |
group, and if the maximum is greater than zero, we have to replicate |
2406 |
and allow for a BRAZERO item before each optional copy, as we also have |
maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting |
2407 |
to do before the first copy if the minimum is zero. */ |
bracket set - hence the 7. */ |
2408 |
|
|
2409 |
if (minval == 0) length++; |
if (minval == 0) |
2410 |
else if (minval > 1) length += (minval - 1) * duplength; |
{ |
2411 |
if (maxval > minval) length += (maxval - minval) * (duplength + 1); |
length++; |
2412 |
|
if (maxval > 0) length += (maxval - 1) * (duplength + 7); |
2413 |
|
} |
2414 |
|
|
2415 |
|
/* When the minimum is greater than zero, we have to replicate up to |
2416 |
|
minval-1 times, with no additions required in the copies. Then, if |
2417 |
|
there is a limited maximum we have to replicate up to maxval-1 times |
2418 |
|
allowing for a BRAZERO item before each optional copy and nesting |
2419 |
|
brackets for all but one of the optional copies. */ |
2420 |
|
|
2421 |
|
else |
2422 |
|
{ |
2423 |
|
length += (minval - 1) * duplength; |
2424 |
|
if (maxval > minval) /* Need this test as maxval=-1 means no limit */ |
2425 |
|
length += (maxval - minval) * (duplength + 7) - 6; |
2426 |
|
} |
2427 |
} |
} |
2428 |
continue; |
continue; |
2429 |
|
|
2440 |
{ |
{ |
2441 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
2442 |
{ |
{ |
2443 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
2444 |
if (c == '#') |
if (c == '#') |
2445 |
{ |
{ |
2446 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
2454 |
if (c == '\\') |
if (c == '\\') |
2455 |
{ |
{ |
2456 |
const uschar *saveptr = ptr; |
const uschar *saveptr = ptr; |
2457 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, |
2458 |
|
&compile_block); |
2459 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
2460 |
if (c < 0) { ptr = saveptr; break; } |
if (c < 0) { ptr = saveptr; break; } |
2461 |
} |
} |
2467 |
|
|
2468 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
2469 |
|
|
2470 |
while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0); |
while (runlength < 255 && |
2471 |
|
(compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0); |
2472 |
|
|
2473 |
ptr--; |
ptr--; |
2474 |
length += runlength; |
length += runlength; |
2503 |
|
|
2504 |
re->magic_number = MAGIC_NUMBER; |
re->magic_number = MAGIC_NUMBER; |
2505 |
re->options = options; |
re->options = options; |
2506 |
|
re->tables = tables; |
2507 |
|
|
2508 |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
2509 |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
2513 |
code = re->code; |
code = re->code; |
2514 |
*code = OP_BRA; |
*code = OP_BRA; |
2515 |
bracount = 0; |
bracount = 0; |
2516 |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1); |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1, |
2517 |
|
&compile_block); |
2518 |
re->top_bracket = bracount; |
re->top_bracket = bracount; |
2519 |
re->top_backref = top_backref; |
re->top_backref = top_backref; |
2520 |
|
|
2811 |
|
|
2812 |
|
|
2813 |
/************************************************* |
/************************************************* |
|
* Match a character type * |
|
|
*************************************************/ |
|
|
|
|
|
/* Not used in all the places it might be as it's sometimes faster |
|
|
to put the code inline. |
|
|
|
|
|
Arguments: |
|
|
type the character type |
|
|
c the character |
|
|
dotall the dotall flag |
|
|
|
|
|
Returns: TRUE if character is of the type |
|
|
*/ |
|
|
|
|
|
static BOOL |
|
|
match_type(int type, int c, BOOL dotall) |
|
|
{ |
|
|
|
|
|
#ifdef DEBUG |
|
|
if (isprint(c)) printf("matching subject %c against ", c); |
|
|
else printf("matching subject \\x%02x against ", c); |
|
|
printf("%s\n", OP_names[type]); |
|
|
#endif |
|
|
|
|
|
switch(type) |
|
|
{ |
|
|
case OP_ANY: return dotall || c != '\n'; |
|
|
case OP_NOT_DIGIT: return (pcre_ctypes[c] & ctype_digit) == 0; |
|
|
case OP_DIGIT: return (pcre_ctypes[c] & ctype_digit) != 0; |
|
|
case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0; |
|
|
case OP_WHITESPACE: return (pcre_ctypes[c] & ctype_space) != 0; |
|
|
case OP_NOT_WORDCHAR: return (pcre_ctypes[c] & ctype_word) == 0; |
|
|
case OP_WORDCHAR: return (pcre_ctypes[c] & ctype_word) != 0; |
|
|
} |
|
|
return FALSE; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/************************************************* |
|
2814 |
* Match a back-reference * |
* Match a back-reference * |
2815 |
*************************************************/ |
*************************************************/ |
2816 |
|
|
2853 |
/* Separate the caseless case for speed */ |
/* Separate the caseless case for speed */ |
2854 |
|
|
2855 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
2856 |
{ while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; } |
{ |
2857 |
|
while (length-- > 0) |
2858 |
|
if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; |
2859 |
|
} |
2860 |
else |
else |
2861 |
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; } |
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; } |
2862 |
|
|
2919 |
int number = op - OP_BRA; |
int number = op - OP_BRA; |
2920 |
int offset = number << 1; |
int offset = number << 1; |
2921 |
|
|
2922 |
DPRINTF(("start bracket %d\n", number)); |
#ifdef DEBUG |
2923 |
|
printf("start bracket %d subject=", number); |
2924 |
|
pchars(eptr, 16, TRUE, md); |
2925 |
|
printf("\n"); |
2926 |
|
#endif |
2927 |
|
|
2928 |
if (offset < md->offset_max) |
if (offset < md->offset_max) |
2929 |
{ |
{ |
3313 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
3314 |
{ |
{ |
3315 |
BOOL prev_is_word = (eptr != md->start_subject) && |
BOOL prev_is_word = (eptr != md->start_subject) && |
3316 |
((pcre_ctypes[eptr[-1]] & ctype_word) != 0); |
((md->ctypes[eptr[-1]] & ctype_word) != 0); |
3317 |
BOOL cur_is_word = (eptr < md->end_subject) && |
BOOL cur_is_word = (eptr < md->end_subject) && |
3318 |
((pcre_ctypes[*eptr] & ctype_word) != 0); |
((md->ctypes[*eptr] & ctype_word) != 0); |
3319 |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
3320 |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
3321 |
return FALSE; |
return FALSE; |
3332 |
break; |
break; |
3333 |
|
|
3334 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
3335 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0) |
if (eptr >= md->end_subject || |
3336 |
|
(md->ctypes[*eptr++] & ctype_digit) != 0) |
3337 |
return FALSE; |
return FALSE; |
3338 |
ecode++; |
ecode++; |
3339 |
break; |
break; |
3340 |
|
|
3341 |
case OP_DIGIT: |
case OP_DIGIT: |
3342 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0) |
if (eptr >= md->end_subject || |
3343 |
|
(md->ctypes[*eptr++] & ctype_digit) == 0) |
3344 |
return FALSE; |
return FALSE; |
3345 |
ecode++; |
ecode++; |
3346 |
break; |
break; |
3347 |
|
|
3348 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
3349 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0) |
if (eptr >= md->end_subject || |
3350 |
|
(md->ctypes[*eptr++] & ctype_space) != 0) |
3351 |
return FALSE; |
return FALSE; |
3352 |
ecode++; |
ecode++; |
3353 |
break; |
break; |
3354 |
|
|
3355 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
3356 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0) |
if (eptr >= md->end_subject || |
3357 |
|
(md->ctypes[*eptr++] & ctype_space) == 0) |
3358 |
return FALSE; |
return FALSE; |
3359 |
ecode++; |
ecode++; |
3360 |
break; |
break; |
3361 |
|
|
3362 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
3363 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0) |
if (eptr >= md->end_subject || |
3364 |
|
(md->ctypes[*eptr++] & ctype_word) != 0) |
3365 |
return FALSE; |
return FALSE; |
3366 |
ecode++; |
ecode++; |
3367 |
break; |
break; |
3368 |
|
|
3369 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
3370 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0) |
if (eptr >= md->end_subject || |
3371 |
|
(md->ctypes[*eptr++] & ctype_word) == 0) |
3372 |
return FALSE; |
return FALSE; |
3373 |
ecode++; |
ecode++; |
3374 |
break; |
break; |
3600 |
if (length > md->end_subject - eptr) return FALSE; |
if (length > md->end_subject - eptr) return FALSE; |
3601 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3602 |
{ |
{ |
3603 |
while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE; |
while (length-- > 0) |
3604 |
|
if (md->lcc[*ecode++] != md->lcc[*eptr++]) |
3605 |
|
return FALSE; |
3606 |
} |
} |
3607 |
else |
else |
3608 |
{ |
{ |
3659 |
|
|
3660 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3661 |
{ |
{ |
3662 |
c = pcre_lcc[c]; |
c = md->lcc[c]; |
3663 |
for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE; |
for (i = 1; i <= min; i++) |
3664 |
|
if (c != md->lcc[*eptr++]) return FALSE; |
3665 |
if (min == max) continue; |
if (min == max) continue; |
3666 |
if (minimize) |
if (minimize) |
3667 |
{ |
{ |
3669 |
{ |
{ |
3670 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
3671 |
return TRUE; |
return TRUE; |
3672 |
if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++]) |
if (i >= max || eptr >= md->end_subject || |
3673 |
|
c != md->lcc[*eptr++]) |
3674 |
return FALSE; |
return FALSE; |
3675 |
} |
} |
3676 |
/* Control never gets here */ |
/* Control never gets here */ |
3680 |
const uschar *pp = eptr; |
const uschar *pp = eptr; |
3681 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3682 |
{ |
{ |
3683 |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c != md->lcc[*eptr]) break; |
3684 |
eptr++; |
eptr++; |
3685 |
} |
} |
3686 |
while (eptr >= pp) |
while (eptr >= pp) |
3730 |
ecode++; |
ecode++; |
3731 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3732 |
{ |
{ |
3733 |
if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE; |
if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE; |
3734 |
} |
} |
3735 |
else |
else |
3736 |
{ |
{ |
3790 |
|
|
3791 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
3792 |
{ |
{ |
3793 |
c = pcre_lcc[c]; |
c = md->lcc[c]; |
3794 |
for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE; |
for (i = 1; i <= min; i++) |
3795 |
|
if (c == md->lcc[*eptr++]) return FALSE; |
3796 |
if (min == max) continue; |
if (min == max) continue; |
3797 |
if (minimize) |
if (minimize) |
3798 |
{ |
{ |
3800 |
{ |
{ |
3801 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
3802 |
return TRUE; |
return TRUE; |
3803 |
if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++]) |
if (i >= max || eptr >= md->end_subject || |
3804 |
|
c == md->lcc[*eptr++]) |
3805 |
return FALSE; |
return FALSE; |
3806 |
} |
} |
3807 |
/* Control never gets here */ |
/* Control never gets here */ |
3811 |
const uschar *pp = eptr; |
const uschar *pp = eptr; |
3812 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3813 |
{ |
{ |
3814 |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c == md->lcc[*eptr]) break; |
3815 |
eptr++; |
eptr++; |
3816 |
} |
} |
3817 |
while (eptr >= pp) |
while (eptr >= pp) |
3905 |
|
|
3906 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
3907 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3908 |
if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE; |
3909 |
break; |
break; |
3910 |
|
|
3911 |
case OP_DIGIT: |
case OP_DIGIT: |
3912 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3913 |
if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE; |
3914 |
break; |
break; |
3915 |
|
|
3916 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
3917 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3918 |
if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE; |
3919 |
break; |
break; |
3920 |
|
|
3921 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
3922 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
3923 |
if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE; |
3924 |
break; |
break; |
3925 |
|
|
3926 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
3927 |
for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0) |
for (i = 1; i <= min; i++) |
3928 |
return FALSE; |
if ((md->ctypes[*eptr++] & ctype_word) != 0) |
3929 |
|
return FALSE; |
3930 |
break; |
break; |
3931 |
|
|
3932 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
3933 |
for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0) |
for (i = 1; i <= min; i++) |
3934 |
return FALSE; |
if ((md->ctypes[*eptr++] & ctype_word) == 0) |
3935 |
|
return FALSE; |
3936 |
break; |
break; |
3937 |
} |
} |
3938 |
|
|
3941 |
if (min == max) continue; |
if (min == max) continue; |
3942 |
|
|
3943 |
/* If minimizing, we have to test the rest of the pattern before each |
/* If minimizing, we have to test the rest of the pattern before each |
3944 |
subsequent match, so inlining isn't much help; just use the function. */ |
subsequent match. */ |
3945 |
|
|
3946 |
if (minimize) |
if (minimize) |
3947 |
{ |
{ |
3948 |
for (i = min;; i++) |
for (i = min;; i++) |
3949 |
{ |
{ |
3950 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE; |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE; |
3951 |
if (i >= max || eptr >= md->end_subject || |
if (i >= max || eptr >= md->end_subject) return FALSE; |
3952 |
!match_type(ctype, *eptr++, (ims & PCRE_DOTALL) != 0)) |
|
3953 |
return FALSE; |
c = *eptr++; |
3954 |
|
switch(ctype) |
3955 |
|
{ |
3956 |
|
case OP_ANY: |
3957 |
|
if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE; |
3958 |
|
break; |
3959 |
|
|
3960 |
|
case OP_NOT_DIGIT: |
3961 |
|
if ((md->ctypes[c] & ctype_digit) != 0) return FALSE; |
3962 |
|
break; |
3963 |
|
|
3964 |
|
case OP_DIGIT: |
3965 |
|
if ((md->ctypes[c] & ctype_digit) == 0) return FALSE; |
3966 |
|
break; |
3967 |
|
|
3968 |
|
case OP_NOT_WHITESPACE: |
3969 |
|
if ((md->ctypes[c] & ctype_space) != 0) return FALSE; |
3970 |
|
break; |
3971 |
|
|
3972 |
|
case OP_WHITESPACE: |
3973 |
|
if ((md->ctypes[c] & ctype_space) == 0) return FALSE; |
3974 |
|
break; |
3975 |
|
|
3976 |
|
case OP_NOT_WORDCHAR: |
3977 |
|
if ((md->ctypes[c] & ctype_word) != 0) return FALSE; |
3978 |
|
break; |
3979 |
|
|
3980 |
|
case OP_WORDCHAR: |
3981 |
|
if ((md->ctypes[c] & ctype_word) == 0) return FALSE; |
3982 |
|
break; |
3983 |
|
} |
3984 |
} |
} |
3985 |
/* Control never gets here */ |
/* Control never gets here */ |
3986 |
} |
} |
4013 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
4014 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4015 |
{ |
{ |
4016 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) |
4017 |
break; |
break; |
4018 |
eptr++; |
eptr++; |
4019 |
} |
} |
4022 |
case OP_DIGIT: |
case OP_DIGIT: |
4023 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4024 |
{ |
{ |
4025 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) |
4026 |
break; |
break; |
4027 |
eptr++; |
eptr++; |
4028 |
} |
} |
4031 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
4032 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4033 |
{ |
{ |
4034 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) |
4035 |
break; |
break; |
4036 |
eptr++; |
eptr++; |
4037 |
} |
} |
4040 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
4041 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4042 |
{ |
{ |
4043 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) |
4044 |
break; |
break; |
4045 |
eptr++; |
eptr++; |
4046 |
} |
} |
4049 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
4050 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4051 |
{ |
{ |
4052 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) |
4053 |
break; |
break; |
4054 |
eptr++; |
eptr++; |
4055 |
} |
} |
4058 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
4059 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4060 |
{ |
{ |
4061 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) |
4062 |
break; |
break; |
4063 |
eptr++; |
eptr++; |
4064 |
} |
} |
4148 |
|
|
4149 |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
4150 |
|
|
4151 |
|
match_block.lcc = re->tables + lcc_offset; |
4152 |
|
match_block.ctypes = re->tables + ctypes_offset; |
4153 |
|
|
4154 |
/* The ims options can vary during the matching as a result of the presence |
/* The ims options can vary during the matching as a result of the presence |
4155 |
of (?ims) items in the pattern. They are kept in a local variable so that |
of (?ims) items in the pattern. They are kept in a local variable so that |
4156 |
restoring at the exit of a group is easy. */ |
restoring at the exit of a group is easy. */ |
4185 |
resetcount = 2 + re->top_bracket * 2; |
resetcount = 2 + re->top_bracket * 2; |
4186 |
if (resetcount > offsetcount) resetcount = ocount; |
if (resetcount > offsetcount) resetcount = ocount; |
4187 |
|
|
4188 |
|
/* Reset the working variable associated with each extraction. These should |
4189 |
|
never be used unless previously set, but they get saved and restored, and so we |
4190 |
|
initialize them to avoid reading uninitialized locations. */ |
4191 |
|
|
4192 |
|
if (match_block.offset_vector != NULL) |
4193 |
|
{ |
4194 |
|
register int *iptr = match_block.offset_vector + ocount; |
4195 |
|
register int *iend = iptr - resetcount/2 + 1; |
4196 |
|
while (--iptr >= iend) *iptr = -1; |
4197 |
|
} |
4198 |
|
|
4199 |
/* Set up the first character to match, if available. The first_char value is |
/* Set up the first character to match, if available. The first_char value is |
4200 |
never set for an anchored regular expression, but the anchoring may be forced |
never set for an anchored regular expression, but the anchoring may be forced |
4201 |
at run time, so we have to test for anchoring. The first char may be unset for |
at run time, so we have to test for anchoring. The first char may be unset for |
4207 |
if ((re->options & PCRE_FIRSTSET) != 0) |
if ((re->options & PCRE_FIRSTSET) != 0) |
4208 |
{ |
{ |
4209 |
first_char = re->first_char; |
first_char = re->first_char; |
4210 |
if ((ims & PCRE_CASELESS) != 0) first_char = pcre_lcc[first_char]; |
if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char]; |
4211 |
} |
} |
4212 |
else |
else |
4213 |
if (!startline && extra != NULL && |
if (!startline && extra != NULL && |
4232 |
if (first_char >= 0) |
if (first_char >= 0) |
4233 |
{ |
{ |
4234 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
4235 |
while (start_match < end_subject && pcre_lcc[*start_match] != first_char) |
while (start_match < end_subject && |
4236 |
|
match_block.lcc[*start_match] != first_char) |
4237 |
start_match++; |
start_match++; |
4238 |
else |
else |
4239 |
while (start_match < end_subject && *start_match != first_char) |
while (start_match < end_subject && *start_match != first_char) |
4306 |
DPRINTF((">>>> returning %d\n", rc)); |
DPRINTF((">>>> returning %d\n", rc)); |
4307 |
return rc; |
return rc; |
4308 |
} |
} |
4309 |
|
|
4310 |
|
/* This "while" is the end of the "do" above */ |
4311 |
|
|
4312 |
while (!anchored && |
while (!anchored && |
4313 |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
4314 |
start_match++ < end_subject); |
start_match++ < end_subject); |