48 |
/* Text forms of OP_ values and things, for debugging */ |
/* Text forms of OP_ values and things, for debugging */ |
49 |
|
|
50 |
#ifdef DEBUG |
#ifdef DEBUG |
51 |
static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d", |
static const char *OP_names[] = { |
52 |
|
"End", "\\A", "\\B", "\\b", "\\D", "\\d", |
53 |
"\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars", |
"\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars", |
54 |
"not", |
"not", |
55 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
82 |
|
|
83 |
/* Definition to allow mutual recursion */ |
/* Definition to allow mutual recursion */ |
84 |
|
|
85 |
static BOOL compile_regex(int, int *,uschar **,uschar **,char **); |
static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **); |
86 |
|
|
87 |
/* Structure for passing "static" information around between the functions |
/* Structure for passing "static" information around between the functions |
88 |
doing the matching, so that they are thread-safe. */ |
doing the matching, so that they are thread-safe. */ |
99 |
BOOL noteol; /* NOTEOL flag */ |
BOOL noteol; /* NOTEOL flag */ |
100 |
BOOL dotall; /* Dot matches any char */ |
BOOL dotall; /* Dot matches any char */ |
101 |
BOOL endonly; /* Dollar not before final \n */ |
BOOL endonly; /* Dollar not before final \n */ |
102 |
uschar *start_subject; /* Start of the subject string */ |
const uschar *start_subject; /* Start of the subject string */ |
103 |
uschar *end_subject; /* End of the subject string */ |
const uschar *end_subject; /* End of the subject string */ |
104 |
jmp_buf fail_env; /* Environment for longjump() break out */ |
jmp_buf fail_env; /* Environment for longjump() break out */ |
105 |
uschar *end_match_ptr; /* Subject position at end match */ |
const uschar *end_match_ptr; /* Subject position at end match */ |
106 |
int end_offset_top; /* Highwater mark at end of match */ |
int end_offset_top; /* Highwater mark at end of match */ |
107 |
} match_data; |
} match_data; |
108 |
|
|
127 |
* Return version string * |
* Return version string * |
128 |
*************************************************/ |
*************************************************/ |
129 |
|
|
130 |
char * |
const char * |
131 |
pcre_version(void) |
pcre_version(void) |
132 |
{ |
{ |
133 |
return PCRE_VERSION; |
return PCRE_VERSION; |
157 |
int |
int |
158 |
pcre_info(const pcre *external_re, int *optptr, int *first_char) |
pcre_info(const pcre *external_re, int *optptr, int *first_char) |
159 |
{ |
{ |
160 |
real_pcre *re = (real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
161 |
if (re == NULL) return PCRE_ERROR_NULL; |
if (re == NULL) return PCRE_ERROR_NULL; |
162 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
163 |
if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS); |
if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS); |
229 |
/* Test an embedded subpattern; if it could not be empty, break the |
/* Test an embedded subpattern; if it could not be empty, break the |
230 |
loop. Otherwise carry on in the branch. */ |
loop. Otherwise carry on in the branch. */ |
231 |
|
|
232 |
if ((int)(*cc) >= OP_BRA) |
if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE) |
233 |
{ |
{ |
234 |
if (!could_be_empty(cc)) break; |
if (!could_be_empty(cc)) break; |
235 |
do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT); |
do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT); |
273 |
case OP_MINSTAR: |
case OP_MINSTAR: |
274 |
case OP_QUERY: |
case OP_QUERY: |
275 |
case OP_MINQUERY: |
case OP_MINQUERY: |
276 |
|
case OP_NOTSTAR: |
277 |
|
case OP_NOTMINSTAR: |
278 |
|
case OP_NOTQUERY: |
279 |
|
case OP_NOTMINQUERY: |
280 |
case OP_TYPESTAR: |
case OP_TYPESTAR: |
281 |
case OP_TYPEMINSTAR: |
case OP_TYPEMINSTAR: |
282 |
case OP_TYPEQUERY: |
case OP_TYPEQUERY: |
297 |
|
|
298 |
case OP_CLASS: |
case OP_CLASS: |
299 |
case OP_REF: |
case OP_REF: |
300 |
cc += (*cc == OP_REF)? 2 : 4 + 2 * cc[2] + cc[3]; |
cc += (*cc == OP_REF)? 2 : 33; |
301 |
|
|
302 |
switch (*cc) |
switch (*cc) |
303 |
{ |
{ |
361 |
*/ |
*/ |
362 |
|
|
363 |
static int |
static int |
364 |
check_escape(uschar **ptrptr, char **errorptr, int bracount, int options, |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
365 |
BOOL isclass) |
int options, BOOL isclass) |
366 |
{ |
{ |
367 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
368 |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
369 |
int i; |
int i; |
370 |
|
|
383 |
|
|
384 |
else |
else |
385 |
{ |
{ |
386 |
uschar *oldptr; |
const uschar *oldptr; |
387 |
switch (c) |
switch (c) |
388 |
{ |
{ |
389 |
/* The handling of escape sequences consisting of a string of digits |
/* The handling of escape sequences consisting of a string of digits |
503 |
*/ |
*/ |
504 |
|
|
505 |
static BOOL |
static BOOL |
506 |
is_counted_repeat(uschar *p) |
is_counted_repeat(const uschar *p) |
507 |
{ |
{ |
508 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
509 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
538 |
current ptr on error, with errorptr set |
current ptr on error, with errorptr set |
539 |
*/ |
*/ |
540 |
|
|
541 |
static uschar * |
static const uschar * |
542 |
read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr) |
read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr) |
543 |
{ |
{ |
544 |
int min = 0; |
int min = 0; |
545 |
int max = -1; |
int max = -1; |
593 |
*/ |
*/ |
594 |
|
|
595 |
static BOOL |
static BOOL |
596 |
compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr, |
compile_branch(int options, int *brackets, uschar **codeptr, |
597 |
char **errorptr) |
const uschar **ptrptr, const char **errorptr) |
598 |
{ |
{ |
599 |
int repeat_type, op_type; |
int repeat_type, op_type; |
600 |
int repeat_min, repeat_max; |
int repeat_min, repeat_max; |
601 |
int bravalue, length; |
int bravalue, length; |
602 |
register int c; |
register int c; |
603 |
register uschar *code = *codeptr; |
register uschar *code = *codeptr; |
604 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
605 |
|
const uschar *oldptr; |
606 |
uschar *previous = NULL; |
uschar *previous = NULL; |
|
uschar *oldptr; |
|
607 |
uschar class[32]; |
uschar class[32]; |
608 |
|
|
609 |
/* Switch on next character until the end of the branch */ |
/* Switch on next character until the end of the branch */ |
698 |
/* Backslash may introduce a single character, or it may introduce one |
/* Backslash may introduce a single character, or it may introduce one |
699 |
of the specials, which just set a flag. Escaped items are checked for |
of the specials, which just set a flag. Escaped items are checked for |
700 |
validity in the pre-compiling pass. The sequence \b is a special case. |
validity in the pre-compiling pass. The sequence \b is a special case. |
701 |
Inside a class (and only there) it is treated as backslash. Elsewhere |
Inside a class (and only there) it is treated as backspace. Elsewhere |
702 |
it marks a word boundary. Other escapes have preset maps ready to |
it marks a word boundary. Other escapes have preset maps ready to |
703 |
or into the one we are building. We assume they have more than one |
or into the one we are building. We assume they have more than one |
704 |
character in them, so set class_count bigger than one. */ |
character in them, so set class_count bigger than one. */ |
1215 |
continue; |
continue; |
1216 |
} |
} |
1217 |
|
|
1218 |
/* Reset and fall through */ |
/* Data character: reset and fall through */ |
1219 |
|
|
1220 |
ptr = oldptr; |
ptr = oldptr; |
1221 |
c = '\\'; |
c = '\\'; |
1306 |
*/ |
*/ |
1307 |
|
|
1308 |
static BOOL |
static BOOL |
1309 |
compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr, |
compile_regex(int options, int *brackets, uschar **codeptr, |
1310 |
char **errorptr) |
const uschar **ptrptr, const char **errorptr) |
1311 |
{ |
{ |
1312 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
1313 |
uschar *code = *codeptr; |
uschar *code = *codeptr; |
1314 |
uschar *start_bracket = code; |
uschar *start_bracket = code; |
1315 |
|
|
1375 |
*/ |
*/ |
1376 |
|
|
1377 |
static BOOL |
static BOOL |
1378 |
is_anchored(register uschar *code, BOOL multiline) |
is_anchored(register const uschar *code, BOOL multiline) |
1379 |
{ |
{ |
1380 |
do { |
do { |
1381 |
int op = (int)code[3]; |
int op = (int)code[3]; |
1404 |
*/ |
*/ |
1405 |
|
|
1406 |
static BOOL |
static BOOL |
1407 |
is_startline(uschar *code) |
is_startline(const uschar *code) |
1408 |
{ |
{ |
1409 |
do { |
do { |
1410 |
if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT) |
if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT) |
1489 |
*/ |
*/ |
1490 |
|
|
1491 |
pcre * |
pcre * |
1492 |
pcre_compile(const char *pattern, int options, char **errorptr, |
pcre_compile(const char *pattern, int options, const char **errorptr, |
1493 |
int *erroroffset) |
int *erroroffset) |
1494 |
{ |
{ |
1495 |
real_pcre *re; |
real_pcre *re; |
1499 |
int c, size; |
int c, size; |
1500 |
int bracount = 0; |
int bracount = 0; |
1501 |
int brastack[200]; |
int brastack[200]; |
|
int brastackptr = 0; |
|
1502 |
int top_backref = 0; |
int top_backref = 0; |
1503 |
uschar *code, *ptr; |
unsigned int brastackptr = 0; |
1504 |
|
uschar *code; |
1505 |
|
const uschar *ptr; |
1506 |
|
|
1507 |
#ifdef DEBUG |
#ifdef DEBUG |
1508 |
uschar *code_base, *code_end; |
uschar *code_base, *code_end; |
1541 |
if an "extended" flag setting appears late in the pattern. We can't be so |
if an "extended" flag setting appears late in the pattern. We can't be so |
1542 |
clever for #-comments. */ |
clever for #-comments. */ |
1543 |
|
|
1544 |
ptr = (uschar *)(pattern - 1); |
ptr = (const uschar *)(pattern - 1); |
1545 |
while ((c = *(++ptr)) != 0) |
while ((c = *(++ptr)) != 0) |
1546 |
{ |
{ |
1547 |
int min, max; |
int min, max; |
1568 |
|
|
1569 |
case '\\': |
case '\\': |
1570 |
{ |
{ |
1571 |
uschar *save_ptr = ptr; |
const uschar *save_ptr = ptr; |
1572 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
1573 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1574 |
if (c >= 0) |
if (c >= 0) |
1833 |
|
|
1834 |
if (c == '\\') |
if (c == '\\') |
1835 |
{ |
{ |
1836 |
uschar *saveptr = ptr; |
const uschar *saveptr = ptr; |
1837 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
1838 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1839 |
if (c < 0) { ptr = saveptr; break; } |
if (c < 0) { ptr = saveptr; break; } |
1881 |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
1882 |
of the function here. */ |
of the function here. */ |
1883 |
|
|
1884 |
ptr = (uschar *)pattern; |
ptr = (const uschar *)pattern; |
1885 |
code = re->code; |
code = re->code; |
1886 |
*code = OP_BRA; |
*code = OP_BRA; |
1887 |
bracount = 0; |
bracount = 0; |
1908 |
{ |
{ |
1909 |
(pcre_free)(re); |
(pcre_free)(re); |
1910 |
PCRE_ERROR_RETURN: |
PCRE_ERROR_RETURN: |
1911 |
*erroroffset = ptr - (uschar *)pattern; |
*erroroffset = ptr - (const uschar *)pattern; |
1912 |
return NULL; |
return NULL; |
1913 |
} |
} |
1914 |
|
|
2206 |
*/ |
*/ |
2207 |
|
|
2208 |
static BOOL |
static BOOL |
2209 |
match_ref(int number, register uschar *eptr, int length, match_data *md) |
match_ref(int number, register const uschar *eptr, int length, match_data *md) |
2210 |
{ |
{ |
2211 |
uschar *p = md->start_subject + md->offset_vector[number]; |
const uschar *p = md->start_subject + md->offset_vector[number]; |
2212 |
|
|
2213 |
#ifdef DEBUG |
#ifdef DEBUG |
2214 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
2255 |
*/ |
*/ |
2256 |
|
|
2257 |
static BOOL |
static BOOL |
2258 |
match(register uschar *eptr, register uschar *ecode, int offset_top, |
match(register const uschar *eptr, register const uschar *ecode, int offset_top, |
2259 |
match_data *md) |
match_data *md) |
2260 |
{ |
{ |
2261 |
for (;;) |
for (;;) |
2263 |
int min, max, ctype; |
int min, max, ctype; |
2264 |
register int i; |
register int i; |
2265 |
register int c; |
register int c; |
2266 |
BOOL minimize; |
BOOL minimize = FALSE; |
2267 |
|
|
2268 |
/* Opening bracket. Check the alternative branches in turn, failing if none |
/* Opening bracket. Check the alternative branches in turn, failing if none |
2269 |
match. We have to set the start offset if required and there is space |
match. We have to set the start offset if required and there is space |
2276 |
if ((int)*ecode >= OP_BRA) |
if ((int)*ecode >= OP_BRA) |
2277 |
{ |
{ |
2278 |
int number = (*ecode - OP_BRA) << 1; |
int number = (*ecode - OP_BRA) << 1; |
2279 |
int save_offset1, save_offset2; |
int save_offset1 = 0, save_offset2 = 0; |
2280 |
|
|
2281 |
#ifdef DEBUG |
#ifdef DEBUG |
2282 |
printf("start bracket %d\n", number/2); |
printf("start bracket %d\n", number/2); |
2366 |
|
|
2367 |
/* "Once" brackets are like assertion brackets except that after a match, |
/* "Once" brackets are like assertion brackets except that after a match, |
2368 |
the point in the subject string is not moved back. Thus there can never be |
the point in the subject string is not moved back. Thus there can never be |
2369 |
a back into the brackets. Check the alternative branches in turn - the |
a move back into the brackets. Check the alternative branches in turn - the |
2370 |
matching won't pass the KET for this kind of subpattern. If any one branch |
matching won't pass the KET for this kind of subpattern. If any one branch |
2371 |
matches, we carry on, leaving the subject pointer. */ |
matches, we carry on, leaving the subject pointer. */ |
2372 |
|
|
2403 |
|
|
2404 |
case OP_BRAZERO: |
case OP_BRAZERO: |
2405 |
{ |
{ |
2406 |
uschar *next = ecode+1; |
const uschar *next = ecode+1; |
2407 |
if (match(eptr, next, offset_top, md)) return TRUE; |
if (match(eptr, next, offset_top, md)) return TRUE; |
2408 |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
2409 |
ecode = next + 3; |
ecode = next + 3; |
2412 |
|
|
2413 |
case OP_BRAMINZERO: |
case OP_BRAMINZERO: |
2414 |
{ |
{ |
2415 |
uschar *next = ecode+1; |
const uschar *next = ecode+1; |
2416 |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
2417 |
if (match(eptr, next+3, offset_top, md)) return TRUE; |
if (match(eptr, next+3, offset_top, md)) return TRUE; |
2418 |
ecode++; |
ecode++; |
2428 |
case OP_KETRMAX: |
case OP_KETRMAX: |
2429 |
{ |
{ |
2430 |
int number; |
int number; |
2431 |
uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; |
const uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; |
2432 |
|
|
2433 |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE) |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE) |
2434 |
{ |
{ |
2677 |
|
|
2678 |
else |
else |
2679 |
{ |
{ |
2680 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
2681 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
2682 |
{ |
{ |
2683 |
if (!match_ref(number, eptr, length, md)) break; |
if (!match_ref(number, eptr, length, md)) break; |
2701 |
|
|
2702 |
case OP_CLASS: |
case OP_CLASS: |
2703 |
{ |
{ |
2704 |
uschar *data = ecode + 1; /* Save for matching */ |
const uschar *data = ecode + 1; /* Save for matching */ |
2705 |
ecode += 33; /* Advance past the item */ |
ecode += 33; /* Advance past the item */ |
2706 |
|
|
2707 |
switch (*ecode) |
switch (*ecode) |
2708 |
{ |
{ |
2785 |
|
|
2786 |
else |
else |
2787 |
{ |
{ |
2788 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
2789 |
for (i = min; i < max; eptr++, i++) |
for (i = min; i < max; eptr++, i++) |
2790 |
{ |
{ |
2791 |
if (eptr >= md->end_subject) break; |
if (eptr >= md->end_subject) break; |
2903 |
} |
} |
2904 |
else |
else |
2905 |
{ |
{ |
2906 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
2907 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
2908 |
{ |
{ |
2909 |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
2933 |
} |
} |
2934 |
else |
else |
2935 |
{ |
{ |
2936 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
2937 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
2938 |
{ |
{ |
2939 |
if (eptr >= md->end_subject || c != *eptr) break; |
if (eptr >= md->end_subject || c != *eptr) break; |
3030 |
} |
} |
3031 |
else |
else |
3032 |
{ |
{ |
3033 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
3034 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3035 |
{ |
{ |
3036 |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
3060 |
} |
} |
3061 |
else |
else |
3062 |
{ |
{ |
3063 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
3064 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3065 |
{ |
{ |
3066 |
if (eptr >= md->end_subject || c == *eptr) break; |
if (eptr >= md->end_subject || c == *eptr) break; |
3177 |
|
|
3178 |
else |
else |
3179 |
{ |
{ |
3180 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
3181 |
switch(ctype) |
switch(ctype) |
3182 |
{ |
{ |
3183 |
case OP_ANY: |
case OP_ANY: |
3309 |
int ocount = offsetcount; |
int ocount = offsetcount; |
3310 |
int first_char = -1; |
int first_char = -1; |
3311 |
match_data match_block; |
match_data match_block; |
3312 |
uschar *start_bits = NULL; |
const uschar *start_bits = NULL; |
3313 |
uschar *start_match = (uschar *)subject; |
const uschar *start_match = (uschar *)subject; |
3314 |
uschar *end_subject; |
const uschar *end_subject; |
3315 |
real_pcre *re = (real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
3316 |
real_pcre_extra *extra = (real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
3317 |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
3318 |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
3319 |
|
|
3323 |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
3324 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
3325 |
|
|
3326 |
match_block.start_subject = (uschar *)subject; |
match_block.start_subject = (const uschar *)subject; |
3327 |
match_block.end_subject = match_block.start_subject + length; |
match_block.end_subject = match_block.start_subject + length; |
3328 |
end_subject = match_block.end_subject; |
end_subject = match_block.end_subject; |
3329 |
|
|