33 |
|
|
34 |
/* #define DEBUG */ |
/* #define DEBUG */ |
35 |
|
|
36 |
|
/* Use a macro for debugging printing, 'cause that eliminates the use |
37 |
|
of #ifdef inline, and there are *still* stupid compilers about that don't like |
38 |
|
indented pre-processor statements. I suppose it's only been 10 years... */ |
39 |
|
|
40 |
|
#ifdef DEBUG |
41 |
|
#define DPRINTF(p) printf p |
42 |
|
#else |
43 |
|
#define DPRINTF(p) /*nothing*/ |
44 |
|
#endif |
45 |
|
|
46 |
/* Include the internals header, which itself includes Standard C headers plus |
/* Include the internals header, which itself includes Standard C headers plus |
47 |
the external pcre header. */ |
the external pcre header. */ |
57 |
/* Text forms of OP_ values and things, for debugging */ |
/* Text forms of OP_ values and things, for debugging */ |
58 |
|
|
59 |
#ifdef DEBUG |
#ifdef DEBUG |
60 |
static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d", |
static const char *OP_names[] = { |
61 |
|
"End", "\\A", "\\B", "\\b", "\\D", "\\d", |
62 |
"\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars", |
"\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars", |
63 |
"not", |
"not", |
64 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
91 |
|
|
92 |
/* Definition to allow mutual recursion */ |
/* Definition to allow mutual recursion */ |
93 |
|
|
94 |
static BOOL compile_regex(int, int *,uschar **,uschar **,char **); |
static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **); |
95 |
|
|
96 |
/* Structure for passing "static" information around between the functions |
/* Structure for passing "static" information around between the functions |
97 |
doing the matching, so that they are thread-safe. */ |
doing the matching, so that they are thread-safe. */ |
108 |
BOOL noteol; /* NOTEOL flag */ |
BOOL noteol; /* NOTEOL flag */ |
109 |
BOOL dotall; /* Dot matches any char */ |
BOOL dotall; /* Dot matches any char */ |
110 |
BOOL endonly; /* Dollar not before final \n */ |
BOOL endonly; /* Dollar not before final \n */ |
111 |
uschar *start_subject; /* Start of the subject string */ |
const uschar *start_subject; /* Start of the subject string */ |
112 |
uschar *end_subject; /* End of the subject string */ |
const uschar *end_subject; /* End of the subject string */ |
113 |
jmp_buf fail_env; /* Environment for longjmp() break out */ |
jmp_buf fail_env; /* Environment for longjmp() break out */ |
114 |
uschar *end_match_ptr; /* Subject position at end match */ |
const uschar *end_match_ptr; /* Subject position at end match */ |
115 |
int end_offset_top; /* Highwater mark at end of match */ |
int end_offset_top; /* Highwater mark at end of match */ |
116 |
} match_data; |
} match_data; |
117 |
|
|
136 |
* Return version string * |
* Return version string * |
137 |
*************************************************/ |
*************************************************/ |
138 |
|
|
139 |
char * |
const char * |
140 |
pcre_version(void) |
pcre_version(void) |
141 |
{ |
{ |
142 |
return PCRE_VERSION; |
return PCRE_VERSION; |
166 |
int |
int |
167 |
pcre_info(const pcre *external_re, int *optptr, int *first_char) |
pcre_info(const pcre *external_re, int *optptr, int *first_char) |
168 |
{ |
{ |
169 |
real_pcre *re = (real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
170 |
if (re == NULL) return PCRE_ERROR_NULL; |
if (re == NULL) return PCRE_ERROR_NULL; |
171 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
172 |
if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS); |
if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS); |
196 |
Returns: nothing |
Returns: nothing |
197 |
*/ |
*/ |
198 |
|
|
199 |
static pchars(uschar *p, int length, BOOL is_subject, match_data *md) |
static void |
200 |
|
pchars(const uschar *p, int length, BOOL is_subject, match_data *md) |
201 |
{ |
{ |
202 |
int c; |
int c; |
203 |
if (is_subject && length > md->end_subject - p) length = md->end_subject - p; |
if (is_subject && length > md->end_subject - p) length = md->end_subject - p; |
371 |
*/ |
*/ |
372 |
|
|
373 |
static int |
static int |
374 |
check_escape(uschar **ptrptr, char **errorptr, int bracount, int options, |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
375 |
BOOL isclass) |
int options, BOOL isclass) |
376 |
{ |
{ |
377 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
378 |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
379 |
int i; |
int i; |
380 |
|
|
393 |
|
|
394 |
else |
else |
395 |
{ |
{ |
396 |
uschar *oldptr; |
const uschar *oldptr; |
397 |
switch (c) |
switch (c) |
398 |
{ |
{ |
399 |
/* The handling of escape sequences consisting of a string of digits |
/* The handling of escape sequences consisting of a string of digits |
513 |
*/ |
*/ |
514 |
|
|
515 |
static BOOL |
static BOOL |
516 |
is_counted_repeat(uschar *p) |
is_counted_repeat(const uschar *p) |
517 |
{ |
{ |
518 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
519 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
548 |
current ptr on error, with errorptr set |
current ptr on error, with errorptr set |
549 |
*/ |
*/ |
550 |
|
|
551 |
static uschar * |
static const uschar * |
552 |
read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr) |
read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr) |
553 |
{ |
{ |
554 |
int min = 0; |
int min = 0; |
555 |
int max = -1; |
int max = -1; |
603 |
*/ |
*/ |
604 |
|
|
605 |
static BOOL |
static BOOL |
606 |
compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr, |
compile_branch(int options, int *brackets, uschar **codeptr, |
607 |
char **errorptr) |
const uschar **ptrptr, const char **errorptr) |
608 |
{ |
{ |
609 |
int repeat_type, op_type; |
int repeat_type, op_type; |
610 |
int repeat_min, repeat_max; |
int repeat_min, repeat_max; |
611 |
int bravalue, length; |
int bravalue, length; |
612 |
register int c; |
register int c; |
613 |
register uschar *code = *codeptr; |
register uschar *code = *codeptr; |
614 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
615 |
|
const uschar *oldptr; |
616 |
uschar *previous = NULL; |
uschar *previous = NULL; |
|
uschar *oldptr; |
|
617 |
uschar class[32]; |
uschar class[32]; |
618 |
|
|
619 |
/* Switch on next character until the end of the branch */ |
/* Switch on next character until the end of the branch */ |
708 |
/* Backslash may introduce a single character, or it may introduce one |
/* Backslash may introduce a single character, or it may introduce one |
709 |
of the specials, which just set a flag. Escaped items are checked for |
of the specials, which just set a flag. Escaped items are checked for |
710 |
validity in the pre-compiling pass. The sequence \b is a special case. |
validity in the pre-compiling pass. The sequence \b is a special case. |
711 |
Inside a class (and only there) it is treated as backslash. Elsewhere |
Inside a class (and only there) it is treated as backspace. Elsewhere |
712 |
it marks a word boundary. Other escapes have preset maps ready to |
it marks a word boundary. Other escapes have preset maps ready to |
713 |
or into the one we are building. We assume they have more than one |
or into the one we are building. We assume they have more than one |
714 |
character in them, so set class_count bigger than one. */ |
character in them, so set class_count bigger than one. */ |
987 |
if (code == previous) code += 2; else previous[1]++; |
if (code == previous) code += 2; else previous[1]++; |
988 |
} |
} |
989 |
|
|
990 |
/* Insert an UPTO if the max is greater than the min. */ |
/* If the maximum is unlimited, insert an OP_STAR. */ |
991 |
|
|
992 |
|
if (repeat_max < 0) |
993 |
|
{ |
994 |
|
*code++ = c; |
995 |
|
*code++ = OP_STAR + repeat_type; |
996 |
|
} |
997 |
|
|
998 |
|
/* Else insert an UPTO if the max is greater than the min. */ |
999 |
|
|
1000 |
if (repeat_max != repeat_min) |
else if (repeat_max != repeat_min) |
1001 |
{ |
{ |
1002 |
*code++ = c; |
*code++ = c; |
1003 |
repeat_max -= repeat_min; |
repeat_max -= repeat_min; |
1041 |
else if ((int)*previous >= OP_BRA) |
else if ((int)*previous >= OP_BRA) |
1042 |
{ |
{ |
1043 |
int i; |
int i; |
1044 |
int length = code - previous; |
int len = code - previous; |
1045 |
|
|
1046 |
if (repeat_max == -1 && could_be_empty(previous)) |
if (repeat_max == -1 && could_be_empty(previous)) |
1047 |
{ |
{ |
1058 |
{ |
{ |
1059 |
for (i = 1; i < repeat_min; i++) |
for (i = 1; i < repeat_min; i++) |
1060 |
{ |
{ |
1061 |
memcpy(code, previous, length); |
memcpy(code, previous, len); |
1062 |
code += length; |
code += len; |
1063 |
} |
} |
1064 |
} |
} |
1065 |
|
|
1071 |
{ |
{ |
1072 |
if (repeat_min == 0) |
if (repeat_min == 0) |
1073 |
{ |
{ |
1074 |
memmove(previous+1, previous, length); |
memmove(previous+1, previous, len); |
1075 |
code++; |
code++; |
1076 |
*previous++ = OP_BRAZERO + repeat_type; |
*previous++ = OP_BRAZERO + repeat_type; |
1077 |
} |
} |
1078 |
|
|
1079 |
for (i = 1; i < repeat_min; i++) |
for (i = 1; i < repeat_min; i++) |
1080 |
{ |
{ |
1081 |
memcpy(code, previous, length); |
memcpy(code, previous, len); |
1082 |
code += length; |
code += len; |
1083 |
} |
} |
1084 |
|
|
1085 |
for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++) |
for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++) |
1086 |
{ |
{ |
1087 |
*code++ = OP_BRAZERO + repeat_type; |
*code++ = OP_BRAZERO + repeat_type; |
1088 |
memcpy(code, previous, length); |
memcpy(code, previous, len); |
1089 |
code += length; |
code += len; |
1090 |
} |
} |
1091 |
} |
} |
1092 |
|
|
1233 |
continue; |
continue; |
1234 |
} |
} |
1235 |
|
|
1236 |
/* Reset and fall through */ |
/* Data character: reset and fall through */ |
1237 |
|
|
1238 |
ptr = oldptr; |
ptr = oldptr; |
1239 |
c = '\\'; |
c = '\\'; |
1324 |
*/ |
*/ |
1325 |
|
|
1326 |
static BOOL |
static BOOL |
1327 |
compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr, |
compile_regex(int options, int *brackets, uschar **codeptr, |
1328 |
char **errorptr) |
const uschar **ptrptr, const char **errorptr) |
1329 |
{ |
{ |
1330 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
1331 |
uschar *code = *codeptr; |
uschar *code = *codeptr; |
1332 |
uschar *start_bracket = code; |
uschar *start_bracket = code; |
1333 |
|
|
1393 |
*/ |
*/ |
1394 |
|
|
1395 |
static BOOL |
static BOOL |
1396 |
is_anchored(register uschar *code, BOOL multiline) |
is_anchored(register const uschar *code, BOOL multiline) |
1397 |
{ |
{ |
1398 |
do { |
do { |
1399 |
int op = (int)code[3]; |
int op = (int)code[3]; |
1422 |
*/ |
*/ |
1423 |
|
|
1424 |
static BOOL |
static BOOL |
1425 |
is_startline(uschar *code) |
is_startline(const uschar *code) |
1426 |
{ |
{ |
1427 |
do { |
do { |
1428 |
if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT) |
if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT) |
1507 |
*/ |
*/ |
1508 |
|
|
1509 |
pcre * |
pcre * |
1510 |
pcre_compile(const char *pattern, int options, char **errorptr, |
pcre_compile(const char *pattern, int options, const char **errorptr, |
1511 |
int *erroroffset) |
int *erroroffset) |
1512 |
{ |
{ |
1513 |
real_pcre *re; |
real_pcre *re; |
1517 |
int c, size; |
int c, size; |
1518 |
int bracount = 0; |
int bracount = 0; |
1519 |
int brastack[200]; |
int brastack[200]; |
|
int brastackptr = 0; |
|
1520 |
int top_backref = 0; |
int top_backref = 0; |
1521 |
uschar *code, *ptr; |
unsigned int brastackptr = 0; |
1522 |
|
uschar *code; |
1523 |
|
const uschar *ptr; |
1524 |
|
|
1525 |
#ifdef DEBUG |
#ifdef DEBUG |
1526 |
uschar *code_base, *code_end; |
uschar *code_base, *code_end; |
1547 |
return NULL; |
return NULL; |
1548 |
} |
} |
1549 |
|
|
1550 |
#ifdef DEBUG |
DPRINTF(("------------------------------------------------------------------\n")); |
1551 |
printf("------------------------------------------------------------------\n"); |
DPRINTF(("%s\n", pattern)); |
|
printf("%s\n", pattern); |
|
|
#endif |
|
1552 |
|
|
1553 |
/* The first thing to do is to make a pass over the pattern to compute the |
/* The first thing to do is to make a pass over the pattern to compute the |
1554 |
amount of store required to hold the compiled code. This does not have to be |
amount of store required to hold the compiled code. This does not have to be |
1557 |
if an "extended" flag setting appears late in the pattern. We can't be so |
if an "extended" flag setting appears late in the pattern. We can't be so |
1558 |
clever for #-comments. */ |
clever for #-comments. */ |
1559 |
|
|
1560 |
ptr = (uschar *)(pattern - 1); |
ptr = (const uschar *)(pattern - 1); |
1561 |
while ((c = *(++ptr)) != 0) |
while ((c = *(++ptr)) != 0) |
1562 |
{ |
{ |
1563 |
int min, max; |
int min, max; |
1584 |
|
|
1585 |
case '\\': |
case '\\': |
1586 |
{ |
{ |
1587 |
uschar *save_ptr = ptr; |
const uschar *save_ptr = ptr; |
1588 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
1589 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1590 |
if (c >= 0) |
if (c >= 0) |
1663 |
{ |
{ |
1664 |
if (*ptr == '\\') |
if (*ptr == '\\') |
1665 |
{ |
{ |
1666 |
int c = check_escape(&ptr, errorptr, bracount, options, TRUE); |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE); |
1667 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1668 |
if (-c == ESC_b) class_charcount++; else class_charcount = 10; |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
1669 |
} |
} |
1670 |
else class_charcount++; |
else class_charcount++; |
1671 |
ptr++; |
ptr++; |
1680 |
|
|
1681 |
/* A repeat needs either 1 or 5 bytes. */ |
/* A repeat needs either 1 or 5 bytes. */ |
1682 |
|
|
1683 |
if (ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) |
1684 |
{ |
{ |
1685 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
1686 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1788 |
continue; |
continue; |
1789 |
|
|
1790 |
/* Handle ket. Look for subsequent max/min; for certain sets of values we |
/* Handle ket. Look for subsequent max/min; for certain sets of values we |
1791 |
have to replicate this bracket up to that many times. */ |
have to replicate this bracket up to that many times. If brastackptr is |
1792 |
|
0 this is an unmatched bracket which will generate an error, but take care |
1793 |
|
not to try to access brastack[-1]. */ |
1794 |
|
|
1795 |
case ')': |
case ')': |
1796 |
length += 3; |
length += 3; |
1797 |
{ |
{ |
1798 |
int min = 1; |
int minval = 1; |
1799 |
int max = 1; |
int maxval = 1; |
1800 |
int duplength = length - brastack[--brastackptr]; |
int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0; |
1801 |
|
|
1802 |
/* Leave ptr at the final char; for read_repeat_counts this happens |
/* Leave ptr at the final char; for read_repeat_counts this happens |
1803 |
automatically; for the others we need an increment. */ |
automatically; for the others we need an increment. */ |
1804 |
|
|
1805 |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
1806 |
{ |
{ |
1807 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr); |
1808 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1809 |
} |
} |
1810 |
else if (c == '*') { min = 0; max = -1; ptr++; } |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
1811 |
else if (c == '+') { max = -1; ptr++; } |
else if (c == '+') { maxval = -1; ptr++; } |
1812 |
else if (c == '?') { min = 0; ptr++; } |
else if (c == '?') { minval = 0; ptr++; } |
1813 |
|
|
1814 |
/* If there is a minimum > 1 we have to replicate up to min-1 times; if |
/* If there is a minimum > 1 we have to replicate up to minval-1 times; |
1815 |
there is a limited maximum we have to replicate up to max-1 times and |
if there is a limited maximum we have to replicate up to maxval-1 times |
1816 |
allow for a BRAZERO item before each optional copy, as we also have to |
and allow for a BRAZERO item before each optional copy, as we also have |
1817 |
do before the first copy if the minimum is zero. */ |
to do before the first copy if the minimum is zero. */ |
1818 |
|
|
1819 |
if (min == 0) length++; |
if (minval == 0) length++; |
1820 |
else if (min > 1) length += (min - 1) * duplength; |
else if (minval > 1) length += (minval - 1) * duplength; |
1821 |
if (max > min) length += (max - min) * (duplength + 1); |
if (maxval > minval) length += (maxval - minval) * (duplength + 1); |
1822 |
} |
} |
|
|
|
1823 |
continue; |
continue; |
1824 |
|
|
1825 |
/* Non-special character. For a run of such characters the length required |
/* Non-special character. For a run of such characters the length required |
1850 |
|
|
1851 |
if (c == '\\') |
if (c == '\\') |
1852 |
{ |
{ |
1853 |
uschar *saveptr = ptr; |
const uschar *saveptr = ptr; |
1854 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
1855 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1856 |
if (c < 0) { ptr = saveptr; break; } |
if (c < 0) { ptr = saveptr; break; } |
1880 |
} |
} |
1881 |
|
|
1882 |
/* Compute the size of data block needed and get it, either from malloc or |
/* Compute the size of data block needed and get it, either from malloc or |
1883 |
externally provided function. Put in the magic number and the options. */ |
externally provided function. We specify "code[0]" in the offsetof() expression |
1884 |
|
rather than just "code", because it has been reported that one broken compiler |
1885 |
|
fails on "code" because it is also an independent variable. It should make no |
1886 |
|
difference to the value of the offsetof(). */ |
1887 |
|
|
1888 |
size = length + offsetof(real_pcre, code); |
size = length + offsetof(real_pcre, code[0]); |
1889 |
re = (real_pcre *)(pcre_malloc)(size); |
re = (real_pcre *)(pcre_malloc)(size); |
1890 |
|
|
1891 |
if (re == NULL) |
if (re == NULL) |
1894 |
return NULL; |
return NULL; |
1895 |
} |
} |
1896 |
|
|
1897 |
|
/* Put in the magic number and the options. */ |
1898 |
|
|
1899 |
re->magic_number = MAGIC_NUMBER; |
re->magic_number = MAGIC_NUMBER; |
1900 |
re->options = options; |
re->options = options; |
1901 |
|
|
1903 |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
1904 |
of the function here. */ |
of the function here. */ |
1905 |
|
|
1906 |
ptr = (uschar *)pattern; |
ptr = (const uschar *)pattern; |
1907 |
code = re->code; |
code = re->code; |
1908 |
*code = OP_BRA; |
*code = OP_BRA; |
1909 |
bracount = 0; |
bracount = 0; |
1930 |
{ |
{ |
1931 |
(pcre_free)(re); |
(pcre_free)(re); |
1932 |
PCRE_ERROR_RETURN: |
PCRE_ERROR_RETURN: |
1933 |
*erroroffset = ptr - (uschar *)pattern; |
*erroroffset = ptr - (const uschar *)pattern; |
1934 |
return NULL; |
return NULL; |
1935 |
} |
} |
1936 |
|
|
1946 |
re->options |= PCRE_ANCHORED; |
re->options |= PCRE_ANCHORED; |
1947 |
else |
else |
1948 |
{ |
{ |
1949 |
int c = find_firstchar(re->code); |
int ch = find_firstchar(re->code); |
1950 |
if (c >= 0) |
if (ch >= 0) |
1951 |
{ |
{ |
1952 |
re->first_char = c; |
re->first_char = ch; |
1953 |
re->options |= PCRE_FIRSTSET; |
re->options |= PCRE_FIRSTSET; |
1954 |
} |
} |
1955 |
else if (is_startline(re->code)) |
else if (is_startline(re->code)) |
2086 |
|
|
2087 |
case OP_REF: |
case OP_REF: |
2088 |
printf(" \\%d", *(++code)); |
printf(" \\%d", *(++code)); |
2089 |
break; |
code ++; |
2090 |
|
goto CLASS_REF_REPEAT; |
2091 |
|
|
2092 |
case OP_CLASS: |
case OP_CLASS: |
2093 |
{ |
{ |
2117 |
printf("]"); |
printf("]"); |
2118 |
code += 32; |
code += 32; |
2119 |
|
|
2120 |
|
CLASS_REF_REPEAT: |
2121 |
|
|
2122 |
switch(*code) |
switch(*code) |
2123 |
{ |
{ |
2124 |
case OP_CRSTAR: |
case OP_CRSTAR: |
2231 |
*/ |
*/ |
2232 |
|
|
2233 |
static BOOL |
static BOOL |
2234 |
match_ref(int number, register uschar *eptr, int length, match_data *md) |
match_ref(int number, register const uschar *eptr, int length, match_data *md) |
2235 |
{ |
{ |
2236 |
uschar *p = md->start_subject + md->offset_vector[number]; |
const uschar *p = md->start_subject + md->offset_vector[number]; |
2237 |
|
|
2238 |
#ifdef DEBUG |
#ifdef DEBUG |
2239 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
2280 |
*/ |
*/ |
2281 |
|
|
2282 |
static BOOL |
static BOOL |
2283 |
match(register uschar *eptr, register uschar *ecode, int offset_top, |
match(register const uschar *eptr, register const uschar *ecode, int offset_top, |
2284 |
match_data *md) |
match_data *md) |
2285 |
{ |
{ |
2286 |
for (;;) |
for (;;) |
2288 |
int min, max, ctype; |
int min, max, ctype; |
2289 |
register int i; |
register int i; |
2290 |
register int c; |
register int c; |
2291 |
BOOL minimize; |
BOOL minimize = FALSE; |
2292 |
|
|
2293 |
/* Opening bracket. Check the alternative branches in turn, failing if none |
/* Opening bracket. Check the alternative branches in turn, failing if none |
2294 |
match. We have to set the start offset if required and there is space |
match. We have to set the start offset if required and there is space |
2301 |
if ((int)*ecode >= OP_BRA) |
if ((int)*ecode >= OP_BRA) |
2302 |
{ |
{ |
2303 |
int number = (*ecode - OP_BRA) << 1; |
int number = (*ecode - OP_BRA) << 1; |
2304 |
int save_offset1, save_offset2; |
int save_offset1 = 0, save_offset2 = 0; |
2305 |
|
|
2306 |
#ifdef DEBUG |
DPRINTF(("start bracket %d\n", number/2)); |
|
printf("start bracket %d\n", number/2); |
|
|
#endif |
|
2307 |
|
|
2308 |
if (number > 0 && number < md->offset_end) |
if (number > 0 && number < md->offset_end) |
2309 |
{ |
{ |
2311 |
save_offset2 = md->offset_vector[number+1]; |
save_offset2 = md->offset_vector[number+1]; |
2312 |
md->offset_vector[number] = eptr - md->start_subject; |
md->offset_vector[number] = eptr - md->start_subject; |
2313 |
|
|
2314 |
#ifdef DEBUG |
DPRINTF(("saving %d %d\n", save_offset1, save_offset2)); |
|
printf("saving %d %d\n", save_offset1, save_offset2); |
|
|
#endif |
|
2315 |
} |
} |
2316 |
|
|
2317 |
/* Recurse for all the alternatives. */ |
/* Recurse for all the alternatives. */ |
2323 |
} |
} |
2324 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
2325 |
|
|
2326 |
#ifdef DEBUG |
DPRINTF(("bracket %d failed\n", number/2)); |
|
printf("bracket %d failed\n", number/2); |
|
|
#endif |
|
2327 |
|
|
2328 |
if (number > 0 && number < md->offset_end) |
if (number > 0 && number < md->offset_end) |
2329 |
{ |
{ |
2422 |
|
|
2423 |
case OP_BRAZERO: |
case OP_BRAZERO: |
2424 |
{ |
{ |
2425 |
uschar *next = ecode+1; |
const uschar *next = ecode+1; |
2426 |
if (match(eptr, next, offset_top, md)) return TRUE; |
if (match(eptr, next, offset_top, md)) return TRUE; |
2427 |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
2428 |
ecode = next + 3; |
ecode = next + 3; |
2431 |
|
|
2432 |
case OP_BRAMINZERO: |
case OP_BRAMINZERO: |
2433 |
{ |
{ |
2434 |
uschar *next = ecode+1; |
const uschar *next = ecode+1; |
2435 |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
2436 |
if (match(eptr, next+3, offset_top, md)) return TRUE; |
if (match(eptr, next+3, offset_top, md)) return TRUE; |
2437 |
ecode++; |
ecode++; |
2447 |
case OP_KETRMAX: |
case OP_KETRMAX: |
2448 |
{ |
{ |
2449 |
int number; |
int number; |
2450 |
uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; |
const uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; |
2451 |
|
|
2452 |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE) |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE) |
2453 |
{ |
{ |
2462 |
|
|
2463 |
number = (*prev - OP_BRA) << 1; |
number = (*prev - OP_BRA) << 1; |
2464 |
|
|
2465 |
#ifdef DEBUG |
DPRINTF(("end bracket %d\n", number/2)); |
|
printf("end bracket %d\n", number/2); |
|
|
#endif |
|
2466 |
|
|
2467 |
if (number > 0) |
if (number > 0) |
2468 |
{ |
{ |
2694 |
|
|
2695 |
else |
else |
2696 |
{ |
{ |
2697 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
2698 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
2699 |
{ |
{ |
2700 |
if (!match_ref(number, eptr, length, md)) break; |
if (!match_ref(number, eptr, length, md)) break; |
2718 |
|
|
2719 |
case OP_CLASS: |
case OP_CLASS: |
2720 |
{ |
{ |
2721 |
uschar *data = ecode + 1; /* Save for matching */ |
const uschar *data = ecode + 1; /* Save for matching */ |
2722 |
ecode += 33; /* Advance past the item */ |
ecode += 33; /* Advance past the item */ |
2723 |
|
|
2724 |
switch (*ecode) |
switch (*ecode) |
2725 |
{ |
{ |
2802 |
|
|
2803 |
else |
else |
2804 |
{ |
{ |
2805 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
2806 |
for (i = min; i < max; eptr++, i++) |
for (i = min; i < max; eptr++, i++) |
2807 |
{ |
{ |
2808 |
if (eptr >= md->end_subject) break; |
if (eptr >= md->end_subject) break; |
2830 |
register int length = ecode[1]; |
register int length = ecode[1]; |
2831 |
ecode += 2; |
ecode += 2; |
2832 |
|
|
2833 |
#ifdef DEBUG |
#ifdef DEBUG /* Sigh. Some compilers never learn. */ |
2834 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
2835 |
printf("matching subject <null> against pattern "); |
printf("matching subject <null> against pattern "); |
2836 |
else |
else |
2841 |
} |
} |
2842 |
pchars(ecode, length, FALSE, md); |
pchars(ecode, length, FALSE, md); |
2843 |
printf("\n"); |
printf("\n"); |
2844 |
#endif |
#endif |
2845 |
|
|
2846 |
if (length > md->end_subject - eptr) return FALSE; |
if (length > md->end_subject - eptr) return FALSE; |
2847 |
if (md->caseless) |
if (md->caseless) |
2898 |
maximum. Alternatively, if maximizing, find the maximum number of |
maximum. Alternatively, if maximizing, find the maximum number of |
2899 |
characters and work backwards. */ |
characters and work backwards. */ |
2900 |
|
|
2901 |
#ifdef DEBUG |
DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max, |
2902 |
printf("matching %c{%d,%d} against subject %.*s\n", c, min, max, |
max, eptr)); |
|
max, eptr); |
|
|
#endif |
|
2903 |
|
|
2904 |
if (md->caseless) |
if (md->caseless) |
2905 |
{ |
{ |
2918 |
} |
} |
2919 |
else |
else |
2920 |
{ |
{ |
2921 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
2922 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
2923 |
{ |
{ |
2924 |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
2948 |
} |
} |
2949 |
else |
else |
2950 |
{ |
{ |
2951 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
2952 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
2953 |
{ |
{ |
2954 |
if (eptr >= md->end_subject || c != *eptr) break; |
if (eptr >= md->end_subject || c != *eptr) break; |
2964 |
/* Match a negated single character */ |
/* Match a negated single character */ |
2965 |
|
|
2966 |
case OP_NOT: |
case OP_NOT: |
2967 |
if (eptr > md->end_subject) return FALSE; |
if (eptr >= md->end_subject) return FALSE; |
2968 |
ecode++; |
ecode++; |
2969 |
if (md->caseless) |
if (md->caseless) |
2970 |
{ |
{ |
3023 |
maximum. Alternatively, if maximizing, find the maximum number of |
maximum. Alternatively, if maximizing, find the maximum number of |
3024 |
characters and work backwards. */ |
characters and work backwards. */ |
3025 |
|
|
3026 |
#ifdef DEBUG |
DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max, |
3027 |
printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max, |
max, eptr)); |
|
max, eptr); |
|
|
#endif |
|
3028 |
|
|
3029 |
if (md->caseless) |
if (md->caseless) |
3030 |
{ |
{ |
3043 |
} |
} |
3044 |
else |
else |
3045 |
{ |
{ |
3046 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
3047 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3048 |
{ |
{ |
3049 |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
3073 |
} |
} |
3074 |
else |
else |
3075 |
{ |
{ |
3076 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
3077 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3078 |
{ |
{ |
3079 |
if (eptr >= md->end_subject || c == *eptr) break; |
if (eptr >= md->end_subject || c == *eptr) break; |
3190 |
|
|
3191 |
else |
else |
3192 |
{ |
{ |
3193 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
3194 |
switch(ctype) |
switch(ctype) |
3195 |
{ |
{ |
3196 |
case OP_ANY: |
case OP_ANY: |
3274 |
/* There's been some horrible disaster. */ |
/* There's been some horrible disaster. */ |
3275 |
|
|
3276 |
default: |
default: |
3277 |
#ifdef DEBUG |
DPRINTF(("Unknown opcode %d\n", *ecode)); |
|
printf("Unknown opcode %d\n", *ecode); |
|
|
#endif |
|
3278 |
md->errorcode = PCRE_ERROR_UNKNOWN_NODE; |
md->errorcode = PCRE_ERROR_UNKNOWN_NODE; |
3279 |
return FALSE; |
return FALSE; |
3280 |
} |
} |
3290 |
|
|
3291 |
|
|
3292 |
/************************************************* |
/************************************************* |
3293 |
|
* Segregate setjmp() * |
3294 |
|
*************************************************/ |
3295 |
|
|
3296 |
|
/* The -Wall option of gcc gives warnings for all local variables when setjmp() |
3297 |
|
is used, even if the coding conforms to the rules of ANSI C. To avoid this, we |
3298 |
|
hide it in a separate function. This is called only when PCRE_EXTRA is set, |
3299 |
|
since it's needed only for the extension \X option, and with any luck, a good |
3300 |
|
compiler will spot the tail recursion and compile it efficiently. |
3301 |
|
|
3302 |
|
Arguments: The block containing the match data |
3303 |
|
Returns: The return from setjmp() |
3304 |
|
*/ |
3305 |
|
|
3306 |
|
static int |
3307 |
|
my_setjmp(match_data *match_block) |
3308 |
|
{ |
3309 |
|
return setjmp(match_block->fail_env); |
3310 |
|
} |
3311 |
|
|
3312 |
|
|
3313 |
|
|
3314 |
|
/************************************************* |
3315 |
* Execute a Regular Expression * |
* Execute a Regular Expression * |
3316 |
*************************************************/ |
*************************************************/ |
3317 |
|
|
3342 |
int ocount = offsetcount; |
int ocount = offsetcount; |
3343 |
int first_char = -1; |
int first_char = -1; |
3344 |
match_data match_block; |
match_data match_block; |
3345 |
uschar *start_bits = NULL; |
const uschar *start_bits = NULL; |
3346 |
uschar *start_match = (uschar *)subject; |
const uschar *start_match = (const uschar *)subject; |
3347 |
uschar *end_subject; |
const uschar *end_subject; |
3348 |
real_pcre *re = (real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
3349 |
real_pcre_extra *extra = (real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
3350 |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
3351 |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
3352 |
|
|
3356 |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
3357 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
3358 |
|
|
3359 |
match_block.start_subject = (uschar *)subject; |
match_block.start_subject = (const uschar *)subject; |
3360 |
match_block.end_subject = match_block.start_subject + length; |
match_block.end_subject = match_block.start_subject + length; |
3361 |
end_subject = match_block.end_subject; |
end_subject = match_block.end_subject; |
3362 |
|
|
3384 |
ocount = re->top_backref * 2 + 2; |
ocount = re->top_backref * 2 + 2; |
3385 |
match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int)); |
match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int)); |
3386 |
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; |
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; |
3387 |
#ifdef DEBUG |
DPRINTF(("Got memory to hold back references\n")); |
|
printf("Got memory to hold back references\n"); |
|
|
#endif |
|
3388 |
} |
} |
3389 |
else match_block.offset_vector = offsets; |
else match_block.offset_vector = offsets; |
3390 |
|
|
3437 |
|
|
3438 |
do |
do |
3439 |
{ |
{ |
3440 |
|
int rc; |
3441 |
register int *iptr = match_block.offset_vector; |
register int *iptr = match_block.offset_vector; |
3442 |
register int *iend = iptr + resetcount; |
register int *iend = iptr + resetcount; |
3443 |
|
|
3479 |
} |
} |
3480 |
} |
} |
3481 |
|
|
3482 |
#ifdef DEBUG |
#ifdef DEBUG /* Sigh. Some compilers never learn. */ |
3483 |
printf(">>>> Match against: "); |
printf(">>>> Match against: "); |
3484 |
pchars(start_match, end_subject - start_match, TRUE, &match_block); |
pchars(start_match, end_subject - start_match, TRUE, &match_block); |
3485 |
printf("\n"); |
printf("\n"); |
3486 |
#endif |
#endif |
3487 |
|
|
3488 |
/* When a match occurs, substrings will be set for all internal extractions; |
/* When a match occurs, substrings will be set for all internal extractions; |
3489 |
we just need to set up the whole thing as substring 0 before returning. If |
we just need to set up the whole thing as substring 0 before returning. If |
3493 |
if certain parts of the pattern were not used. |
if certain parts of the pattern were not used. |
3494 |
|
|
3495 |
Before starting the match, we have to set up a longjmp() target to enable |
Before starting the match, we have to set up a longjmp() target to enable |
3496 |
the "cut" operation to fail a match completely without backtracking. */ |
the "cut" operation to fail a match completely without backtracking. This |
3497 |
|
is done in a separate function to avoid compiler warnings. We need not do |
3498 |
if (setjmp(match_block.fail_env) == 0 && |
it unless PCRE_EXTRA is set, since only in that case is the "cut" operation |
3499 |
match(start_match, re->code, 2, &match_block)) |
enabled. */ |
|
{ |
|
|
int rc; |
|
3500 |
|
|
3501 |
if (ocount != offsetcount) |
if (((re->options & PCRE_EXTRA) != 0 && my_setjmp(&match_block) != 0) || |
3502 |
{ |
!match(start_match, re->code, 2, &match_block)) |
3503 |
if (offsetcount >= 4) |
continue; |
|
{ |
|
|
memcpy(offsets + 2, match_block.offset_vector + 2, |
|
|
(offsetcount - 2) * sizeof(int)); |
|
|
#ifdef DEBUG |
|
|
printf("Copied offsets; freeing temporary memory\n"); |
|
|
#endif |
|
|
} |
|
|
if (match_block.end_offset_top > offsetcount) |
|
|
match_block.offset_overflow = TRUE; |
|
3504 |
|
|
3505 |
#ifdef DEBUG |
/* Copy the offset information from temporary store if necessary */ |
|
printf("Freeing temporary memory\n"); |
|
|
#endif |
|
3506 |
|
|
3507 |
(pcre_free)(match_block.offset_vector); |
if (ocount != offsetcount) |
3508 |
|
{ |
3509 |
|
if (offsetcount >= 4) |
3510 |
|
{ |
3511 |
|
memcpy(offsets + 2, match_block.offset_vector + 2, |
3512 |
|
(offsetcount - 2) * sizeof(int)); |
3513 |
|
DPRINTF(("Copied offsets; freeing temporary memory\n")); |
3514 |
} |
} |
3515 |
|
if (match_block.end_offset_top > offsetcount) |
3516 |
|
match_block.offset_overflow = TRUE; |
3517 |
|
|
3518 |
rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2; |
DPRINTF(("Freeing temporary memory\n")); |
3519 |
|
(pcre_free)(match_block.offset_vector); |
3520 |
|
} |
3521 |
|
|
3522 |
if (match_block.offset_end < 2) rc = 0; else |
rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2; |
|
{ |
|
|
offsets[0] = start_match - match_block.start_subject; |
|
|
offsets[1] = match_block.end_match_ptr - match_block.start_subject; |
|
|
} |
|
3523 |
|
|
3524 |
#ifdef DEBUG |
if (match_block.offset_end < 2) rc = 0; else |
3525 |
printf(">>>> returning %d\n", rc); |
{ |
3526 |
#endif |
offsets[0] = start_match - match_block.start_subject; |
3527 |
return rc; |
offsets[1] = match_block.end_match_ptr - match_block.start_subject; |
3528 |
} |
} |
3529 |
|
|
3530 |
|
DPRINTF((">>>> returning %d\n", rc)); |
3531 |
|
return rc; |
3532 |
} |
} |
3533 |
while (!anchored && |
while (!anchored && |
3534 |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
3535 |
start_match++ < end_subject); |
start_match++ < end_subject); |
3536 |
|
|
3537 |
#ifdef DEBUG |
DPRINTF((">>>> returning %d\n", match_block.errorcode)); |
|
printf(">>>> returning %d\n", match_block.errorcode); |
|
|
#endif |
|
3538 |
|
|
3539 |
return match_block.errorcode; |
return match_block.errorcode; |
3540 |
} |
} |