9 |
|
|
10 |
Written by: Philip Hazel <ph10@cam.ac.uk> |
Written by: Philip Hazel <ph10@cam.ac.uk> |
11 |
|
|
12 |
Copyright (c) 1997 University of Cambridge |
Copyright (c) 1998 University of Cambridge |
13 |
|
|
14 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
15 |
Permission is granted to anyone to use this software for any purpose on any |
Permission is granted to anyone to use this software for any purpose on any |
33 |
|
|
34 |
/* #define DEBUG */ |
/* #define DEBUG */ |
35 |
|
|
36 |
|
/* Use a macro for debugging printing, 'cause that eliminates the use |
37 |
|
of #ifdef inline, and there are *still* stupid compilers about that don't like |
38 |
|
indented pre-processor statements. I suppose it's only been 10 years... */ |
39 |
|
|
40 |
|
#ifdef DEBUG |
41 |
|
#define DPRINTF(p) printf p |
42 |
|
#else |
43 |
|
#define DPRINTF(p) /*nothing*/ |
44 |
|
#endif |
45 |
|
|
46 |
/* Include the internals header, which itself includes Standard C headers plus |
/* Include the internals header, which itself includes Standard C headers plus |
47 |
the external pcre header. */ |
the external pcre header. */ |
49 |
#include "internal.h" |
#include "internal.h" |
50 |
|
|
51 |
|
|
52 |
|
/* Allow compilation as C++ source code, should anybody want to do that. */ |
53 |
|
|
54 |
|
#ifdef __cplusplus |
55 |
|
#define class pcre_class |
56 |
|
#endif |
57 |
|
|
58 |
|
|
59 |
/* Min and max values for the common repeats; for the maxima, 0 => infinity */ |
/* Min and max values for the common repeats; for the maxima, 0 => infinity */ |
60 |
|
|
61 |
static char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
62 |
static char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
63 |
|
|
64 |
/* Text forms of OP_ values and things, for debugging */ |
/* Text forms of OP_ values and things, for debugging (not all used) */ |
65 |
|
|
66 |
#ifdef DEBUG |
#ifdef DEBUG |
67 |
static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d", |
static const char *OP_names[] = { |
68 |
|
"End", "\\A", "\\B", "\\b", "\\D", "\\d", |
69 |
"\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars", |
"\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars", |
70 |
"not", |
"not", |
71 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
72 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
73 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
74 |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
75 |
"class", "Ref", |
"class", "negclass", "Ref", |
76 |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once", |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once", |
77 |
"Brazero", "Braminzero", "Bra" |
"Brazero", "Braminzero", "Bra" |
78 |
}; |
}; |
83 |
on. Zero means further processing is needed (for things like \x), or the escape |
on. Zero means further processing is needed (for things like \x), or the escape |
84 |
is invalid. */ |
is invalid. */ |
85 |
|
|
86 |
static short int escapes[] = { |
static const short int escapes[] = { |
87 |
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ |
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ |
88 |
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ |
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ |
89 |
'@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */ |
'@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */ |
98 |
|
|
99 |
/* Definition to allow mutual recursion */ |
/* Definition to allow mutual recursion */ |
100 |
|
|
101 |
static BOOL compile_regex(int, int *,uschar **,uschar **,char **); |
static BOOL |
102 |
|
compile_regex(int, int *, uschar **, const uschar **, const char **); |
103 |
|
|
104 |
/* Structure for passing "static" information around between the functions |
/* Structure for passing "static" information around between the functions |
105 |
doing the matching, so that they are thread-safe. */ |
doing the matching, so that they are thread-safe. */ |
116 |
BOOL noteol; /* NOTEOL flag */ |
BOOL noteol; /* NOTEOL flag */ |
117 |
BOOL dotall; /* Dot matches any char */ |
BOOL dotall; /* Dot matches any char */ |
118 |
BOOL endonly; /* Dollar not before final \n */ |
BOOL endonly; /* Dollar not before final \n */ |
119 |
uschar *start_subject; /* Start of the subject string */ |
const uschar *start_subject; /* Start of the subject string */ |
120 |
uschar *end_subject; /* End of the subject string */ |
const uschar *end_subject; /* End of the subject string */ |
121 |
jmp_buf fail_env; /* Environment for longjmp() break out */ |
jmp_buf fail_env; /* Environment for longjmp() break out */ |
122 |
uschar *end_match_ptr; /* Subject position at end match */ |
const uschar *end_match_ptr; /* Subject position at end match */ |
123 |
int end_offset_top; /* Highwater mark at end of match */ |
int end_offset_top; /* Highwater mark at end of match */ |
124 |
} match_data; |
} match_data; |
125 |
|
|
144 |
* Return version string * |
* Return version string * |
145 |
*************************************************/ |
*************************************************/ |
146 |
|
|
147 |
char * |
const char * |
148 |
pcre_version(void) |
pcre_version(void) |
149 |
{ |
{ |
150 |
return PCRE_VERSION; |
return PCRE_VERSION; |
174 |
int |
int |
175 |
pcre_info(const pcre *external_re, int *optptr, int *first_char) |
pcre_info(const pcre *external_re, int *optptr, int *first_char) |
176 |
{ |
{ |
177 |
real_pcre *re = (real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
178 |
if (re == NULL) return PCRE_ERROR_NULL; |
if (re == NULL) return PCRE_ERROR_NULL; |
179 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
180 |
if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS); |
if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS); |
204 |
Returns: nothing |
Returns: nothing |
205 |
*/ |
*/ |
206 |
|
|
207 |
static pchars(uschar *p, int length, BOOL is_subject, match_data *md) |
static void |
208 |
|
pchars(const uschar *p, int length, BOOL is_subject, match_data *md) |
209 |
{ |
{ |
210 |
int c; |
int c; |
211 |
if (is_subject && length > md->end_subject - p) length = md->end_subject - p; |
if (is_subject && length > md->end_subject - p) length = md->end_subject - p; |
247 |
/* Test an embedded subpattern; if it could not be empty, break the |
/* Test an embedded subpattern; if it could not be empty, break the |
248 |
loop. Otherwise carry on in the branch. */ |
loop. Otherwise carry on in the branch. */ |
249 |
|
|
250 |
if ((int)(*cc) >= OP_BRA) |
if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE) |
251 |
{ |
{ |
252 |
if (!could_be_empty(cc)) break; |
if (!could_be_empty(cc)) break; |
253 |
do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT); |
do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT); |
264 |
case OP_KETRMIN: |
case OP_KETRMIN: |
265 |
return TRUE; |
return TRUE; |
266 |
|
|
267 |
|
/* Skip over entire bracket groups with zero lower bound */ |
268 |
|
|
269 |
|
case OP_BRAZERO: |
270 |
|
case OP_BRAMINZERO: |
271 |
|
cc++; |
272 |
|
/* Fall through */ |
273 |
|
|
274 |
/* Skip over assertive subpatterns */ |
/* Skip over assertive subpatterns */ |
275 |
|
|
276 |
case OP_ASSERT: |
case OP_ASSERT: |
285 |
case OP_EOD: |
case OP_EOD: |
286 |
case OP_CIRC: |
case OP_CIRC: |
287 |
case OP_DOLL: |
case OP_DOLL: |
|
case OP_BRAZERO: |
|
|
case OP_BRAMINZERO: |
|
288 |
case OP_NOT_WORD_BOUNDARY: |
case OP_NOT_WORD_BOUNDARY: |
289 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
290 |
cc++; |
cc++; |
296 |
case OP_MINSTAR: |
case OP_MINSTAR: |
297 |
case OP_QUERY: |
case OP_QUERY: |
298 |
case OP_MINQUERY: |
case OP_MINQUERY: |
299 |
|
case OP_NOTSTAR: |
300 |
|
case OP_NOTMINSTAR: |
301 |
|
case OP_NOTQUERY: |
302 |
|
case OP_NOTMINQUERY: |
303 |
case OP_TYPESTAR: |
case OP_TYPESTAR: |
304 |
case OP_TYPEMINSTAR: |
case OP_TYPEMINSTAR: |
305 |
case OP_TYPEQUERY: |
case OP_TYPEQUERY: |
319 |
/* Check a class or a back reference for a zero minimum */ |
/* Check a class or a back reference for a zero minimum */ |
320 |
|
|
321 |
case OP_CLASS: |
case OP_CLASS: |
322 |
|
case OP_NEGCLASS: |
323 |
case OP_REF: |
case OP_REF: |
324 |
cc += (*cc == OP_REF)? 2 : 4 + 2 * cc[2] + cc[3]; |
cc += (*cc == OP_REF)? 2 : 33; |
325 |
|
|
326 |
switch (*cc) |
switch (*cc) |
327 |
{ |
{ |
385 |
*/ |
*/ |
386 |
|
|
387 |
static int |
static int |
388 |
check_escape(uschar **ptrptr, char **errorptr, int bracount, int options, |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
389 |
BOOL isclass) |
int options, BOOL isclass) |
390 |
{ |
{ |
391 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
392 |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
393 |
int i; |
int i; |
394 |
|
|
407 |
|
|
408 |
else |
else |
409 |
{ |
{ |
410 |
uschar *oldptr; |
const uschar *oldptr; |
411 |
switch (c) |
switch (c) |
412 |
{ |
{ |
413 |
/* The handling of escape sequences consisting of a string of digits |
/* The handling of escape sequences consisting of a string of digits |
527 |
*/ |
*/ |
528 |
|
|
529 |
static BOOL |
static BOOL |
530 |
is_counted_repeat(uschar *p) |
is_counted_repeat(const uschar *p) |
531 |
{ |
{ |
532 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
533 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
562 |
current ptr on error, with errorptr set |
current ptr on error, with errorptr set |
563 |
*/ |
*/ |
564 |
|
|
565 |
static uschar * |
static const uschar * |
566 |
read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr) |
read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr) |
567 |
{ |
{ |
568 |
int min = 0; |
int min = 0; |
569 |
int max = -1; |
int max = -1; |
617 |
*/ |
*/ |
618 |
|
|
619 |
static BOOL |
static BOOL |
620 |
compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr, |
compile_branch(int options, int *brackets, uschar **codeptr, |
621 |
char **errorptr) |
const uschar **ptrptr, const char **errorptr) |
622 |
{ |
{ |
623 |
int repeat_type, op_type; |
int repeat_type, op_type; |
624 |
int repeat_min, repeat_max; |
int repeat_min, repeat_max; |
625 |
int bravalue, length; |
int bravalue, length; |
626 |
register int c; |
register int c; |
627 |
register uschar *code = *codeptr; |
register uschar *code = *codeptr; |
628 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
629 |
|
const uschar *oldptr; |
630 |
uschar *previous = NULL; |
uschar *previous = NULL; |
|
uschar *oldptr; |
|
631 |
uschar class[32]; |
uschar class[32]; |
632 |
|
|
633 |
/* Switch on next character until the end of the branch */ |
/* Switch on next character until the end of the branch */ |
684 |
|
|
685 |
case '[': |
case '[': |
686 |
previous = code; |
previous = code; |
|
*code++ = OP_CLASS; |
|
687 |
|
|
688 |
/* If the first character is '^', set the negation flag */ |
/* If the first character is '^', set the negation flag, and use a |
689 |
|
different opcode. This only matters if caseless matching is specified at |
690 |
|
runtime. */ |
691 |
|
|
692 |
if ((c = *(++ptr)) == '^') |
if ((c = *(++ptr)) == '^') |
693 |
{ |
{ |
694 |
negate_class = TRUE; |
negate_class = TRUE; |
695 |
|
*code++ = OP_NEGCLASS; |
696 |
c = *(++ptr); |
c = *(++ptr); |
697 |
} |
} |
698 |
else negate_class = FALSE; |
else |
699 |
|
{ |
700 |
|
negate_class = FALSE; |
701 |
|
*code++ = OP_CLASS; |
702 |
|
} |
703 |
|
|
704 |
/* Keep a count of chars so that we can optimize the case of just a single |
/* Keep a count of chars so that we can optimize the case of just a single |
705 |
character. */ |
character. */ |
728 |
/* Backslash may introduce a single character, or it may introduce one |
/* Backslash may introduce a single character, or it may introduce one |
729 |
of the specials, which just set a flag. Escaped items are checked for |
of the specials, which just set a flag. Escaped items are checked for |
730 |
validity in the pre-compiling pass. The sequence \b is a special case. |
validity in the pre-compiling pass. The sequence \b is a special case. |
731 |
Inside a class (and only there) it is treated as backslash. Elsewhere |
Inside a class (and only there) it is treated as backspace. Elsewhere |
732 |
it marks a word boundary. Other escapes have preset maps ready to |
it marks a word boundary. Other escapes have preset maps ready to |
733 |
or into the one we are building. We assume they have more than one |
or into the one we are building. We assume they have more than one |
734 |
character in them, so set class_count bigger than one. */ |
character in them, so set class_count bigger than one. */ |
1007 |
if (code == previous) code += 2; else previous[1]++; |
if (code == previous) code += 2; else previous[1]++; |
1008 |
} |
} |
1009 |
|
|
1010 |
/* Insert an UPTO if the max is greater than the min. */ |
/* If the maximum is unlimited, insert an OP_STAR. */ |
1011 |
|
|
1012 |
|
if (repeat_max < 0) |
1013 |
|
{ |
1014 |
|
*code++ = c; |
1015 |
|
*code++ = OP_STAR + repeat_type; |
1016 |
|
} |
1017 |
|
|
1018 |
|
/* Else insert an UPTO if the max is greater than the min. */ |
1019 |
|
|
1020 |
if (repeat_max != repeat_min) |
else if (repeat_max != repeat_min) |
1021 |
{ |
{ |
1022 |
*code++ = c; |
*code++ = c; |
1023 |
repeat_max -= repeat_min; |
repeat_max -= repeat_min; |
1035 |
/* If previous was a character class or a back reference, we put the repeat |
/* If previous was a character class or a back reference, we put the repeat |
1036 |
stuff after it. */ |
stuff after it. */ |
1037 |
|
|
1038 |
else if (*previous == OP_CLASS || *previous == OP_REF) |
else if (*previous == OP_CLASS || *previous == OP_NEGCLASS || |
1039 |
|
*previous == OP_REF) |
1040 |
{ |
{ |
1041 |
if (repeat_min == 0 && repeat_max == -1) |
if (repeat_min == 0 && repeat_max == -1) |
1042 |
*code++ = OP_CRSTAR + repeat_type; |
*code++ = OP_CRSTAR + repeat_type; |
1062 |
else if ((int)*previous >= OP_BRA) |
else if ((int)*previous >= OP_BRA) |
1063 |
{ |
{ |
1064 |
int i; |
int i; |
1065 |
int length = code - previous; |
int len = code - previous; |
1066 |
|
|
1067 |
if (repeat_max == -1 && could_be_empty(previous)) |
if (repeat_max == -1 && could_be_empty(previous)) |
1068 |
{ |
{ |
1079 |
{ |
{ |
1080 |
for (i = 1; i < repeat_min; i++) |
for (i = 1; i < repeat_min; i++) |
1081 |
{ |
{ |
1082 |
memcpy(code, previous, length); |
memcpy(code, previous, len); |
1083 |
code += length; |
code += len; |
1084 |
} |
} |
1085 |
} |
} |
1086 |
|
|
1092 |
{ |
{ |
1093 |
if (repeat_min == 0) |
if (repeat_min == 0) |
1094 |
{ |
{ |
1095 |
memmove(previous+1, previous, length); |
memmove(previous+1, previous, len); |
1096 |
code++; |
code++; |
1097 |
*previous++ = OP_BRAZERO + repeat_type; |
*previous++ = OP_BRAZERO + repeat_type; |
1098 |
} |
} |
1099 |
|
|
1100 |
for (i = 1; i < repeat_min; i++) |
for (i = 1; i < repeat_min; i++) |
1101 |
{ |
{ |
1102 |
memcpy(code, previous, length); |
memcpy(code, previous, len); |
1103 |
code += length; |
code += len; |
1104 |
} |
} |
1105 |
|
|
1106 |
for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++) |
for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++) |
1107 |
{ |
{ |
1108 |
*code++ = OP_BRAZERO + repeat_type; |
*code++ = OP_BRAZERO + repeat_type; |
1109 |
memcpy(code, previous, length); |
memcpy(code, previous, len); |
1110 |
code += length; |
code += len; |
1111 |
} |
} |
1112 |
} |
} |
1113 |
|
|
1254 |
continue; |
continue; |
1255 |
} |
} |
1256 |
|
|
1257 |
/* Reset and fall through */ |
/* Data character: reset and fall through */ |
1258 |
|
|
1259 |
ptr = oldptr; |
ptr = oldptr; |
1260 |
c = '\\'; |
c = '\\'; |
1308 |
the next state. */ |
the next state. */ |
1309 |
|
|
1310 |
previous[1] = length; |
previous[1] = length; |
1311 |
ptr--; |
if (length < 255) ptr--; |
1312 |
break; |
break; |
1313 |
} |
} |
1314 |
} /* end of big loop */ |
} /* end of big loop */ |
1345 |
*/ |
*/ |
1346 |
|
|
1347 |
static BOOL |
static BOOL |
1348 |
compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr, |
compile_regex(int options, int *brackets, uschar **codeptr, |
1349 |
char **errorptr) |
const uschar **ptrptr, const char **errorptr) |
1350 |
{ |
{ |
1351 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
1352 |
uschar *code = *codeptr; |
uschar *code = *codeptr; |
1353 |
uschar *start_bracket = code; |
uschar *start_bracket = code; |
1354 |
|
|
1414 |
*/ |
*/ |
1415 |
|
|
1416 |
static BOOL |
static BOOL |
1417 |
is_anchored(register uschar *code, BOOL multiline) |
is_anchored(register const uschar *code, BOOL multiline) |
1418 |
{ |
{ |
1419 |
do { |
do { |
1420 |
int op = (int)code[3]; |
int op = (int)code[3]; |
1443 |
*/ |
*/ |
1444 |
|
|
1445 |
static BOOL |
static BOOL |
1446 |
is_startline(uschar *code) |
is_startline(const uschar *code) |
1447 |
{ |
{ |
1448 |
do { |
do { |
1449 |
if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT) |
if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT) |
1528 |
*/ |
*/ |
1529 |
|
|
1530 |
pcre * |
pcre * |
1531 |
pcre_compile(const char *pattern, int options, char **errorptr, |
pcre_compile(const char *pattern, int options, const char **errorptr, |
1532 |
int *erroroffset) |
int *erroroffset) |
1533 |
{ |
{ |
1534 |
real_pcre *re; |
real_pcre *re; |
1538 |
int c, size; |
int c, size; |
1539 |
int bracount = 0; |
int bracount = 0; |
1540 |
int brastack[200]; |
int brastack[200]; |
|
int brastackptr = 0; |
|
1541 |
int top_backref = 0; |
int top_backref = 0; |
1542 |
uschar *code, *ptr; |
unsigned int brastackptr = 0; |
1543 |
|
uschar *code; |
1544 |
|
const uschar *ptr; |
1545 |
|
|
1546 |
#ifdef DEBUG |
#ifdef DEBUG |
1547 |
uschar *code_base, *code_end; |
uschar *code_base, *code_end; |
1568 |
return NULL; |
return NULL; |
1569 |
} |
} |
1570 |
|
|
1571 |
#ifdef DEBUG |
DPRINTF(("------------------------------------------------------------------\n")); |
1572 |
printf("------------------------------------------------------------------\n"); |
DPRINTF(("%s\n", pattern)); |
|
printf("%s\n", pattern); |
|
|
#endif |
|
1573 |
|
|
1574 |
/* The first thing to do is to make a pass over the pattern to compute the |
/* The first thing to do is to make a pass over the pattern to compute the |
1575 |
amount of store required to hold the compiled code. This does not have to be |
amount of store required to hold the compiled code. This does not have to be |
1578 |
if an "extended" flag setting appears late in the pattern. We can't be so |
if an "extended" flag setting appears late in the pattern. We can't be so |
1579 |
clever for #-comments. */ |
clever for #-comments. */ |
1580 |
|
|
1581 |
ptr = (uschar *)(pattern - 1); |
ptr = (const uschar *)(pattern - 1); |
1582 |
while ((c = *(++ptr)) != 0) |
while ((c = *(++ptr)) != 0) |
1583 |
{ |
{ |
1584 |
int min, max; |
int min, max; |
1605 |
|
|
1606 |
case '\\': |
case '\\': |
1607 |
{ |
{ |
1608 |
uschar *save_ptr = ptr; |
const uschar *save_ptr = ptr; |
1609 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
1610 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1611 |
if (c >= 0) |
if (c >= 0) |
1684 |
{ |
{ |
1685 |
if (*ptr == '\\') |
if (*ptr == '\\') |
1686 |
{ |
{ |
1687 |
int c = check_escape(&ptr, errorptr, bracount, options, TRUE); |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE); |
1688 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1689 |
if (-c == ESC_b) class_charcount++; else class_charcount = 10; |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
1690 |
} |
} |
1691 |
else class_charcount++; |
else class_charcount++; |
1692 |
ptr++; |
ptr++; |
1701 |
|
|
1702 |
/* A repeat needs either 1 or 5 bytes. */ |
/* A repeat needs either 1 or 5 bytes. */ |
1703 |
|
|
1704 |
if (ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) |
1705 |
{ |
{ |
1706 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
1707 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1809 |
continue; |
continue; |
1810 |
|
|
1811 |
/* Handle ket. Look for subsequent max/min; for certain sets of values we |
/* Handle ket. Look for subsequent max/min; for certain sets of values we |
1812 |
have to replicate this bracket up to that many times. */ |
have to replicate this bracket up to that many times. If brastackptr is |
1813 |
|
0 this is an unmatched bracket which will generate an error, but take care |
1814 |
|
not to try to access brastack[-1]. */ |
1815 |
|
|
1816 |
case ')': |
case ')': |
1817 |
length += 3; |
length += 3; |
1818 |
{ |
{ |
1819 |
int min = 1; |
int minval = 1; |
1820 |
int max = 1; |
int maxval = 1; |
1821 |
int duplength = length - brastack[--brastackptr]; |
int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0; |
1822 |
|
|
1823 |
/* Leave ptr at the final char; for read_repeat_counts this happens |
/* Leave ptr at the final char; for read_repeat_counts this happens |
1824 |
automatically; for the others we need an increment. */ |
automatically; for the others we need an increment. */ |
1825 |
|
|
1826 |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
1827 |
{ |
{ |
1828 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr); |
1829 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1830 |
} |
} |
1831 |
else if (c == '*') { min = 0; max = -1; ptr++; } |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
1832 |
else if (c == '+') { max = -1; ptr++; } |
else if (c == '+') { maxval = -1; ptr++; } |
1833 |
else if (c == '?') { min = 0; ptr++; } |
else if (c == '?') { minval = 0; ptr++; } |
1834 |
|
|
1835 |
/* If there is a minimum > 1 we have to replicate up to min-1 times; if |
/* If there is a minimum > 1 we have to replicate up to minval-1 times; |
1836 |
there is a limited maximum we have to replicate up to max-1 times and |
if there is a limited maximum we have to replicate up to maxval-1 times |
1837 |
allow for a BRAZERO item before each optional copy, as we also have to |
and allow for a BRAZERO item before each optional copy, as we also have |
1838 |
do before the first copy if the minimum is zero. */ |
to do before the first copy if the minimum is zero. */ |
1839 |
|
|
1840 |
if (min == 0) length++; |
if (minval == 0) length++; |
1841 |
else if (min > 1) length += (min - 1) * duplength; |
else if (minval > 1) length += (minval - 1) * duplength; |
1842 |
if (max > min) length += (max - min) * (duplength + 1); |
if (maxval > minval) length += (maxval - minval) * (duplength + 1); |
1843 |
} |
} |
|
|
|
1844 |
continue; |
continue; |
1845 |
|
|
1846 |
/* Non-special character. For a run of such characters the length required |
/* Non-special character. For a run of such characters the length required |
1871 |
|
|
1872 |
if (c == '\\') |
if (c == '\\') |
1873 |
{ |
{ |
1874 |
uschar *saveptr = ptr; |
const uschar *saveptr = ptr; |
1875 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
1876 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
1877 |
if (c < 0) { ptr = saveptr; break; } |
if (c < 0) { ptr = saveptr; break; } |
1901 |
} |
} |
1902 |
|
|
1903 |
/* Compute the size of data block needed and get it, either from malloc or |
/* Compute the size of data block needed and get it, either from malloc or |
1904 |
externally provided function. Put in the magic number and the options. */ |
externally provided function. We specify "code[0]" in the offsetof() expression |
1905 |
|
rather than just "code", because it has been reported that one broken compiler |
1906 |
|
fails on "code" because it is also an independent variable. It should make no |
1907 |
|
difference to the value of the offsetof(). */ |
1908 |
|
|
1909 |
size = length + offsetof(real_pcre, code); |
size = length + offsetof(real_pcre, code[0]); |
1910 |
re = (real_pcre *)(pcre_malloc)(size); |
re = (real_pcre *)(pcre_malloc)(size); |
1911 |
|
|
1912 |
if (re == NULL) |
if (re == NULL) |
1915 |
return NULL; |
return NULL; |
1916 |
} |
} |
1917 |
|
|
1918 |
|
/* Put in the magic number and the options. */ |
1919 |
|
|
1920 |
re->magic_number = MAGIC_NUMBER; |
re->magic_number = MAGIC_NUMBER; |
1921 |
re->options = options; |
re->options = options; |
1922 |
|
|
1924 |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
1925 |
of the function here. */ |
of the function here. */ |
1926 |
|
|
1927 |
ptr = (uschar *)pattern; |
ptr = (const uschar *)pattern; |
1928 |
code = re->code; |
code = re->code; |
1929 |
*code = OP_BRA; |
*code = OP_BRA; |
1930 |
bracount = 0; |
bracount = 0; |
1951 |
{ |
{ |
1952 |
(pcre_free)(re); |
(pcre_free)(re); |
1953 |
PCRE_ERROR_RETURN: |
PCRE_ERROR_RETURN: |
1954 |
*erroroffset = ptr - (uschar *)pattern; |
*erroroffset = ptr - (const uschar *)pattern; |
1955 |
return NULL; |
return NULL; |
1956 |
} |
} |
1957 |
|
|
1967 |
re->options |= PCRE_ANCHORED; |
re->options |= PCRE_ANCHORED; |
1968 |
else |
else |
1969 |
{ |
{ |
1970 |
int c = find_firstchar(re->code); |
int ch = find_firstchar(re->code); |
1971 |
if (c >= 0) |
if (ch >= 0) |
1972 |
{ |
{ |
1973 |
re->first_char = c; |
re->first_char = ch; |
1974 |
re->options |= PCRE_FIRSTSET; |
re->options |= PCRE_FIRSTSET; |
1975 |
} |
} |
1976 |
else if (is_startline(re->code)) |
else if (is_startline(re->code)) |
2062 |
case OP_MINUPTO: |
case OP_MINUPTO: |
2063 |
if (isprint(c = code[3])) printf(" %c{", c); |
if (isprint(c = code[3])) printf(" %c{", c); |
2064 |
else printf(" \\x%02x{", c); |
else printf(" \\x%02x{", c); |
2065 |
if (*code != OP_EXACT) printf(","); |
if (*code != OP_EXACT) printf("0,"); |
2066 |
printf("%d}", (code[1] << 8) + code[2]); |
printf("%d}", (code[1] << 8) + code[2]); |
2067 |
if (*code == OP_MINUPTO) printf("?"); |
if (*code == OP_MINUPTO) printf("?"); |
2068 |
code += 3; |
code += 3; |
2107 |
|
|
2108 |
case OP_REF: |
case OP_REF: |
2109 |
printf(" \\%d", *(++code)); |
printf(" \\%d", *(++code)); |
2110 |
break; |
code ++; |
2111 |
|
goto CLASS_REF_REPEAT; |
2112 |
|
|
2113 |
case OP_CLASS: |
case OP_CLASS: |
2114 |
|
case OP_NEGCLASS: |
2115 |
{ |
{ |
2116 |
int i, min, max; |
int i, min, max; |
2117 |
|
|
2118 |
code++; |
if (*code++ == OP_CLASS) printf(" ["); |
2119 |
printf(" ["); |
else printf(" ^["); |
2120 |
|
|
2121 |
for (i = 0; i < 256; i++) |
for (i = 0; i < 256; i++) |
2122 |
{ |
{ |
2139 |
printf("]"); |
printf("]"); |
2140 |
code += 32; |
code += 32; |
2141 |
|
|
2142 |
|
CLASS_REF_REPEAT: |
2143 |
|
|
2144 |
switch(*code) |
switch(*code) |
2145 |
{ |
{ |
2146 |
case OP_CRSTAR: |
case OP_CRSTAR: |
2253 |
*/ |
*/ |
2254 |
|
|
2255 |
static BOOL |
static BOOL |
2256 |
match_ref(int number, register uschar *eptr, int length, match_data *md) |
match_ref(int number, register const uschar *eptr, int length, match_data *md) |
2257 |
{ |
{ |
2258 |
uschar *p = md->start_subject + md->offset_vector[number]; |
const uschar *p = md->start_subject + md->offset_vector[number]; |
2259 |
|
|
2260 |
#ifdef DEBUG |
#ifdef DEBUG |
2261 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
2302 |
*/ |
*/ |
2303 |
|
|
2304 |
static BOOL |
static BOOL |
2305 |
match(register uschar *eptr, register uschar *ecode, int offset_top, |
match(register const uschar *eptr, register const uschar *ecode, int offset_top, |
2306 |
match_data *md) |
match_data *md) |
2307 |
{ |
{ |
2308 |
for (;;) |
for (;;) |
2310 |
int min, max, ctype; |
int min, max, ctype; |
2311 |
register int i; |
register int i; |
2312 |
register int c; |
register int c; |
2313 |
BOOL minimize; |
BOOL minimize = FALSE; |
2314 |
|
|
2315 |
/* Opening bracket. Check the alternative branches in turn, failing if none |
/* Opening bracket. Check the alternative branches in turn, failing if none |
2316 |
match. We have to set the start offset if required and there is space |
match. We have to set the start offset if required and there is space |
2323 |
if ((int)*ecode >= OP_BRA) |
if ((int)*ecode >= OP_BRA) |
2324 |
{ |
{ |
2325 |
int number = (*ecode - OP_BRA) << 1; |
int number = (*ecode - OP_BRA) << 1; |
2326 |
int save_offset1, save_offset2; |
int save_offset1 = 0, save_offset2 = 0; |
2327 |
|
|
2328 |
#ifdef DEBUG |
DPRINTF(("start bracket %d\n", number/2)); |
|
printf("start bracket %d\n", number/2); |
|
|
#endif |
|
2329 |
|
|
2330 |
if (number > 0 && number < md->offset_end) |
if (number > 0 && number < md->offset_end) |
2331 |
{ |
{ |
2333 |
save_offset2 = md->offset_vector[number+1]; |
save_offset2 = md->offset_vector[number+1]; |
2334 |
md->offset_vector[number] = eptr - md->start_subject; |
md->offset_vector[number] = eptr - md->start_subject; |
2335 |
|
|
2336 |
#ifdef DEBUG |
DPRINTF(("saving %d %d\n", save_offset1, save_offset2)); |
|
printf("saving %d %d\n", save_offset1, save_offset2); |
|
|
#endif |
|
2337 |
} |
} |
2338 |
|
|
2339 |
/* Recurse for all the alternatives. */ |
/* Recurse for all the alternatives. */ |
2345 |
} |
} |
2346 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
2347 |
|
|
2348 |
#ifdef DEBUG |
DPRINTF(("bracket %d failed\n", number/2)); |
|
printf("bracket %d failed\n", number/2); |
|
|
#endif |
|
2349 |
|
|
2350 |
if (number > 0 && number < md->offset_end) |
if (number > 0 && number < md->offset_end) |
2351 |
{ |
{ |
2407 |
|
|
2408 |
/* "Once" brackets are like assertion brackets except that after a match, |
/* "Once" brackets are like assertion brackets except that after a match, |
2409 |
the point in the subject string is not moved back. Thus there can never be |
the point in the subject string is not moved back. Thus there can never be |
2410 |
a back into the brackets. Check the alternative branches in turn - the |
a move back into the brackets. Check the alternative branches in turn - the |
2411 |
matching won't pass the KET for this kind of subpattern. If any one branch |
matching won't pass the KET for this kind of subpattern. If any one branch |
2412 |
matches, we carry on, leaving the subject pointer. */ |
matches, we carry on, leaving the subject pointer. */ |
2413 |
|
|
2444 |
|
|
2445 |
case OP_BRAZERO: |
case OP_BRAZERO: |
2446 |
{ |
{ |
2447 |
uschar *next = ecode+1; |
const uschar *next = ecode+1; |
2448 |
if (match(eptr, next, offset_top, md)) return TRUE; |
if (match(eptr, next, offset_top, md)) return TRUE; |
2449 |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
2450 |
ecode = next + 3; |
ecode = next + 3; |
2453 |
|
|
2454 |
case OP_BRAMINZERO: |
case OP_BRAMINZERO: |
2455 |
{ |
{ |
2456 |
uschar *next = ecode+1; |
const uschar *next = ecode+1; |
2457 |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
2458 |
if (match(eptr, next+3, offset_top, md)) return TRUE; |
if (match(eptr, next+3, offset_top, md)) return TRUE; |
2459 |
ecode++; |
ecode++; |
2469 |
case OP_KETRMAX: |
case OP_KETRMAX: |
2470 |
{ |
{ |
2471 |
int number; |
int number; |
2472 |
uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; |
const uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; |
2473 |
|
|
2474 |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE) |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE) |
2475 |
{ |
{ |
2484 |
|
|
2485 |
number = (*prev - OP_BRA) << 1; |
number = (*prev - OP_BRA) << 1; |
2486 |
|
|
2487 |
#ifdef DEBUG |
DPRINTF(("end bracket %d\n", number/2)); |
|
printf("end bracket %d\n", number/2); |
|
|
#endif |
|
2488 |
|
|
2489 |
if (number > 0) |
if (number > 0) |
2490 |
{ |
{ |
2716 |
|
|
2717 |
else |
else |
2718 |
{ |
{ |
2719 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
2720 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
2721 |
{ |
{ |
2722 |
if (!match_ref(number, eptr, length, md)) break; |
if (!match_ref(number, eptr, length, md)) break; |
2736 |
item to see if there is repeat information following. Then obey similar |
item to see if there is repeat information following. Then obey similar |
2737 |
code to character type repeats - written out again for speed. If caseless |
code to character type repeats - written out again for speed. If caseless |
2738 |
matching was set at runtime but not at compile time, we have to check both |
matching was set at runtime but not at compile time, we have to check both |
2739 |
versions of a character. */ |
versions of a character, and we have to behave differently for positive and |
2740 |
|
negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are |
2741 |
|
treated differently. */ |
2742 |
|
|
2743 |
case OP_CLASS: |
case OP_CLASS: |
2744 |
|
case OP_NEGCLASS: |
2745 |
{ |
{ |
2746 |
uschar *data = ecode + 1; /* Save for matching */ |
BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless; |
2747 |
ecode += 33; /* Advance past the item */ |
const uschar *data = ecode + 1; /* Save for matching */ |
2748 |
|
ecode += 33; /* Advance past the item */ |
2749 |
|
|
2750 |
switch (*ecode) |
switch (*ecode) |
2751 |
{ |
{ |
2772 |
break; |
break; |
2773 |
|
|
2774 |
default: /* No repeat follows */ |
default: /* No repeat follows */ |
2775 |
if (eptr >= md->end_subject) return FALSE; |
min = max = 1; |
2776 |
c = *eptr++; |
break; |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */ |
|
|
if (md->runtime_caseless) |
|
|
{ |
|
|
c = pcre_fcc[c]; |
|
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */ |
|
|
} |
|
|
return FALSE; |
|
2777 |
} |
} |
2778 |
|
|
2779 |
/* First, ensure the minimum number of matches are present. */ |
/* First, ensure the minimum number of matches are present. */ |
2782 |
{ |
{ |
2783 |
if (eptr >= md->end_subject) return FALSE; |
if (eptr >= md->end_subject) return FALSE; |
2784 |
c = *eptr++; |
c = *eptr++; |
2785 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
2786 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
2787 |
|
runtime caseless, continue if either case is in the map. */ |
2788 |
|
|
2789 |
|
if (!nasty_case) |
2790 |
{ |
{ |
2791 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2792 |
|
if (md->runtime_caseless) |
2793 |
|
{ |
2794 |
|
c = pcre_fcc[c]; |
2795 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2796 |
|
} |
2797 |
|
} |
2798 |
|
|
2799 |
|
/* Runtime caseless and it was a negative class. Continue only if |
2800 |
|
both cases are in the map. */ |
2801 |
|
|
2802 |
|
else |
2803 |
|
{ |
2804 |
|
if ((data[c/8] & (1 << (c&7))) == 0) return FALSE; |
2805 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
2806 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2807 |
} |
} |
2808 |
|
|
2809 |
return FALSE; |
return FALSE; |
2810 |
} |
} |
2811 |
|
|
2824 |
if (match(eptr, ecode, offset_top, md)) return TRUE; |
if (match(eptr, ecode, offset_top, md)) return TRUE; |
2825 |
if (i >= max || eptr >= md->end_subject) return FALSE; |
if (i >= max || eptr >= md->end_subject) return FALSE; |
2826 |
c = *eptr++; |
c = *eptr++; |
2827 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
2828 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
2829 |
|
runtime caseless, continue if either case is in the map. */ |
2830 |
|
|
2831 |
|
if (!nasty_case) |
2832 |
{ |
{ |
2833 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2834 |
|
if (md->runtime_caseless) |
2835 |
|
{ |
2836 |
|
c = pcre_fcc[c]; |
2837 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2838 |
|
} |
2839 |
|
} |
2840 |
|
|
2841 |
|
/* Runtime caseless and it was a negative class. Continue only if |
2842 |
|
both cases are in the map. */ |
2843 |
|
|
2844 |
|
else |
2845 |
|
{ |
2846 |
|
if ((data[c/8] & (1 << (c&7))) == 0) return FALSE; |
2847 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
2848 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2849 |
} |
} |
2850 |
|
|
2851 |
return FALSE; |
return FALSE; |
2852 |
} |
} |
2853 |
/* Control never gets here */ |
/* Control never gets here */ |
2857 |
|
|
2858 |
else |
else |
2859 |
{ |
{ |
2860 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
2861 |
for (i = min; i < max; eptr++, i++) |
for (i = min; i < max; eptr++, i++) |
2862 |
{ |
{ |
2863 |
if (eptr >= md->end_subject) break; |
if (eptr >= md->end_subject) break; |
2864 |
c = *eptr; |
c = *eptr; |
2865 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
2866 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
2867 |
|
runtime caseless, continue if either case is in the map. */ |
2868 |
|
|
2869 |
|
if (!nasty_case) |
2870 |
|
{ |
2871 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2872 |
|
if (md->runtime_caseless) |
2873 |
|
{ |
2874 |
|
c = pcre_fcc[c]; |
2875 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2876 |
|
} |
2877 |
|
} |
2878 |
|
|
2879 |
|
/* Runtime caseless and it was a negative class. Continue only if |
2880 |
|
both cases are in the map. */ |
2881 |
|
|
2882 |
|
else |
2883 |
{ |
{ |
2884 |
|
if ((data[c/8] & (1 << (c&7))) == 0) break; |
2885 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
2886 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
2887 |
} |
} |
2888 |
|
|
2889 |
break; |
break; |
2890 |
} |
} |
2891 |
|
|
2903 |
register int length = ecode[1]; |
register int length = ecode[1]; |
2904 |
ecode += 2; |
ecode += 2; |
2905 |
|
|
2906 |
#ifdef DEBUG |
#ifdef DEBUG /* Sigh. Some compilers never learn. */ |
2907 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
2908 |
printf("matching subject <null> against pattern "); |
printf("matching subject <null> against pattern "); |
2909 |
else |
else |
2914 |
} |
} |
2915 |
pchars(ecode, length, FALSE, md); |
pchars(ecode, length, FALSE, md); |
2916 |
printf("\n"); |
printf("\n"); |
2917 |
#endif |
#endif |
2918 |
|
|
2919 |
if (length > md->end_subject - eptr) return FALSE; |
if (length > md->end_subject - eptr) return FALSE; |
2920 |
if (md->caseless) |
if (md->caseless) |
2971 |
maximum. Alternatively, if maximizing, find the maximum number of |
maximum. Alternatively, if maximizing, find the maximum number of |
2972 |
characters and work backwards. */ |
characters and work backwards. */ |
2973 |
|
|
2974 |
#ifdef DEBUG |
DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max, |
2975 |
printf("matching %c{%d,%d} against subject %.*s\n", c, min, max, |
max, eptr)); |
|
max, eptr); |
|
|
#endif |
|
2976 |
|
|
2977 |
if (md->caseless) |
if (md->caseless) |
2978 |
{ |
{ |
2991 |
} |
} |
2992 |
else |
else |
2993 |
{ |
{ |
2994 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
2995 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
2996 |
{ |
{ |
2997 |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
3021 |
} |
} |
3022 |
else |
else |
3023 |
{ |
{ |
3024 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
3025 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3026 |
{ |
{ |
3027 |
if (eptr >= md->end_subject || c != *eptr) break; |
if (eptr >= md->end_subject || c != *eptr) break; |
3037 |
/* Match a negated single character */ |
/* Match a negated single character */ |
3038 |
|
|
3039 |
case OP_NOT: |
case OP_NOT: |
3040 |
if (eptr > md->end_subject) return FALSE; |
if (eptr >= md->end_subject) return FALSE; |
3041 |
ecode++; |
ecode++; |
3042 |
if (md->caseless) |
if (md->caseless) |
3043 |
{ |
{ |
3096 |
maximum. Alternatively, if maximizing, find the maximum number of |
maximum. Alternatively, if maximizing, find the maximum number of |
3097 |
characters and work backwards. */ |
characters and work backwards. */ |
3098 |
|
|
3099 |
#ifdef DEBUG |
DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max, |
3100 |
printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max, |
max, eptr)); |
|
max, eptr); |
|
|
#endif |
|
3101 |
|
|
3102 |
if (md->caseless) |
if (md->caseless) |
3103 |
{ |
{ |
3116 |
} |
} |
3117 |
else |
else |
3118 |
{ |
{ |
3119 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
3120 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3121 |
{ |
{ |
3122 |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
3146 |
} |
} |
3147 |
else |
else |
3148 |
{ |
{ |
3149 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
3150 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
3151 |
{ |
{ |
3152 |
if (eptr >= md->end_subject || c == *eptr) break; |
if (eptr >= md->end_subject || c == *eptr) break; |
3263 |
|
|
3264 |
else |
else |
3265 |
{ |
{ |
3266 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
3267 |
switch(ctype) |
switch(ctype) |
3268 |
{ |
{ |
3269 |
case OP_ANY: |
case OP_ANY: |
3347 |
/* There's been some horrible disaster. */ |
/* There's been some horrible disaster. */ |
3348 |
|
|
3349 |
default: |
default: |
3350 |
#ifdef DEBUG |
DPRINTF(("Unknown opcode %d\n", *ecode)); |
|
printf("Unknown opcode %d\n", *ecode); |
|
|
#endif |
|
3351 |
md->errorcode = PCRE_ERROR_UNKNOWN_NODE; |
md->errorcode = PCRE_ERROR_UNKNOWN_NODE; |
3352 |
return FALSE; |
return FALSE; |
3353 |
} |
} |
3363 |
|
|
3364 |
|
|
3365 |
/************************************************* |
/************************************************* |
3366 |
|
* Segregate setjmp() * |
3367 |
|
*************************************************/ |
3368 |
|
|
3369 |
|
/* The -Wall option of gcc gives warnings for all local variables when setjmp() |
3370 |
|
is used, even if the coding conforms to the rules of ANSI C. To avoid this, we |
3371 |
|
hide it in a separate function. This is called only when PCRE_EXTRA is set, |
3372 |
|
since it's needed only for the extension \X option, and with any luck, a good |
3373 |
|
compiler will spot the tail recursion and compile it efficiently. |
3374 |
|
|
3375 |
|
Arguments: |
3376 |
|
eptr pointer in subject |
3377 |
|
ecode position in code |
3378 |
|
offset_top current top pointer |
3379 |
|
md pointer to "static" info for the match |
3380 |
|
|
3381 |
|
Returns: TRUE if matched |
3382 |
|
*/ |
3383 |
|
|
3384 |
|
static BOOL |
3385 |
|
match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top, |
3386 |
|
match_data *match_block) |
3387 |
|
{ |
3388 |
|
return setjmp(match_block->fail_env) == 0 && |
3389 |
|
match(eptr, ecode, offset_top, match_block); |
3390 |
|
} |
3391 |
|
|
3392 |
|
|
3393 |
|
|
3394 |
|
/************************************************* |
3395 |
* Execute a Regular Expression * |
* Execute a Regular Expression * |
3396 |
*************************************************/ |
*************************************************/ |
3397 |
|
|
3418 |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
3419 |
const char *subject, int length, int options, int *offsets, int offsetcount) |
const char *subject, int length, int options, int *offsets, int offsetcount) |
3420 |
{ |
{ |
3421 |
int resetcount; |
int resetcount, ocount; |
|
int ocount = offsetcount; |
|
3422 |
int first_char = -1; |
int first_char = -1; |
3423 |
match_data match_block; |
match_data match_block; |
3424 |
uschar *start_bits = NULL; |
const uschar *start_bits = NULL; |
3425 |
uschar *start_match = (uschar *)subject; |
const uschar *start_match = (const uschar *)subject; |
3426 |
uschar *end_subject; |
const uschar *end_subject; |
3427 |
real_pcre *re = (real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
3428 |
real_pcre_extra *extra = (real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
3429 |
|
BOOL using_temporary_offsets = FALSE; |
3430 |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
3431 |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
3432 |
|
|
3436 |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
3437 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
3438 |
|
|
3439 |
match_block.start_subject = (uschar *)subject; |
match_block.start_subject = (const uschar *)subject; |
3440 |
match_block.end_subject = match_block.start_subject + length; |
match_block.end_subject = match_block.start_subject + length; |
3441 |
end_subject = match_block.end_subject; |
end_subject = match_block.end_subject; |
3442 |
|
|
3455 |
|
|
3456 |
/* If the expression has got more back references than the offsets supplied can |
/* If the expression has got more back references than the offsets supplied can |
3457 |
hold, we get a temporary bit of working store to use during the matching. |
hold, we get a temporary bit of working store to use during the matching. |
3458 |
Otherwise, we can use the vector supplied, rounding down the size of it to a |
Otherwise, we can use the vector supplied, rounding down its size to a multiple |
3459 |
multiple of 2. */ |
of 2. */ |
3460 |
|
|
3461 |
ocount &= (-2); |
ocount = offsetcount & (-2); |
3462 |
if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2) |
if (re->top_backref > 0 && re->top_backref >= ocount/2) |
3463 |
{ |
{ |
3464 |
ocount = re->top_backref * 2 + 2; |
ocount = re->top_backref * 2 + 2; |
3465 |
match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int)); |
match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int)); |
3466 |
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; |
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; |
3467 |
#ifdef DEBUG |
using_temporary_offsets = TRUE; |
3468 |
printf("Got memory to hold back references\n"); |
DPRINTF(("Got memory to hold back references\n")); |
|
#endif |
|
3469 |
} |
} |
3470 |
else match_block.offset_vector = offsets; |
else match_block.offset_vector = offsets; |
3471 |
|
|
3518 |
|
|
3519 |
do |
do |
3520 |
{ |
{ |
3521 |
|
int rc; |
3522 |
register int *iptr = match_block.offset_vector; |
register int *iptr = match_block.offset_vector; |
3523 |
register int *iend = iptr + resetcount; |
register int *iend = iptr + resetcount; |
3524 |
|
|
3560 |
} |
} |
3561 |
} |
} |
3562 |
|
|
3563 |
#ifdef DEBUG |
#ifdef DEBUG /* Sigh. Some compilers never learn. */ |
3564 |
printf(">>>> Match against: "); |
printf(">>>> Match against: "); |
3565 |
pchars(start_match, end_subject - start_match, TRUE, &match_block); |
pchars(start_match, end_subject - start_match, TRUE, &match_block); |
3566 |
printf("\n"); |
printf("\n"); |
3567 |
#endif |
#endif |
3568 |
|
|
3569 |
/* When a match occurs, substrings will be set for all internal extractions; |
/* When a match occurs, substrings will be set for all internal extractions; |
3570 |
we just need to set up the whole thing as substring 0 before returning. If |
we just need to set up the whole thing as substring 0 before returning. If |
3574 |
if certain parts of the pattern were not used. |
if certain parts of the pattern were not used. |
3575 |
|
|
3576 |
Before starting the match, we have to set up a longjmp() target to enable |
Before starting the match, we have to set up a longjmp() target to enable |
3577 |
the "cut" operation to fail a match completely without backtracking. */ |
the "cut" operation to fail a match completely without backtracking. This |
3578 |
|
is done in a separate function to avoid compiler warnings. We need not do |
3579 |
|
it unless PCRE_EXTRA is set, since only in that case is the "cut" operation |
3580 |
|
enabled. */ |
3581 |
|
|
3582 |
if (setjmp(match_block.fail_env) == 0 && |
if ((re->options & PCRE_EXTRA) != 0) |
|
match(start_match, re->code, 2, &match_block)) |
|
3583 |
{ |
{ |
3584 |
int rc; |
if (!match_with_setjmp(start_match, re->code, 2, &match_block)) |
3585 |
|
continue; |
3586 |
if (ocount != offsetcount) |
} |
3587 |
{ |
else if (!match(start_match, re->code, 2, &match_block)) continue; |
|
if (offsetcount >= 4) |
|
|
{ |
|
|
memcpy(offsets + 2, match_block.offset_vector + 2, |
|
|
(offsetcount - 2) * sizeof(int)); |
|
|
#ifdef DEBUG |
|
|
printf("Copied offsets; freeing temporary memory\n"); |
|
|
#endif |
|
|
} |
|
|
if (match_block.end_offset_top > offsetcount) |
|
|
match_block.offset_overflow = TRUE; |
|
3588 |
|
|
3589 |
#ifdef DEBUG |
/* Copy the offset information from temporary store if necessary */ |
|
printf("Freeing temporary memory\n"); |
|
|
#endif |
|
3590 |
|
|
3591 |
(pcre_free)(match_block.offset_vector); |
if (using_temporary_offsets) |
3592 |
|
{ |
3593 |
|
if (offsetcount >= 4) |
3594 |
|
{ |
3595 |
|
memcpy(offsets + 2, match_block.offset_vector + 2, |
3596 |
|
(offsetcount - 2) * sizeof(int)); |
3597 |
|
DPRINTF(("Copied offsets from temporary memory\n")); |
3598 |
} |
} |
3599 |
|
if (match_block.end_offset_top > offsetcount) |
3600 |
|
match_block.offset_overflow = TRUE; |
3601 |
|
|
3602 |
rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2; |
DPRINTF(("Freeing temporary memory\n")); |
3603 |
|
(pcre_free)(match_block.offset_vector); |
3604 |
|
} |
3605 |
|
|
3606 |
if (match_block.offset_end < 2) rc = 0; else |
rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2; |
|
{ |
|
|
offsets[0] = start_match - match_block.start_subject; |
|
|
offsets[1] = match_block.end_match_ptr - match_block.start_subject; |
|
|
} |
|
3607 |
|
|
3608 |
#ifdef DEBUG |
if (match_block.offset_end < 2) rc = 0; else |
3609 |
printf(">>>> returning %d\n", rc); |
{ |
3610 |
#endif |
offsets[0] = start_match - match_block.start_subject; |
3611 |
return rc; |
offsets[1] = match_block.end_match_ptr - match_block.start_subject; |
3612 |
} |
} |
3613 |
|
|
3614 |
|
DPRINTF((">>>> returning %d\n", rc)); |
3615 |
|
return rc; |
3616 |
} |
} |
3617 |
while (!anchored && |
while (!anchored && |
3618 |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
3619 |
start_match++ < end_subject); |
start_match++ < end_subject); |
3620 |
|
|
3621 |
#ifdef DEBUG |
if (using_temporary_offsets) |
3622 |
printf(">>>> returning %d\n", match_block.errorcode); |
{ |
3623 |
#endif |
DPRINTF(("Freeing temporary memory\n")); |
3624 |
|
(pcre_free)(match_block.offset_vector); |
3625 |
|
} |
3626 |
|
|
3627 |
|
DPRINTF((">>>> returning %d\n", match_block.errorcode)); |
3628 |
|
|
3629 |
return match_block.errorcode; |
return match_block.errorcode; |
3630 |
} |
} |