9 |
|
|
10 |
Written by: Philip Hazel <ph10@cam.ac.uk> |
Written by: Philip Hazel <ph10@cam.ac.uk> |
11 |
|
|
12 |
Copyright (c) 1997-2000 University of Cambridge |
Copyright (c) 1997-2001 University of Cambridge |
13 |
|
|
14 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
15 |
Permission is granted to anyone to use this software for any purpose on any |
Permission is granted to anyone to use this software for any purpose on any |
60 |
#endif |
#endif |
61 |
|
|
62 |
|
|
63 |
/* Number of items on the nested bracket stacks at compile time. This should |
/* Maximum number of items on the nested bracket stacks at compile time. This |
64 |
not be set greater than 200. */ |
applies to the nesting of all kinds of parentheses. It does not limit |
65 |
|
un-nested, non-capturing parentheses. This number can be made bigger if |
66 |
|
necessary - it is used to dimension one int and one unsigned char vector at |
67 |
|
compile time. */ |
68 |
|
|
69 |
#define BRASTACK_SIZE 200 |
#define BRASTACK_SIZE 200 |
70 |
|
|
98 |
"class", "Ref", "Recurse", |
"class", "Ref", "Recurse", |
99 |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", |
100 |
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", |
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", |
101 |
"Brazero", "Braminzero", "Bra" |
"Brazero", "Braminzero", "Branumber", "Bra" |
102 |
}; |
}; |
103 |
#endif |
#endif |
104 |
|
|
114 |
0, 0, 0, 0, 0, 0, 0, 0, /* H - O */ |
0, 0, 0, 0, 0, 0, 0, 0, /* H - O */ |
115 |
0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */ |
0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */ |
116 |
0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */ |
0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */ |
117 |
'`', 7, -ESC_b, 0, -ESC_d, 27, '\f', 0, /* ` - g */ |
'`', 7, -ESC_b, 0, -ESC_d, ESC_E, ESC_F, 0, /* ` - g */ |
118 |
0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */ |
0, 0, 0, 0, 0, 0, ESC_N, 0, /* h - o */ |
119 |
0, 0, '\r', -ESC_s, '\t', 0, 0, -ESC_w, /* p - w */ |
0, 0, ESC_R, -ESC_s, ESC_T, 0, 0, -ESC_w, /* p - w */ |
120 |
0, 0, -ESC_z /* x - z */ |
0, 0, -ESC_z /* x - z */ |
121 |
}; |
}; |
122 |
|
|
817 |
/* Skip over things that don't match chars */ |
/* Skip over things that don't match chars */ |
818 |
|
|
819 |
case OP_REVERSE: |
case OP_REVERSE: |
820 |
|
case OP_BRANUMBER: |
821 |
|
case OP_CREF: |
822 |
cc++; |
cc++; |
823 |
/* Fall through */ |
/* Fall through */ |
824 |
|
|
|
case OP_CREF: |
|
825 |
case OP_OPT: |
case OP_OPT: |
826 |
cc++; |
cc++; |
827 |
/* Fall through */ |
/* Fall through */ |
875 |
/* Check a class for variable quantification */ |
/* Check a class for variable quantification */ |
876 |
|
|
877 |
case OP_CLASS: |
case OP_CLASS: |
878 |
cc += (*cc == OP_REF)? 2 : 33; |
cc += 33; |
879 |
|
|
880 |
switch (*cc) |
switch (*cc) |
881 |
{ |
{ |
982 |
|
|
983 |
Arguments: |
Arguments: |
984 |
options the option bits |
options the option bits |
985 |
brackets points to number of brackets used |
brackets points to number of extracting brackets used |
986 |
code points to the pointer to the current code point |
code points to the pointer to the current code point |
987 |
ptrptr points to the current pattern pointer |
ptrptr points to the current pattern pointer |
988 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
1033 |
int class_charcount; |
int class_charcount; |
1034 |
int class_lastchar; |
int class_lastchar; |
1035 |
int newoptions; |
int newoptions; |
1036 |
int condref; |
int skipbytes; |
1037 |
int subreqchar; |
int subreqchar; |
1038 |
|
|
1039 |
c = *ptr; |
c = *ptr; |
1044 |
{ |
{ |
1045 |
/* The space before the ; is to avoid a warning on a silly compiler |
/* The space before the ; is to avoid a warning on a silly compiler |
1046 |
on the Macintosh. */ |
on the Macintosh. */ |
1047 |
while ((c = *(++ptr)) != 0 && c != '\n') ; |
while ((c = *(++ptr)) != 0 && c != NEWLINE) ; |
1048 |
continue; |
continue; |
1049 |
} |
} |
1050 |
} |
} |
1582 |
OP_BRAZERO in front of it, and because the group appears once in the |
OP_BRAZERO in front of it, and because the group appears once in the |
1583 |
data, whereas in other cases it appears the minimum number of times. For |
data, whereas in other cases it appears the minimum number of times. For |
1584 |
this reason, it is simplest to treat this case separately, as otherwise |
this reason, it is simplest to treat this case separately, as otherwise |
1585 |
the code gets far too mess. There are several special subcases when the |
the code gets far too messy. There are several special subcases when the |
1586 |
minimum is zero. */ |
minimum is zero. */ |
1587 |
|
|
1588 |
if (repeat_min == 0) |
if (repeat_min == 0) |
1733 |
|
|
1734 |
case '(': |
case '(': |
1735 |
newoptions = options; |
newoptions = options; |
1736 |
condref = -1; |
skipbytes = 0; |
1737 |
|
|
1738 |
if (*(++ptr) == '?') |
if (*(++ptr) == '?') |
1739 |
{ |
{ |
1756 |
bravalue = OP_COND; /* Conditional group */ |
bravalue = OP_COND; /* Conditional group */ |
1757 |
if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) |
if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) |
1758 |
{ |
{ |
1759 |
condref = *ptr - '0'; |
int condref = *ptr - '0'; |
1760 |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
1761 |
if (condref == 0) |
if (condref == 0) |
1762 |
{ |
{ |
1764 |
goto FAILED; |
goto FAILED; |
1765 |
} |
} |
1766 |
ptr++; |
ptr++; |
1767 |
|
code[3] = OP_CREF; |
1768 |
|
code[4] = condref >> 8; |
1769 |
|
code[5] = condref & 255; |
1770 |
|
skipbytes = 3; |
1771 |
} |
} |
1772 |
else ptr--; |
else ptr--; |
1773 |
break; |
break; |
1870 |
} |
} |
1871 |
} |
} |
1872 |
|
|
1873 |
/* Else we have a referencing group; adjust the opcode. */ |
/* Else we have a referencing group; adjust the opcode. If the bracket |
1874 |
|
number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and |
1875 |
|
arrange for the true number to follow later, in an OP_BRANUMBER item. */ |
1876 |
|
|
1877 |
else |
else |
1878 |
{ |
{ |
1879 |
if (++(*brackets) > EXTRACT_MAX) |
if (++(*brackets) > EXTRACT_BASIC_MAX) |
1880 |
{ |
{ |
1881 |
*errorptr = ERR13; |
bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1; |
1882 |
goto FAILED; |
code[3] = OP_BRANUMBER; |
1883 |
|
code[4] = *brackets >> 8; |
1884 |
|
code[5] = *brackets & 255; |
1885 |
|
skipbytes = 3; |
1886 |
} |
} |
1887 |
bravalue = OP_BRA + *brackets; |
else bravalue = OP_BRA + *brackets; |
1888 |
} |
} |
1889 |
|
|
1890 |
/* Process nested bracketed re. Assertions may not be repeated, but other |
/* Process nested bracketed re. Assertions may not be repeated, but other |
1900 |
options | PCRE_INGROUP, /* Set for all nested groups */ |
options | PCRE_INGROUP, /* Set for all nested groups */ |
1901 |
((options & PCRE_IMS) != (newoptions & PCRE_IMS))? |
((options & PCRE_IMS) != (newoptions & PCRE_IMS))? |
1902 |
newoptions & PCRE_IMS : -1, /* Pass ims options if changed */ |
newoptions & PCRE_IMS : -1, /* Pass ims options if changed */ |
1903 |
brackets, /* Bracket level */ |
brackets, /* Extracting bracket count */ |
1904 |
&tempcode, /* Where to put code (updated) */ |
&tempcode, /* Where to put code (updated) */ |
1905 |
&ptr, /* Input pointer (updated) */ |
&ptr, /* Input pointer (updated) */ |
1906 |
errorptr, /* Where to put an error message */ |
errorptr, /* Where to put an error message */ |
1907 |
(bravalue == OP_ASSERTBACK || |
(bravalue == OP_ASSERTBACK || |
1908 |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
1909 |
condref, /* Condition reference number */ |
skipbytes, /* Skip over OP_COND/OP_BRANUMBER */ |
1910 |
&subreqchar, /* For possible last char */ |
&subreqchar, /* For possible last char */ |
1911 |
&subcountlits, /* For literal count */ |
&subcountlits, /* For literal count */ |
1912 |
cd)) /* Tables block */ |
cd)) /* Tables block */ |
1920 |
/* If this is a conditional bracket, check that there are no more than |
/* If this is a conditional bracket, check that there are no more than |
1921 |
two branches in the group. */ |
two branches in the group. */ |
1922 |
|
|
1923 |
if (bravalue == OP_COND) |
else if (bravalue == OP_COND) |
1924 |
{ |
{ |
1925 |
uschar *tc = code; |
uschar *tc = code; |
1926 |
condcount = 0; |
condcount = 0; |
1987 |
{ |
{ |
1988 |
if (-c >= ESC_REF) |
if (-c >= ESC_REF) |
1989 |
{ |
{ |
1990 |
|
int number = -c - ESC_REF; |
1991 |
previous = code; |
previous = code; |
1992 |
*code++ = OP_REF; |
*code++ = OP_REF; |
1993 |
*code++ = -c - ESC_REF; |
*code++ = number >> 8; |
1994 |
|
*code++ = number & 255; |
1995 |
} |
} |
1996 |
else |
else |
1997 |
{ |
{ |
2026 |
{ |
{ |
2027 |
/* The space before the ; is to avoid a warning on a silly compiler |
/* The space before the ; is to avoid a warning on a silly compiler |
2028 |
on the Macintosh. */ |
on the Macintosh. */ |
2029 |
while ((c = *(++ptr)) != 0 && c != '\n') ; |
while ((c = *(++ptr)) != 0 && c != NEWLINE) ; |
2030 |
if (c == 0) break; |
if (c == 0) break; |
2031 |
continue; |
continue; |
2032 |
} |
} |
2115 |
ptrptr -> the address of the current pattern pointer |
ptrptr -> the address of the current pattern pointer |
2116 |
errorptr -> pointer to error message |
errorptr -> pointer to error message |
2117 |
lookbehind TRUE if this is a lookbehind assertion |
lookbehind TRUE if this is a lookbehind assertion |
2118 |
condref >= 0 for OPT_CREF setting at start of conditional group |
skipbytes skip this many bytes at start (for OP_COND, OP_BRANUMBER) |
2119 |
reqchar -> place to put the last required character, or a negative number |
reqchar -> place to put the last required character, or a negative number |
2120 |
countlits -> place to put the shortest literal count of any branch |
countlits -> place to put the shortest literal count of any branch |
2121 |
cd points to the data block with tables pointers |
cd points to the data block with tables pointers |
2125 |
|
|
2126 |
static BOOL |
static BOOL |
2127 |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
2128 |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref, |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int skipbytes, |
2129 |
int *reqchar, int *countlits, compile_data *cd) |
int *reqchar, int *countlits, compile_data *cd) |
2130 |
{ |
{ |
2131 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
2138 |
|
|
2139 |
*reqchar = -1; |
*reqchar = -1; |
2140 |
*countlits = INT_MAX; |
*countlits = INT_MAX; |
2141 |
code += 3; |
code += 3 + skipbytes; |
|
|
|
|
/* At the start of a reference-based conditional group, insert the reference |
|
|
number as an OP_CREF item. */ |
|
|
|
|
|
if (condref >= 0) |
|
|
{ |
|
|
*code++ = OP_CREF; |
|
|
*code++ = condref; |
|
|
} |
|
2142 |
|
|
2143 |
/* Loop for each alternative branch */ |
/* Loop for each alternative branch */ |
2144 |
|
|
2290 |
break; |
break; |
2291 |
|
|
2292 |
case OP_CREF: |
case OP_CREF: |
2293 |
code += 2; |
case OP_BRANUMBER: |
2294 |
|
code += 3; |
2295 |
break; |
break; |
2296 |
|
|
2297 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
2554 |
{ |
{ |
2555 |
int min, max; |
int min, max; |
2556 |
int class_charcount; |
int class_charcount; |
2557 |
|
int bracket_length; |
2558 |
|
|
2559 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
2560 |
{ |
{ |
2563 |
{ |
{ |
2564 |
/* The space before the ; is to avoid a warning on a silly compiler |
/* The space before the ; is to avoid a warning on a silly compiler |
2565 |
on the Macintosh. */ |
on the Macintosh. */ |
2566 |
while ((c = *(++ptr)) != 0 && c != '\n') ; |
while ((c = *(++ptr)) != 0 && c != NEWLINE) ; |
2567 |
continue; |
continue; |
2568 |
} |
} |
2569 |
} |
} |
2589 |
} |
} |
2590 |
length++; |
length++; |
2591 |
|
|
2592 |
/* A back reference needs an additional char, plus either one or 5 |
/* A back reference needs an additional 2 bytes, plus either one or 5 |
2593 |
bytes for a repeat. We also need to keep the value of the highest |
bytes for a repeat. We also need to keep the value of the highest |
2594 |
back reference. */ |
back reference. */ |
2595 |
|
|
2597 |
{ |
{ |
2598 |
int refnum = -c - ESC_REF; |
int refnum = -c - ESC_REF; |
2599 |
if (refnum > top_backref) top_backref = refnum; |
if (refnum > top_backref) top_backref = refnum; |
2600 |
length++; /* For single back reference */ |
length += 2; /* For single back reference */ |
2601 |
if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
2602 |
{ |
{ |
2603 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
2695 |
|
|
2696 |
case '(': |
case '(': |
2697 |
branch_newextra = 0; |
branch_newextra = 0; |
2698 |
|
bracket_length = 3; |
2699 |
|
|
2700 |
/* Handle special forms of bracket, which all start (? */ |
/* Handle special forms of bracket, which all start (? */ |
2701 |
|
|
2763 |
if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0) |
if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0) |
2764 |
{ |
{ |
2765 |
ptr += 4; |
ptr += 4; |
2766 |
length += 2; |
length += 3; |
2767 |
while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++; |
while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++; |
2768 |
if (*ptr != ')') |
if (*ptr != ')') |
2769 |
{ |
{ |
2890 |
} |
} |
2891 |
|
|
2892 |
/* Extracting brackets must be counted so we can process escapes in a |
/* Extracting brackets must be counted so we can process escapes in a |
2893 |
Perlish way. */ |
Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to |
2894 |
|
need an additional 3 bytes of store per extracting bracket. */ |
2895 |
|
|
2896 |
else bracount++; |
else |
2897 |
|
{ |
2898 |
|
bracount++; |
2899 |
|
if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3; |
2900 |
|
} |
2901 |
|
|
2902 |
/* Non-special forms of bracket. Save length for computing whole length |
/* Save length for computing whole length at end if there's a repeat that |
2903 |
at end if there's a repeat that requires duplication of the group. Also |
requires duplication of the group. Also save the current value of |
2904 |
save the current value of branch_extra, and start the new group with |
branch_extra, and start the new group with the new value. If non-zero, this |
2905 |
the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3 |
will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */ |
|
for a lookbehind assertion. */ |
|
2906 |
|
|
2907 |
if (brastackptr >= sizeof(brastack)/sizeof(int)) |
if (brastackptr >= sizeof(brastack)/sizeof(int)) |
2908 |
{ |
{ |
2914 |
branch_extra = branch_newextra; |
branch_extra = branch_newextra; |
2915 |
|
|
2916 |
brastack[brastackptr++] = length; |
brastack[brastackptr++] = length; |
2917 |
length += 3; |
length += bracket_length; |
2918 |
continue; |
continue; |
2919 |
|
|
2920 |
/* Handle ket. Look for subsequent max/min; for certain sets of values we |
/* Handle ket. Look for subsequent max/min; for certain sets of values we |
2994 |
{ |
{ |
2995 |
/* The space before the ; is to avoid a warning on a silly compiler |
/* The space before the ; is to avoid a warning on a silly compiler |
2996 |
on the Macintosh. */ |
on the Macintosh. */ |
2997 |
while ((c = *(++ptr)) != 0 && c != '\n') ; |
while ((c = *(++ptr)) != 0 && c != NEWLINE) ; |
2998 |
continue; |
continue; |
2999 |
} |
} |
3000 |
} |
} |
3075 |
code = re->code; |
code = re->code; |
3076 |
*code = OP_BRA; |
*code = OP_BRA; |
3077 |
bracount = 0; |
bracount = 0; |
3078 |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1, |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, 0, |
3079 |
&reqchar, &countlits, &compile_block); |
&reqchar, &countlits, &compile_block); |
3080 |
re->top_bracket = bracount; |
re->top_bracket = bracount; |
3081 |
re->top_backref = top_backref; |
re->top_backref = top_backref; |
3189 |
|
|
3190 |
if (*code >= OP_BRA) |
if (*code >= OP_BRA) |
3191 |
{ |
{ |
3192 |
printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); |
if (*code - OP_BRA > EXTRACT_BASIC_MAX) |
3193 |
|
printf("%3d Bra extra", (code[1] << 8) + code[2]); |
3194 |
|
else |
3195 |
|
printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); |
3196 |
code += 2; |
code += 2; |
3197 |
} |
} |
3198 |
|
|
3203 |
code++; |
code++; |
3204 |
break; |
break; |
3205 |
|
|
|
case OP_COND: |
|
|
printf("%3d Cond", (code[1] << 8) + code[2]); |
|
|
code += 2; |
|
|
break; |
|
|
|
|
|
case OP_CREF: |
|
|
printf(" %.2d %s", code[1], OP_names[*code]); |
|
|
code++; |
|
|
break; |
|
|
|
|
3206 |
case OP_CHARS: |
case OP_CHARS: |
3207 |
charlength = *(++code); |
charlength = *(++code); |
3208 |
printf("%3d ", charlength); |
printf("%3d ", charlength); |
3219 |
case OP_ASSERTBACK: |
case OP_ASSERTBACK: |
3220 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
3221 |
case OP_ONCE: |
case OP_ONCE: |
|
printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]); |
|
|
code += 2; |
|
|
break; |
|
|
|
|
3222 |
case OP_REVERSE: |
case OP_REVERSE: |
3223 |
|
case OP_BRANUMBER: |
3224 |
|
case OP_COND: |
3225 |
|
case OP_CREF: |
3226 |
printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]); |
printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]); |
3227 |
code += 2; |
code += 2; |
3228 |
break; |
break; |
3295 |
break; |
break; |
3296 |
|
|
3297 |
case OP_REF: |
case OP_REF: |
3298 |
printf(" \\%d", *(++code)); |
printf(" \\%d", (code[1] << 8) | code[2]); |
3299 |
code ++; |
code += 3; |
3300 |
goto CLASS_REF_REPEAT; |
goto CLASS_REF_REPEAT; |
3301 |
|
|
3302 |
case OP_CLASS: |
case OP_CLASS: |
3509 |
|
|
3510 |
if (op > OP_BRA) |
if (op > OP_BRA) |
3511 |
{ |
{ |
3512 |
|
int offset; |
3513 |
int number = op - OP_BRA; |
int number = op - OP_BRA; |
3514 |
int offset = number << 1; |
|
3515 |
|
/* For extended extraction brackets (large number), we have to fish out the |
3516 |
|
number from a dummy opcode at the start. */ |
3517 |
|
|
3518 |
|
if (number > EXTRACT_BASIC_MAX) number = (ecode[4] << 8) | ecode[5]; |
3519 |
|
offset = number << 1; |
3520 |
|
|
3521 |
#ifdef DEBUG |
#ifdef DEBUG |
3522 |
printf("start bracket %d subject=", number); |
printf("start bracket %d subject=", number); |
3546 |
md->offset_vector[offset] = save_offset1; |
md->offset_vector[offset] = save_offset1; |
3547 |
md->offset_vector[offset+1] = save_offset2; |
md->offset_vector[offset+1] = save_offset2; |
3548 |
md->offset_vector[md->offset_end - number] = save_offset3; |
md->offset_vector[md->offset_end - number] = save_offset3; |
3549 |
|
|
3550 |
return FALSE; |
return FALSE; |
3551 |
} |
} |
3552 |
|
|
3579 |
case OP_COND: |
case OP_COND: |
3580 |
if (ecode[3] == OP_CREF) /* Condition is extraction test */ |
if (ecode[3] == OP_CREF) /* Condition is extraction test */ |
3581 |
{ |
{ |
3582 |
int offset = ecode[4] << 1; /* Doubled reference number */ |
int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled ref number */ |
3583 |
return match(eptr, |
return match(eptr, |
3584 |
ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)? |
ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)? |
3585 |
5 : 3 + (ecode[1] << 8) + ecode[2]), |
6 : 3 + (ecode[1] << 8) + ecode[2]), |
3586 |
offset_top, md, ims, eptrb, match_isgroup); |
offset_top, md, ims, eptrb, match_isgroup); |
3587 |
} |
} |
3588 |
|
|
3602 |
} |
} |
3603 |
/* Control never reaches here */ |
/* Control never reaches here */ |
3604 |
|
|
3605 |
/* Skip over conditional reference data if encountered (should not be) */ |
/* Skip over conditional reference or large extraction number data if |
3606 |
|
encountered. */ |
3607 |
|
|
3608 |
case OP_CREF: |
case OP_CREF: |
3609 |
ecode += 2; |
case OP_BRANUMBER: |
3610 |
|
ecode += 3; |
3611 |
break; |
break; |
3612 |
|
|
3613 |
/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched |
/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched |
3873 |
|
|
3874 |
if (*prev != OP_COND) |
if (*prev != OP_COND) |
3875 |
{ |
{ |
3876 |
|
int offset; |
3877 |
int number = *prev - OP_BRA; |
int number = *prev - OP_BRA; |
3878 |
int offset = number << 1; |
|
3879 |
|
/* For extended extraction brackets (large number), we have to fish out |
3880 |
|
the number from a dummy opcode at the start. */ |
3881 |
|
|
3882 |
|
if (number > EXTRACT_BASIC_MAX) number = (prev[4] << 8) | prev[5]; |
3883 |
|
offset = number << 1; |
3884 |
|
|
3885 |
#ifdef DEBUG |
#ifdef DEBUG |
3886 |
printf("end bracket %d", number); |
printf("end bracket %d", number); |
3940 |
if (md->notbol && eptr == md->start_subject) return FALSE; |
if (md->notbol && eptr == md->start_subject) return FALSE; |
3941 |
if ((ims & PCRE_MULTILINE) != 0) |
if ((ims & PCRE_MULTILINE) != 0) |
3942 |
{ |
{ |
3943 |
if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE; |
if (eptr != md->start_subject && eptr[-1] != NEWLINE) return FALSE; |
3944 |
ecode++; |
ecode++; |
3945 |
break; |
break; |
3946 |
} |
} |
3959 |
case OP_DOLL: |
case OP_DOLL: |
3960 |
if ((ims & PCRE_MULTILINE) != 0) |
if ((ims & PCRE_MULTILINE) != 0) |
3961 |
{ |
{ |
3962 |
if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; } |
if (eptr < md->end_subject) { if (*eptr != NEWLINE) return FALSE; } |
3963 |
else { if (md->noteol) return FALSE; } |
else { if (md->noteol) return FALSE; } |
3964 |
ecode++; |
ecode++; |
3965 |
break; |
break; |
3970 |
if (!md->endonly) |
if (!md->endonly) |
3971 |
{ |
{ |
3972 |
if (eptr < md->end_subject - 1 || |
if (eptr < md->end_subject - 1 || |
3973 |
(eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE; |
(eptr == md->end_subject - 1 && *eptr != NEWLINE)) return FALSE; |
3974 |
|
|
3975 |
ecode++; |
ecode++; |
3976 |
break; |
break; |
3989 |
|
|
3990 |
case OP_EODN: |
case OP_EODN: |
3991 |
if (eptr < md->end_subject - 1 || |
if (eptr < md->end_subject - 1 || |
3992 |
(eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE; |
(eptr == md->end_subject - 1 && *eptr != NEWLINE)) return FALSE; |
3993 |
ecode++; |
ecode++; |
3994 |
break; |
break; |
3995 |
|
|
4011 |
/* Match a single character type; inline for speed */ |
/* Match a single character type; inline for speed */ |
4012 |
|
|
4013 |
case OP_ANY: |
case OP_ANY: |
4014 |
if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n') |
if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE) |
4015 |
return FALSE; |
return FALSE; |
4016 |
if (eptr++ >= md->end_subject) return FALSE; |
if (eptr++ >= md->end_subject) return FALSE; |
4017 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
4074 |
case OP_REF: |
case OP_REF: |
4075 |
{ |
{ |
4076 |
int length; |
int length; |
4077 |
int offset = ecode[1] << 1; /* Doubled reference number */ |
int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled ref number */ |
4078 |
ecode += 2; /* Advance past the item */ |
ecode += 3; /* Advance past item */ |
4079 |
|
|
4080 |
/* If the reference is unset, set the length to be longer than the amount |
/* If the reference is unset, set the length to be longer than the amount |
4081 |
of subject left; this ensures that every attempt at a match fails. We |
of subject left; this ensures that every attempt at a match fails. We |
4619 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
4620 |
{ |
{ |
4621 |
if (eptr >= md->end_subject || |
if (eptr >= md->end_subject || |
4622 |
(*eptr++ == '\n' && (ims & PCRE_DOTALL) == 0)) |
(*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0)) |
4623 |
return FALSE; |
return FALSE; |
4624 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
4625 |
} |
} |
4628 |
#endif |
#endif |
4629 |
/* Non-UTF8 can be faster */ |
/* Non-UTF8 can be faster */ |
4630 |
if ((ims & PCRE_DOTALL) == 0) |
if ((ims & PCRE_DOTALL) == 0) |
4631 |
{ for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; } |
{ for (i = 1; i <= min; i++) if (*eptr++ == NEWLINE) return FALSE; } |
4632 |
else eptr += min; |
else eptr += min; |
4633 |
break; |
break; |
4634 |
|
|
4683 |
switch(ctype) |
switch(ctype) |
4684 |
{ |
{ |
4685 |
case OP_ANY: |
case OP_ANY: |
4686 |
if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE; |
if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) return FALSE; |
4687 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
4688 |
if (md->utf8) |
if (md->utf8) |
4689 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
4738 |
{ |
{ |
4739 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4740 |
{ |
{ |
4741 |
if (eptr >= md->end_subject || *eptr++ == '\n') break; |
if (eptr >= md->end_subject || *eptr++ == NEWLINE) break; |
4742 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
4743 |
} |
} |
4744 |
} |
} |
4758 |
{ |
{ |
4759 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
4760 |
{ |
{ |
4761 |
if (eptr >= md->end_subject || *eptr == '\n') break; |
if (eptr >= md->end_subject || *eptr == NEWLINE) break; |
4762 |
eptr++; |
eptr++; |
4763 |
} |
} |
4764 |
} |
} |
4899 |
const real_pcre *re = (const real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
4900 |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
4901 |
BOOL using_temporary_offsets = FALSE; |
BOOL using_temporary_offsets = FALSE; |
4902 |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
BOOL anchored; |
4903 |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
BOOL startline; |
4904 |
|
|
4905 |
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; |
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; |
4906 |
|
|
4908 |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
4909 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
4910 |
|
|
4911 |
|
anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
4912 |
|
startline = (re->options & PCRE_STARTLINE) != 0; |
4913 |
|
|
4914 |
match_block.start_pattern = re->code; |
match_block.start_pattern = re->code; |
4915 |
match_block.start_subject = (const uschar *)subject; |
match_block.start_subject = (const uschar *)subject; |
4916 |
match_block.end_subject = match_block.start_subject + length; |
match_block.end_subject = match_block.start_subject + length; |
5039 |
{ |
{ |
5040 |
if (start_match > match_block.start_subject + start_offset) |
if (start_match > match_block.start_subject + start_offset) |
5041 |
{ |
{ |
5042 |
while (start_match < end_subject && start_match[-1] != '\n') |
while (start_match < end_subject && start_match[-1] != NEWLINE) |
5043 |
start_match++; |
start_match++; |
5044 |
} |
} |
5045 |
} |
} |