100 |
#ifndef EBCDIC |
#ifndef EBCDIC |
101 |
|
|
102 |
/* This is the "normal" table for ASCII systems or for EBCDIC systems running |
/* This is the "normal" table for ASCII systems or for EBCDIC systems running |
103 |
in UTF-8 mode. */ |
in UTF-8 mode. */ |
104 |
|
|
105 |
/* Escape lookup table for the non-EBCDIC build (selected by the enclosing
#ifndef EBCDIC). Judging by the entry order -- a run of leading zero slots
where the digits would fall, then ':' ';' '<' '=' '>' '?' '@', the upper-case
letters, '[' '\' ']' '^' '_' '`', and finally the lower-case letters -- the
table appears to be indexed by (character - '0') for '0' through 'z'.
TODO confirm against the lookup code, which is not part of this table.

Three kinds of entry occur:
  0             no translation is supplied by the table (presumably the
                caller deals with the escape itself -- confirm);
  positive      a plain value: a CHAR_xxx constant, ESC_e, ESC_f, ESC_n,
                ESC_r, ESC_tee, or the literal 7;
  -ESC_xxx      a negated escape code; users of the looked-up value test it
                in the form "-c == ESC_Q" or "-c == ESC_k". */
static const short int escapes[] = { |
static const short int escapes[] = { |
106 |
0, 0, |
0, 0, |
107 |

0, 0, |
108 |

0, 0, |
109 |
0, 0, |
0, 0, |

0, 0, |

110 |
0, 0, |
0, 0, |

0, 0, |

111 |
CHAR_COLON, CHAR_SEMICOLON, |
CHAR_COLON, CHAR_SEMICOLON, |
112 |
CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, |
CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, |
113 |
CHAR_GREATER_THAN_SIGN, CHAR_QUESTION_MARK, |
CHAR_GREATER_THAN_SIGN, CHAR_QUESTION_MARK, |
114 |
CHAR_COMMERCIAL_AT, -ESC_A, |
CHAR_COMMERCIAL_AT, -ESC_A, |
115 |
-ESC_B, -ESC_C, |
-ESC_B, -ESC_C, |
116 |
-ESC_D, -ESC_E, |
-ESC_D, -ESC_E, |
117 |
0, -ESC_G, |
0, -ESC_G, |
118 |
-ESC_H, 0, |
-ESC_H, 0, |
119 |
0, -ESC_K, |
0, -ESC_K, |
120 |
0, 0, |
0, 0, |
121 |
0, 0, |
0, 0, |
122 |
-ESC_P, -ESC_Q, |
-ESC_P, -ESC_Q, |
123 |
-ESC_R, -ESC_S, |
-ESC_R, -ESC_S, |
124 |
0, 0, |
0, 0, |
125 |
-ESC_V, -ESC_W, |
-ESC_V, -ESC_W, |
126 |
-ESC_X, 0, |
-ESC_X, 0, |
127 |
-ESC_Z, CHAR_LEFT_SQUARE_BRACKET, |
-ESC_Z, CHAR_LEFT_SQUARE_BRACKET, |
128 |
CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET, |
CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET, |
129 |
CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE, |
CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE, |
130 |
/* The literal 7 follows the '`' slot, i.e. it sits where 'a' would fall;
7 is the ASCII BEL code, so this is presumably the value of \a -- confirm. */
CHAR_GRAVE_ACCENT, 7, |
CHAR_GRAVE_ACCENT, 7, |
131 |
-ESC_b, 0, |
-ESC_b, 0, |
132 |
-ESC_d, ESC_e, |
-ESC_d, ESC_e, |
133 |
ESC_f, 0, |
ESC_f, 0, |
134 |
-ESC_h, 0, |
-ESC_h, 0, |
135 |
0, -ESC_k, |
0, -ESC_k, |
136 |
0, 0, |
0, 0, |
137 |
ESC_n, 0, |
ESC_n, 0, |
138 |
-ESC_p, 0, |
-ESC_p, 0, |
139 |
ESC_r, -ESC_s, |
ESC_r, -ESC_s, |
140 |
ESC_tee, 0, |
ESC_tee, 0, |
141 |
-ESC_v, -ESC_w, |
-ESC_v, -ESC_w, |
142 |
0, 0, |
0, 0, |
143 |
-ESC_z |
-ESC_z |
144 |
}; |
}; |
145 |
|
|
146 |
#else |
#else |
147 |
|
|
148 |
/* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */ |
/* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */ |
149 |
|
|
177 |
|
|
178 |
/* Table of special "verbs" like (*PRUNE). This is a short table, so it is |
/* Table of special "verbs" like (*PRUNE). This is a short table, so it is |
179 |
searched linearly. Put all the names into a single string, in order to reduce |
searched linearly. Put all the names into a single string, in order to reduce |
180 |
the number of relocations when a shared library is dynamically linked. The |
the number of relocations when a shared library is dynamically linked. The |
181 |
string is built from string macros so that it works in UTF-8 mode on EBCDIC |
string is built from string macros so that it works in UTF-8 mode on EBCDIC |
182 |
platforms. */ |
platforms. */ |
183 |
|
|
184 |
typedef struct verbitem { |
typedef struct verbitem { |
215 |
for handling case independence. */ |
for handling case independence. */ |
216 |
|
|
217 |
/* All POSIX class names packed into one string, formed by adjacent
STRING_xxx macro concatenation. The names appear in this order: alpha, lower,
upper, alnum, ascii, blank, cntrl, digit, graph, print, punct, space, word,
xdigit. Every macro except the last carries a "0" suffix, which presumably
means it embeds a terminating NUL, leaving the final name to be terminated by
the string's own NUL -- TODO confirm against the STRING_xxx definitions.
NOTE(review): any table that refers to these names by position would have to
stay in step with this order; verify before reordering. */
static const char posix_names[] = |
static const char posix_names[] = |
218 |
STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 |
STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 |
219 |
STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0 |
STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0 |
220 |
STRING_graph0 STRING_print0 STRING_punct0 STRING_space0 |
STRING_graph0 STRING_print0 STRING_punct0 STRING_space0 |
221 |
STRING_word0 STRING_xdigit; |
STRING_word0 STRING_xdigit; |
222 |
|
|
360 |
|
|
361 |
Then we can use ctype_digit and ctype_xdigit in the code. */ |
Then we can use ctype_digit and ctype_xdigit in the code. */ |
362 |
|
|
363 |
#ifndef EBCDIC |
#ifndef EBCDIC |
364 |
|
|
365 |
/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in |
/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in |
366 |
UTF-8 mode. */ |
UTF-8 mode. */ |
367 |
|
|
368 |
static const unsigned char digitab[] = |
static const unsigned char digitab[] = |
400 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ |
401 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ |
402 |
|
|
403 |
#else |
#else |
404 |
|
|
405 |
/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */ |
/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */ |
406 |
|
|
1057 |
/* Skip over character classes; this logic must be similar to the way they |
/* Skip over character classes; this logic must be similar to the way they |
1058 |
are handled for real. If the first character is '^', skip it. Also, if the |
are handled for real. If the first character is '^', skip it. Also, if the |
1059 |
first few characters (either before or after ^) are \Q\E or \E we skip them |
first few characters (either before or after ^) are \Q\E or \E we skip them |
1060 |
too. This makes for compatibility with Perl. Note the use of STR macros to |
too. This makes for compatibility with Perl. Note the use of STR macros to |
1061 |
encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */ |
encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */ |
1062 |
|
|
1063 |
if (*ptr == CHAR_LEFT_SQUARE_BRACKET) |
if (*ptr == CHAR_LEFT_SQUARE_BRACKET) |
1068 |
int c = *(++ptr); |
int c = *(++ptr); |
1069 |
if (c == CHAR_BACKSLASH) |
if (c == CHAR_BACKSLASH) |
1070 |
{ |
{ |
1071 |
if (ptr[1] == CHAR_E) |
if (ptr[1] == CHAR_E) |
1072 |
ptr++; |
ptr++; |
1073 |
else if (strncmp((const char *)ptr+1, |
else if (strncmp((const char *)ptr+1, |
1074 |
STR_Q STR_BACKSLASH STR_E, 3) == 0) |
STR_Q STR_BACKSLASH STR_E, 3) == 0) |
1075 |
ptr += 3; |
ptr += 3; |
1076 |
else |
else |
1077 |
break; |
break; |
1078 |
} |
} |
1079 |
else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) |
else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) |
1084 |
/* If the next character is ']', it is a data character that must be |
/* If the next character is ']', it is a data character that must be |
1085 |
skipped, except in JavaScript compatibility mode. */ |
skipped, except in JavaScript compatibility mode. */ |
1086 |
|
|
1087 |
if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET && |
if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET && |
1088 |
(cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0) |
(cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0) |
1089 |
ptr++; |
ptr++; |
1090 |
|
|
1130 |
|
|
1131 |
/* We have to disambiguate (?<! and (?<= from (?<name> */ |
/* We have to disambiguate (?<! and (?<= from (?<name> */ |
1132 |
|
|
1133 |
if ((*ptr != CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_EXCLAMATION_MARK || |
if ((*ptr != CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_EXCLAMATION_MARK || |
1134 |
ptr[1] == CHAR_EQUALS_SIGN) && *ptr != CHAR_APOSTROPHE) |
ptr[1] == CHAR_EQUALS_SIGN) && *ptr != CHAR_APOSTROPHE) |
1135 |
continue; |
continue; |
1136 |
|
|
2173 |
|
|
2174 |
/* If the next thing is itself optional, we have to give up. */ |
/* If the next thing is itself optional, we have to give up. */ |
2175 |
|
|
2176 |
if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK || |
if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK || |
2177 |
strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0) |
strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0) |
2178 |
return FALSE; |
return FALSE; |
2179 |
|
|
2639 |
/* Fill in length of a previous callout, except when the next thing is |
/* Fill in length of a previous callout, except when the next thing is |
2640 |
a quantifier. */ |
a quantifier. */ |
2641 |
|
|
2642 |
is_quantifier = |
is_quantifier = |
2643 |
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK || |
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK || |
2644 |
(c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1)); |
(c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1)); |
2645 |
|
|
2759 |
/* PCRE supports POSIX class stuff inside a class. Perl gives an error if |
/* PCRE supports POSIX class stuff inside a class. Perl gives an error if |
2760 |
they are encountered at the top level, so we'll do that too. */ |
they are encountered at the top level, so we'll do that too. */ |
2761 |
|
|
2762 |
if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || |
if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || |
2763 |
ptr[1] == CHAR_EQUALS_SIGN) && |
ptr[1] == CHAR_EQUALS_SIGN) && |
2764 |
check_posix_syntax(ptr, &tempptr)) |
check_posix_syntax(ptr, &tempptr)) |
2765 |
{ |
{ |
2777 |
c = *(++ptr); |
c = *(++ptr); |
2778 |
if (c == CHAR_BACKSLASH) |
if (c == CHAR_BACKSLASH) |
2779 |
{ |
{ |
2780 |
if (ptr[1] == CHAR_E) |
if (ptr[1] == CHAR_E) |
2781 |
ptr++; |
ptr++; |
2782 |
else if (strncmp((const char *)ptr+1, |
else if (strncmp((const char *)ptr+1, |
2783 |
STR_Q STR_BACKSLASH STR_E, 3) == 0) |
STR_Q STR_BACKSLASH STR_E, 3) == 0) |
2784 |
ptr += 3; |
ptr += 3; |
2785 |
else |
else |
2786 |
break; |
break; |
2787 |
} |
} |
2788 |
else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) |
else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) |
2795 |
that. In JS mode, [] must always fail, so generate OP_FAIL, whereas |
that. In JS mode, [] must always fail, so generate OP_FAIL, whereas |
2796 |
[^] must match any character, so generate OP_ALLANY. */ |
[^] must match any character, so generate OP_ALLANY. */ |
2797 |
|
|
2798 |
if (c == CHAR_RIGHT_SQUARE_BRACKET && |
if (c == CHAR_RIGHT_SQUARE_BRACKET && |
2799 |
(cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) |
(cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) |
2800 |
{ |
{ |
2801 |
*code++ = negate_class? OP_ALLANY : OP_FAIL; |
*code++ = negate_class? OP_ALLANY : OP_FAIL; |
2877 |
5.6 and 5.8 do. */ |
5.6 and 5.8 do. */ |
2878 |
|
|
2879 |
if (c == CHAR_LEFT_SQUARE_BRACKET && |
if (c == CHAR_LEFT_SQUARE_BRACKET && |
2880 |
(ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || |
(ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || |
2881 |
ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr)) |
ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr)) |
2882 |
{ |
{ |
2883 |
BOOL local_negate = FALSE; |
BOOL local_negate = FALSE; |
3227 |
while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q) |
while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q) |
3228 |
{ |
{ |
3229 |
ptr += 2; |
ptr += 2; |
3230 |
if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) |
if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) |
3231 |
{ ptr += 2; continue; } |
{ ptr += 2; continue; } |
3232 |
inescq = TRUE; |
inescq = TRUE; |
3233 |
break; |
break; |
4427 |
} |
} |
4428 |
namelen = ptr - name; |
namelen = ptr - name; |
4429 |
|
|
4430 |
if ((terminator > 0 && *ptr++ != terminator) || |
if ((terminator > 0 && *ptr++ != terminator) || |
4431 |
*ptr++ != CHAR_RIGHT_PARENTHESIS) |
*ptr++ != CHAR_RIGHT_PARENTHESIS) |
4432 |
{ |
{ |
4433 |
ptr--; /* Error offset */ |
ptr--; /* Error offset */ |
4626 |
|
|
4627 |
/* ------------------------------------------------------------ */ |
/* ------------------------------------------------------------ */ |
4628 |
case CHAR_P: /* Python-style named subpattern handling */ |
case CHAR_P: /* Python-style named subpattern handling */ |
4629 |
if (*(++ptr) == CHAR_EQUALS_SIGN || |
if (*(++ptr) == CHAR_EQUALS_SIGN || |
4630 |
*ptr == CHAR_GREATER_THAN_SIGN) /* Reference or recursion */ |
*ptr == CHAR_GREATER_THAN_SIGN) /* Reference or recursion */ |
4631 |
{ |
{ |
4632 |
is_recurse = *ptr == CHAR_GREATER_THAN_SIGN; |
is_recurse = *ptr == CHAR_GREATER_THAN_SIGN; |
4645 |
DEFINE_NAME: /* Come here from (?< handling */ |
DEFINE_NAME: /* Come here from (?< handling */ |
4646 |
case CHAR_APOSTROPHE: |
case CHAR_APOSTROPHE: |
4647 |
{ |
{ |
4648 |
terminator = (*ptr == CHAR_LESS_THAN_SIGN)? |
terminator = (*ptr == CHAR_LESS_THAN_SIGN)? |
4649 |
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; |
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; |
4650 |
name = ++ptr; |
name = ++ptr; |
4651 |
|
|
5240 |
{ |
{ |
5241 |
if (-c == ESC_Q) /* Handle start of quoted string */ |
if (-c == ESC_Q) /* Handle start of quoted string */ |
5242 |
{ |
{ |
5243 |
if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) |
if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) |
5244 |
ptr += 2; /* avoid empty string */ |
ptr += 2; /* avoid empty string */ |
5245 |
else inescq = TRUE; |
else inescq = TRUE; |
5246 |
continue; |
continue; |
5270 |
{ |
{ |
5271 |
const uschar *p; |
const uschar *p; |
5272 |
save_hwm = cd->hwm; /* Normally this is set when '(' is read */ |
save_hwm = cd->hwm; /* Normally this is set when '(' is read */ |
5273 |
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? |
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? |
5274 |
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; |
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; |
5275 |
|
|
5276 |
/* These two statements stop the compiler from warning about possibly |
/* These two statements stop the compiler from warning about possibly |
5321 |
/* \k<name> or \k'name' is a back reference by name (Perl syntax). |
/* \k<name> or \k'name' is a back reference by name (Perl syntax). |
5322 |
We also support \k{name} (.NET syntax) */ |
We also support \k{name} (.NET syntax) */ |
5323 |
|
|
5324 |
if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN || |
if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN || |
5325 |
ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET)) |
ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET)) |
5326 |
{ |
{ |
5327 |
is_recurse = FALSE; |
is_recurse = FALSE; |
5328 |
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? |
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? |
5329 |
CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)? |
CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)? |
5330 |
CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET; |
CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET; |
5331 |
goto NAMED_REF_OR_RECURSE; |
goto NAMED_REF_OR_RECURSE; |
5332 |
} |
} |
5879 |
const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code], |
const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code], |
5880 |
NULL, 0, FALSE); |
NULL, 0, FALSE); |
5881 |
register int op = *scode; |
register int op = *scode; |
5882 |
|
|
5883 |
/* If we are at the start of a conditional assertion group, *both* the |
/* If we are at the start of a conditional assertion group, *both* the |
5884 |
conditional assertion *and* what follows the condition must satisfy the test |
conditional assertion *and* what follows the condition must satisfy the test |
5885 |
for start of line. Other kinds of condition fail. Note that there may be an |
for start of line. Other kinds of condition fail. Note that there may be an |
5887 |
|
|
5888 |
if (op == OP_COND) |
if (op == OP_COND) |
5889 |
{ |
{ |
5890 |
scode += 1 + LINK_SIZE; |
scode += 1 + LINK_SIZE; |
5891 |
if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT]; |
if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT]; |
5892 |
switch (*scode) |
switch (*scode) |
5893 |
{ |
{ |
5894 |
case OP_CREF: |
case OP_CREF: |
5895 |
case OP_RREF: |
case OP_RREF: |
5896 |
case OP_DEF: |
case OP_DEF: |
5897 |
return FALSE; |
return FALSE; |
5898 |
|
|
5899 |
default: /* Assertion */ |
default: /* Assertion */ |
5900 |
if (!is_startline(scode, bracket_map, backref_map)) return FALSE; |
if (!is_startline(scode, bracket_map, backref_map)) return FALSE; |
5901 |
do scode += GET(scode, 1); while (*scode == OP_ALT); |
do scode += GET(scode, 1); while (*scode == OP_ALT); |
5902 |
scode += 1 + LINK_SIZE; |
scode += 1 + LINK_SIZE; |
5903 |
break; |
break; |
5904 |
} |
} |
5905 |
scode = first_significant_code(scode, NULL, 0, FALSE); |
scode = first_significant_code(scode, NULL, 0, FALSE); |
5906 |
op = *scode; |
op = *scode; |
5907 |
} |
} |
5908 |
|
|
5909 |
/* Non-capturing brackets */ |
/* Non-capturing brackets */ |
5910 |
|
|
5925 |
/* Other brackets */ |
/* Other brackets */ |
5926 |
|
|
5927 |
else if (op == OP_ASSERT || op == OP_ONCE) |
else if (op == OP_ASSERT || op == OP_ONCE) |
5928 |
{ |
{ |
5929 |
if (!is_startline(scode, bracket_map, backref_map)) return FALSE; |
if (!is_startline(scode, bracket_map, backref_map)) return FALSE; |
5930 |
} |
} |
5931 |
|
|
5932 |
/* .* means "start at start or after \n" if it isn't in brackets that |
/* .* means "start at start or after \n" if it isn't in brackets that |
6141 |
/* Check for global one-time settings at the start of the pattern, and remember |
/* Check for global one-time settings at the start of the pattern, and remember |
6142 |
the offset for later. */ |
the offset for later. */ |
6143 |
|
|
6144 |
while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && |
while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && |
6145 |
ptr[skipatstart+1] == CHAR_ASTERISK) |
ptr[skipatstart+1] == CHAR_ASTERISK) |
6146 |
{ |
{ |
6147 |
int newnl = 0; |
int newnl = 0; |