140 |
#endif |
#endif |
141 |
|
|
142 |
|
|
143 |
/* Table of special "verbs" like (*PRUNE). This is a short table, so it is
searched linearly. Put all the names into a single string, in order to reduce
the number of relocations when a shared library is dynamically linked. */

/* One entry per verb. The name itself is not stored here: entries are matched
against the NUL-separated names in verbnames by stepping through that string
len+1 bytes at a time. */

typedef struct verbitem {
  int   len;    /* length of the verb's name, not counting the NUL separator */
  int   op;     /* opcode written to the compiled pattern for this verb */
} verbitem;
151 |
|
|
152 |
|
/* The verb names, separated by NUL bytes, in the same order as the entries in
verbs[]. The final name is terminated by the string literal's own NUL. */

static const char verbnames[] =
  "ACCEPT\0"
  "COMMIT\0"
  "F\0"
  "FAIL\0"
  "PRUNE\0"
  "SKIP\0"
  "THEN";
160 |
|
|
161 |
static verbitem verbs[] = { |
static verbitem verbs[] = { |
162 |
{ "ACCEPT", 6, OP_ACCEPT }, |
{ 6, OP_ACCEPT }, |
163 |
{ "COMMIT", 6, OP_COMMIT }, |
{ 6, OP_COMMIT }, |
164 |
{ "F", 1, OP_FAIL }, |
{ 1, OP_FAIL }, |
165 |
{ "FAIL", 4, OP_FAIL }, |
{ 4, OP_FAIL }, |
166 |
{ "PRUNE", 5, OP_PRUNE }, |
{ 5, OP_PRUNE }, |
167 |
{ "SKIP", 4, OP_SKIP }, |
{ 4, OP_SKIP }, |
168 |
{ "THEN", 4, OP_THEN } |
{ 4, OP_THEN } |
169 |
}; |
}; |
170 |
|
|
171 |
/* Number of entries in verbs[]; used as the bound of the linear search. */

static const int verbcount = sizeof(verbs)/sizeof(verbs[0]);
172 |
|
|
173 |
|
|
174 |
/* Tables of names of POSIX character classes and their lengths. The names are
now all in a single string, to reduce the number of relocations when a shared
library is dynamically loaded. The list of lengths is terminated by a zero
length entry. The first three must be alpha, lower, upper, as this is assumed
for handling case independence. */

static const char posix_names[] =
  "alpha\0"  "lower\0"  "upper\0"  "alnum\0"  "ascii\0"  "blank\0"
  "cntrl\0"  "digit\0"  "graph\0"  "print\0"  "punct\0"  "space\0"
  "word\0"   "xdigit";
184 |
|
|
185 |
/* Lengths of the names in posix_names, in the same order. The final zero
terminates the list. */

static const uschar posix_name_lengths[] = {
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
219 |
/* The texts of compile-time error messages. These are "char *" because they |
/* The texts of compile-time error messages. These are "char *" because they |
220 |
are passed to the outside world. Do not ever re-use any error number, because |
are passed to the outside world. Do not ever re-use any error number, because |
221 |
they are documented. Always add a new error instead. Messages marked DEAD below |
they are documented. Always add a new error instead. Messages marked DEAD below |
222 |
are no longer used. */ |
are no longer used. This used to be a table of strings, but in order to reduce |
223 |
|
the number of relocations needed when a shared library is loaded dynamically, |
224 |
static const char *error_texts[] = { |
it is now one long string. We cannot use a table of offsets, because the |
225 |
"no error", |
lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we |
226 |
"\\ at end of pattern", |
simply count through to the one we want - this isn't a performance issue |
227 |
"\\c at end of pattern", |
because these strings are used only when there is a compilation error. */ |
228 |
"unrecognized character follows \\", |
|
229 |
"numbers out of order in {} quantifier", |
static const char error_texts[] = |
230 |
|
"no error\0" |
231 |
|
"\\ at end of pattern\0" |
232 |
|
"\\c at end of pattern\0" |
233 |
|
"unrecognized character follows \\\0" |
234 |
|
"numbers out of order in {} quantifier\0" |
235 |
/* 5 */ |
/* 5 */ |
236 |
"number too big in {} quantifier", |
"number too big in {} quantifier\0" |
237 |
"missing terminating ] for character class", |
"missing terminating ] for character class\0" |
238 |
"invalid escape sequence in character class", |
"invalid escape sequence in character class\0" |
239 |
"range out of order in character class", |
"range out of order in character class\0" |
240 |
"nothing to repeat", |
"nothing to repeat\0" |
241 |
/* 10 */ |
/* 10 */ |
242 |
"operand of unlimited repeat could match the empty string", /** DEAD **/ |
"operand of unlimited repeat could match the empty string\0" /** DEAD **/ |
243 |
"internal error: unexpected repeat", |
"internal error: unexpected repeat\0" |
244 |
"unrecognized character after (?", |
"unrecognized character after (?\0" |
245 |
"POSIX named classes are supported only within a class", |
"POSIX named classes are supported only within a class\0" |
246 |
"missing )", |
"missing )\0" |
247 |
/* 15 */ |
/* 15 */ |
248 |
"reference to non-existent subpattern", |
"reference to non-existent subpattern\0" |
249 |
"erroffset passed as NULL", |
"erroffset passed as NULL\0" |
250 |
"unknown option bit(s) set", |
"unknown option bit(s) set\0" |
251 |
"missing ) after comment", |
"missing ) after comment\0" |
252 |
"parentheses nested too deeply", /** DEAD **/ |
"parentheses nested too deeply\0" /** DEAD **/ |
253 |
/* 20 */ |
/* 20 */ |
254 |
"regular expression is too large", |
"regular expression is too large\0" |
255 |
"failed to get memory", |
"failed to get memory\0" |
256 |
"unmatched parentheses", |
"unmatched parentheses\0" |
257 |
"internal error: code overflow", |
"internal error: code overflow\0" |
258 |
"unrecognized character after (?<", |
"unrecognized character after (?<\0" |
259 |
/* 25 */ |
/* 25 */ |
260 |
"lookbehind assertion is not fixed length", |
"lookbehind assertion is not fixed length\0" |
261 |
"malformed number or name after (?(", |
"malformed number or name after (?(\0" |
262 |
"conditional group contains more than two branches", |
"conditional group contains more than two branches\0" |
263 |
"assertion expected after (?(", |
"assertion expected after (?(\0" |
264 |
"(?R or (?[+-]digits must be followed by )", |
"(?R or (?[+-]digits must be followed by )\0" |
265 |
/* 30 */ |
/* 30 */ |
266 |
"unknown POSIX class name", |
"unknown POSIX class name\0" |
267 |
"POSIX collating elements are not supported", |
"POSIX collating elements are not supported\0" |
268 |
"this version of PCRE is not compiled with PCRE_UTF8 support", |
"this version of PCRE is not compiled with PCRE_UTF8 support\0" |
269 |
"spare error", /** DEAD **/ |
"spare error\0" /** DEAD **/ |
270 |
"character value in \\x{...} sequence is too large", |
"character value in \\x{...} sequence is too large\0" |
271 |
/* 35 */ |
/* 35 */ |
272 |
"invalid condition (?(0)", |
"invalid condition (?(0)\0" |
273 |
"\\C not allowed in lookbehind assertion", |
"\\C not allowed in lookbehind assertion\0" |
274 |
"PCRE does not support \\L, \\l, \\N, \\U, or \\u", |
"PCRE does not support \\L, \\l, \\N, \\U, or \\u\0" |
275 |
"number after (?C is > 255", |
"number after (?C is > 255\0" |
276 |
"closing ) for (?C expected", |
"closing ) for (?C expected\0" |
277 |
/* 40 */ |
/* 40 */ |
278 |
"recursive call could loop indefinitely", |
"recursive call could loop indefinitely\0" |
279 |
"unrecognized character after (?P", |
"unrecognized character after (?P\0" |
280 |
"syntax error in subpattern name (missing terminator)", |
"syntax error in subpattern name (missing terminator)\0" |
281 |
"two named subpatterns have the same name", |
"two named subpatterns have the same name\0" |
282 |
"invalid UTF-8 string", |
"invalid UTF-8 string\0" |
283 |
/* 45 */ |
/* 45 */ |
284 |
"support for \\P, \\p, and \\X has not been compiled", |
"support for \\P, \\p, and \\X has not been compiled\0" |
285 |
"malformed \\P or \\p sequence", |
"malformed \\P or \\p sequence\0" |
286 |
"unknown property name after \\P or \\p", |
"unknown property name after \\P or \\p\0" |
287 |
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)", |
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0" |
288 |
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")", |
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" |
289 |
/* 50 */ |
/* 50 */ |
290 |
"repeated subpattern is too long", /** DEAD **/ |
"repeated subpattern is too long\0" /** DEAD **/ |
291 |
"octal value is greater than \\377 (not in UTF-8 mode)", |
"octal value is greater than \\377 (not in UTF-8 mode)\0" |
292 |
"internal error: overran compiling workspace", |
"internal error: overran compiling workspace\0" |
293 |
"internal error: previously-checked referenced subpattern not found", |
"internal error: previously-checked referenced subpattern not found\0" |
294 |
"DEFINE group contains more than one branch", |
"DEFINE group contains more than one branch\0" |
295 |
/* 55 */ |
/* 55 */ |
296 |
"repeating a DEFINE group is not allowed", |
"repeating a DEFINE group is not allowed\0" |
297 |
"inconsistent NEWLINE options", |
"inconsistent NEWLINE options\0" |
298 |
"\\g is not followed by a braced name or an optionally braced non-zero number", |
"\\g is not followed by a braced name or an optionally braced non-zero number\0" |
299 |
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number", |
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number\0" |
300 |
"(*VERB) with an argument is not supported", |
"(*VERB) with an argument is not supported\0" |
301 |
/* 60 */ |
/* 60 */ |
302 |
"(*VERB) not recognized", |
"(*VERB) not recognized\0" |
303 |
"number is too big" |
"number is too big"; |
|
}; |
|
304 |
|
|
305 |
|
|
306 |
/* Table to identify digits and hex digits. This is used when compiling |
/* Table to identify digits and hex digits. This is used when compiling |
436 |
|
|
437 |
|
|
438 |
/************************************************* |
/************************************************* |
439 |
|
* Find an error text * |
440 |
|
*************************************************/ |
441 |
|
|
442 |
|
/* The error texts are now all in one long string, to save on relocations. As |
443 |
|
some of the text is of unknown length, we can't use a table of offsets. |
444 |
|
Instead, just count through the strings. This is not a performance issue |
445 |
|
because it happens only when there has been a compilation error. |
446 |
|
|
447 |
|
Argument: the error number |
448 |
|
Returns: pointer to the error string |
449 |
|
*/ |
450 |
|
|
451 |
|
static const char * |
452 |
|
find_error_text(int n) |
453 |
|
{ |
454 |
|
const char *s = error_texts; |
455 |
|
for (; n > 0; n--) while (*s++ != 0); |
456 |
|
return s; |
457 |
|
} |
458 |
|
|
459 |
|
|
460 |
|
/************************************************* |
461 |
* Handle escapes * |
* Handle escapes * |
462 |
*************************************************/ |
*************************************************/ |
463 |
|
|
814 |
while (bot < top) |
while (bot < top) |
815 |
{ |
{ |
816 |
i = (bot + top) >> 1; |
i = (bot + top) >> 1; |
817 |
c = strcmp(name, _pcre_utt[i].name); |
c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset); |
818 |
if (c == 0) |
if (c == 0) |
819 |
{ |
{ |
820 |
*dptr = _pcre_utt[i].value; |
*dptr = _pcre_utt[i].value; |
1771 |
static int |
static int |
1772 |
check_posix_name(const uschar *ptr, int len) |
check_posix_name(const uschar *ptr, int len) |
1773 |
{ |
{ |
1774 |
|
const char *pn = posix_names; |
1775 |
register int yield = 0; |
register int yield = 0; |
1776 |
while (posix_name_lengths[yield] != 0) |
while (posix_name_lengths[yield] != 0) |
1777 |
{ |
{ |
1778 |
if (len == posix_name_lengths[yield] && |
if (len == posix_name_lengths[yield] && |
1779 |
strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield; |
strncmp((const char *)ptr, pn, len) == 0) return yield; |
1780 |
|
pn += posix_name_lengths[yield] + 1; |
1781 |
yield++; |
yield++; |
1782 |
} |
} |
1783 |
return -1; |
return -1; |
4064 |
if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0) |
if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0) |
4065 |
{ |
{ |
4066 |
int i, namelen; |
int i, namelen; |
4067 |
|
const char *vn = verbnames; |
4068 |
const uschar *name = ++ptr; |
const uschar *name = ++ptr; |
4069 |
previous = NULL; |
previous = NULL; |
4070 |
while ((cd->ctypes[*++ptr] & ctype_letter) != 0); |
while ((cd->ctypes[*++ptr] & ctype_letter) != 0); |
4082 |
for (i = 0; i < verbcount; i++) |
for (i = 0; i < verbcount; i++) |
4083 |
{ |
{ |
4084 |
if (namelen == verbs[i].len && |
if (namelen == verbs[i].len && |
4085 |
strncmp((char *)name, verbs[i].name, namelen) == 0) |
strncmp((char *)name, vn, namelen) == 0) |
4086 |
{ |
{ |
4087 |
*code = verbs[i].op; |
*code = verbs[i].op; |
4088 |
if (*code++ == OP_ACCEPT) cd->had_accept = TRUE; |
if (*code++ == OP_ACCEPT) cd->had_accept = TRUE; |
4089 |
break; |
break; |
4090 |
} |
} |
4091 |
|
vn += verbs[i].len + 1; |
4092 |
} |
} |
4093 |
if (i < verbcount) continue; |
if (i < verbcount) continue; |
4094 |
*errorcodeptr = ERR60; |
*errorcodeptr = ERR60; |
6047 |
PCRE_EARLY_ERROR_RETURN: |
PCRE_EARLY_ERROR_RETURN: |
6048 |
*erroroffset = ptr - (const uschar *)pattern; |
*erroroffset = ptr - (const uschar *)pattern; |
6049 |
PCRE_EARLY_ERROR_RETURN2: |
PCRE_EARLY_ERROR_RETURN2: |
6050 |
*errorptr = error_texts[errorcode]; |
*errorptr = find_error_text(errorcode); |
6051 |
if (errorcodeptr != NULL) *errorcodeptr = errorcode; |
if (errorcodeptr != NULL) *errorcodeptr = errorcode; |
6052 |
return NULL; |
return NULL; |
6053 |
} |
} |
6132 |
if (code - codestart > length) |
if (code - codestart > length) |
6133 |
{ |
{ |
6134 |
(pcre_free)(re); |
(pcre_free)(re); |
6135 |
*errorptr = error_texts[ERR23]; |
*errorptr = find_error_text(ERR23); |
6136 |
*erroroffset = ptr - (uschar *)pattern; |
*erroroffset = ptr - (uschar *)pattern; |
6137 |
if (errorcodeptr != NULL) *errorcodeptr = ERR23; |
if (errorcodeptr != NULL) *errorcodeptr = ERR23; |
6138 |
return NULL; |
return NULL; |