140 |
#endif |
#endif |
141 |
|
|
142 |
|
|
143 |
|
/* Table of special "verbs" like (*PRUNE) */ |
144 |
|
|
145 |
|
/* One entry in the table of special verbs. The compile code looks a verb up
by comparing the length of the name found in the pattern with "len", then the
characters with "name", and on a match stores "op" into the compiled code. */
typedef struct verbitem { |
146 |
|
/* Spelling of the verb, without the surrounding "(*" and ")" */
const char *name; |
147 |
|
/* Number of characters in name */
int len; |
148 |
|
/* Opcode written to the compiled pattern for this verb */
int op; |
149 |
|
} verbitem; |
150 |
|
|
151 |
|
static verbitem verbs[] = { |
152 |
|
{ "ACCEPT", 6, OP_ACCEPT }, |
153 |
|
{ "COMMIT", 6, OP_COMMIT }, |
154 |
|
{ "F", 1, OP_FAIL }, |
155 |
|
{ "FAIL", 4, OP_FAIL }, |
156 |
|
{ "PRUNE", 5, OP_PRUNE }, |
157 |
|
{ "SKIP", 4, OP_SKIP }, |
158 |
|
{ "THEN", 4, OP_THEN } |
159 |
|
}; |
160 |
|
|
161 |
|
/* Number of entries in verbs[]. The divisor is the size of the array's own
element rather than of a named type, so the count stays correct even if the
element type of the table is ever changed. The value never changes after
initialization, hence const. */

static const int verbcount = sizeof(verbs)/sizeof(verbs[0]);
162 |
|
|
163 |
|
|
164 |
/* Tables of names of POSIX character classes and their lengths. The list is |
/* Tables of names of POSIX character classes and their lengths. The list is |
165 |
terminated by a zero length entry. The first three must be alpha, lower, upper, |
terminated by a zero length entry. The first three must be alpha, lower, upper, |
166 |
as this is assumed for handling case independence. */ |
as this is assumed for handling case independence. */ |
279 |
"repeating a DEFINE group is not allowed", |
"repeating a DEFINE group is not allowed", |
280 |
"inconsistent NEWLINE options", |
"inconsistent NEWLINE options", |
281 |
"\\g is not followed by a braced name or an optionally braced non-zero number", |
"\\g is not followed by a braced name or an optionally braced non-zero number", |
282 |
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number" |
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number", |
283 |
|
"(*VERB) with an argument is not supported", |
284 |
|
/* 60 */ |
285 |
|
"(*VERB) not recognized", |
286 |
|
"number is too big" |
287 |
}; |
}; |
288 |
|
|
289 |
|
|
440 |
|
|
441 |
Returns: zero or positive => a data character |
Returns: zero or positive => a data character |
442 |
negative => a special escape sequence |
negative => a special escape sequence |
443 |
on error, errorptr is set |
on error, errorcodeptr is set |
444 |
*/ |
*/ |
445 |
|
|
446 |
static int |
static int |
524 |
c = 0; |
c = 0; |
525 |
while ((digitab[ptr[1]] & ctype_digit) != 0) |
while ((digitab[ptr[1]] & ctype_digit) != 0) |
526 |
c = c * 10 + *(++ptr) - '0'; |
c = c * 10 + *(++ptr) - '0'; |
527 |
|
|
528 |
|
if (c < 0) |
529 |
|
{ |
530 |
|
*errorcodeptr = ERR61; |
531 |
|
break; |
532 |
|
} |
533 |
|
|
534 |
if (c == 0 || (braced && *(++ptr) != '}')) |
if (c == 0 || (braced && *(++ptr) != '}')) |
535 |
{ |
{ |
536 |
*errorcodeptr = ERR57; |
*errorcodeptr = ERR57; |
537 |
return 0; |
break; |
538 |
} |
} |
539 |
|
|
540 |
if (negated) |
if (negated) |
542 |
if (c > bracount) |
if (c > bracount) |
543 |
{ |
{ |
544 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
545 |
return 0; |
break; |
546 |
} |
} |
547 |
c = bracount - (c - 1); |
c = bracount - (c - 1); |
548 |
} |
} |
571 |
c -= '0'; |
c -= '0'; |
572 |
while ((digitab[ptr[1]] & ctype_digit) != 0) |
while ((digitab[ptr[1]] & ctype_digit) != 0) |
573 |
c = c * 10 + *(++ptr) - '0'; |
c = c * 10 + *(++ptr) - '0'; |
574 |
|
if (c < 0) |
575 |
|
{ |
576 |
|
*errorcodeptr = ERR61; |
577 |
|
break; |
578 |
|
} |
579 |
if (c < 10 || c <= bracount) |
if (c < 10 || c <= bracount) |
580 |
{ |
{ |
581 |
c = -(ESC_REF + c); |
c = -(ESC_REF + c); |
671 |
if (c == 0) |
if (c == 0) |
672 |
{ |
{ |
673 |
*errorcodeptr = ERR2; |
*errorcodeptr = ERR2; |
674 |
return 0; |
break; |
675 |
} |
} |
676 |
|
|
677 |
#ifndef EBCDIC /* ASCII coding */ |
#ifndef EBCDIC /* ASCII coding */ |
950 |
{ |
{ |
951 |
while (*(++ptr) != ']') |
while (*(++ptr) != ']') |
952 |
{ |
{ |
953 |
|
if (*ptr == 0) return -1; |
954 |
if (*ptr == '\\') |
if (*ptr == '\\') |
955 |
{ |
{ |
956 |
if (*(++ptr) == 0) return -1; |
if (*(++ptr) == 0) return -1; |
978 |
/* An opening parens must now be a real metacharacter */ |
/* An opening parens must now be a real metacharacter */ |
979 |
|
|
980 |
if (*ptr != '(') continue; |
if (*ptr != '(') continue; |
981 |
if (ptr[1] != '?') |
if (ptr[1] != '?' && ptr[1] != '*') |
982 |
{ |
{ |
983 |
count++; |
count++; |
984 |
if (name == NULL && count == lorn) return count; |
if (name == NULL && count == lorn) return count; |
1106 |
{ |
{ |
1107 |
int d; |
int d; |
1108 |
register int op = *cc; |
register int op = *cc; |
|
|
|
1109 |
switch (op) |
switch (op) |
1110 |
{ |
{ |
1111 |
case OP_CBRA: |
case OP_CBRA: |
1194 |
|
|
1195 |
case OP_TYPEEXACT: |
case OP_TYPEEXACT: |
1196 |
branchlength += GET2(cc,1); |
branchlength += GET2(cc,1); |
1197 |
|
if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2; |
1198 |
cc += 4; |
cc += 4; |
1199 |
break; |
break; |
1200 |
|
|
1303 |
code += _pcre_OP_lengths[c]; |
code += _pcre_OP_lengths[c]; |
1304 |
} |
} |
1305 |
|
|
1306 |
/* In UTF-8 mode, opcodes that are followed by a character may be followed by |
/* Otherwise, we can get the item's length from the table, except that for |
1307 |
a multi-byte character. The length in the table is a minimum, so we have to |
repeated character types, we have to test for \p and \P, which have an extra |
1308 |
arrange to skip the extra bytes. */ |
two bytes of parameters. */ |
1309 |
|
|
1310 |
else |
else |
1311 |
{ |
{ |
1312 |
|
switch(c) |
1313 |
|
{ |
1314 |
|
case OP_TYPESTAR: |
1315 |
|
case OP_TYPEMINSTAR: |
1316 |
|
case OP_TYPEPLUS: |
1317 |
|
case OP_TYPEMINPLUS: |
1318 |
|
case OP_TYPEQUERY: |
1319 |
|
case OP_TYPEMINQUERY: |
1320 |
|
case OP_TYPEUPTO: |
1321 |
|
case OP_TYPEMINUPTO: |
1322 |
|
case OP_TYPEEXACT: |
1323 |
|
case OP_TYPEPOSSTAR: |
1324 |
|
case OP_TYPEPOSPLUS: |
1325 |
|
case OP_TYPEPOSQUERY: |
1326 |
|
case OP_TYPEPOSUPTO: |
1327 |
|
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; |
1328 |
|
break; |
1329 |
|
} |
1330 |
|
|
1331 |
|
/* Add in the fixed length from the table */ |
1332 |
|
|
1333 |
code += _pcre_OP_lengths[c]; |
code += _pcre_OP_lengths[c]; |
1334 |
|
|
1335 |
|
/* In UTF-8 mode, opcodes that are followed by a character may be followed by |
1336 |
|
a multi-byte character. The length in the table is a minimum, so we have to |
1337 |
|
arrange to skip the extra bytes. */ |
1338 |
|
|
1339 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1340 |
if (utf8) switch(c) |
if (utf8) switch(c) |
1341 |
{ |
{ |
1386 |
register int c = *code; |
register int c = *code; |
1387 |
if (c == OP_END) return NULL; |
if (c == OP_END) return NULL; |
1388 |
if (c == OP_RECURSE) return code; |
if (c == OP_RECURSE) return code; |
1389 |
|
|
1390 |
/* XCLASS is used for classes that cannot be represented just by a bit |
/* XCLASS is used for classes that cannot be represented just by a bit |
1391 |
map. This includes negated single high-valued characters. The length in |
map. This includes negated single high-valued characters. The length in |
1392 |
the table is zero; the actual length is stored in the compiled code. */ |
the table is zero; the actual length is stored in the compiled code. */ |
1393 |
|
|
1394 |
if (c == OP_XCLASS) code += GET(code, 1); |
if (c == OP_XCLASS) code += GET(code, 1); |
1395 |
|
|
1396 |
/* Otherwise, we get the item's length from the table. In UTF-8 mode, opcodes |
/* Otherwise, we can get the item's length from the table, except that for |
1397 |
that are followed by a character may be followed by a multi-byte character. |
repeated character types, we have to test for \p and \P, which have an extra |
1398 |
The length in the table is a minimum, so we have to arrange to skip the extra |
two bytes of parameters. */ |
|
bytes. */ |
|
1399 |
|
|
1400 |
else |
else |
1401 |
{ |
{ |
1402 |
|
switch(c) |
1403 |
|
{ |
1404 |
|
case OP_TYPESTAR: |
1405 |
|
case OP_TYPEMINSTAR: |
1406 |
|
case OP_TYPEPLUS: |
1407 |
|
case OP_TYPEMINPLUS: |
1408 |
|
case OP_TYPEQUERY: |
1409 |
|
case OP_TYPEMINQUERY: |
1410 |
|
case OP_TYPEUPTO: |
1411 |
|
case OP_TYPEMINUPTO: |
1412 |
|
case OP_TYPEEXACT: |
1413 |
|
case OP_TYPEPOSSTAR: |
1414 |
|
case OP_TYPEPOSPLUS: |
1415 |
|
case OP_TYPEPOSQUERY: |
1416 |
|
case OP_TYPEPOSUPTO: |
1417 |
|
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; |
1418 |
|
break; |
1419 |
|
} |
1420 |
|
|
1421 |
|
/* Add in the fixed length from the table */ |
1422 |
|
|
1423 |
code += _pcre_OP_lengths[c]; |
code += _pcre_OP_lengths[c]; |
1424 |
|
|
1425 |
|
/* In UTF-8 mode, opcodes that are followed by a character may be followed |
1426 |
|
by a multi-byte character. The length in the table is a minimum, so we have |
1427 |
|
to arrange to skip the extra bytes. */ |
1428 |
|
|
1429 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1430 |
if (utf8) switch(c) |
if (utf8) switch(c) |
1431 |
{ |
{ |
1521 |
|
|
1522 |
switch (c) |
switch (c) |
1523 |
{ |
{ |
1524 |
/* Check for quantifiers after a class */ |
/* Check for quantifiers after a class. XCLASS is used for classes that |
1525 |
|
cannot be represented just by a bit map. This includes negated single |
1526 |
|
high-valued characters. The length in _pcre_OP_lengths[] is zero; the |
1527 |
|
actual length is stored in the compiled code, so we must update "code" |
1528 |
|
here. */ |
1529 |
|
|
1530 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
1531 |
case OP_XCLASS: |
case OP_XCLASS: |
1532 |
ccode = code + GET(code, 1); |
ccode = code += GET(code, 1); |
1533 |
goto CHECK_CLASS_REPEAT; |
goto CHECK_CLASS_REPEAT; |
1534 |
#endif |
#endif |
1535 |
|
|
2722 |
else inescq = TRUE; |
else inescq = TRUE; |
2723 |
continue; |
continue; |
2724 |
} |
} |
2725 |
|
else if (-c == ESC_E) continue; /* Ignore orphan \E */ |
2726 |
|
|
2727 |
if (c < 0) |
if (c < 0) |
2728 |
{ |
{ |
3953 |
/* ===================================================================*/ |
/* ===================================================================*/ |
3954 |
/* Start of nested parenthesized sub-expression, or comment or lookahead or |
/* Start of nested parenthesized sub-expression, or comment or lookahead or |
3955 |
lookbehind or option setting or condition or all the other extended |
lookbehind or option setting or condition or all the other extended |
3956 |
parenthesis forms. First deal with the specials; all are introduced by ?, |
parenthesis forms. */ |
|
and the appearance of any of them means that this is not a capturing |
|
|
group. */ |
|
3957 |
|
|
3958 |
case '(': |
case '(': |
3959 |
newoptions = options; |
newoptions = options; |
3962 |
save_hwm = cd->hwm; |
save_hwm = cd->hwm; |
3963 |
reset_bracount = FALSE; |
reset_bracount = FALSE; |
3964 |
|
|
3965 |
if (*(++ptr) == '?') |
/* First deal with various "verbs" that can be introduced by '*'. */ |
3966 |
|
|
3967 |
|
if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0) |
3968 |
|
{ |
3969 |
|
int i, namelen; |
3970 |
|
const uschar *name = ++ptr; |
3971 |
|
previous = NULL; |
3972 |
|
while ((cd->ctypes[*++ptr] & ctype_letter) != 0); |
3973 |
|
if (*ptr == ':') |
3974 |
|
{ |
3975 |
|
*errorcodeptr = ERR59; /* Not supported */ |
3976 |
|
goto FAILED; |
3977 |
|
} |
3978 |
|
if (*ptr != ')') |
3979 |
|
{ |
3980 |
|
*errorcodeptr = ERR60; |
3981 |
|
goto FAILED; |
3982 |
|
} |
3983 |
|
namelen = ptr - name; |
3984 |
|
for (i = 0; i < verbcount; i++) |
3985 |
|
{ |
3986 |
|
if (namelen == verbs[i].len && |
3987 |
|
strncmp((char *)name, verbs[i].name, namelen) == 0) |
3988 |
|
{ |
3989 |
|
*code = verbs[i].op; |
3990 |
|
if (*code++ == OP_ACCEPT) cd->had_accept = TRUE; |
3991 |
|
break; |
3992 |
|
} |
3993 |
|
} |
3994 |
|
if (i < verbcount) continue; |
3995 |
|
*errorcodeptr = ERR60; |
3996 |
|
goto FAILED; |
3997 |
|
} |
3998 |
|
|
3999 |
|
/* Deal with the extended parentheses; all are introduced by '?', and the |
4000 |
|
appearance of any of them means that this is not a capturing group. */ |
4001 |
|
|
4002 |
|
else if (*ptr == '?') |
4003 |
{ |
{ |
4004 |
int i, set, unset, namelen; |
int i, set, unset, namelen; |
4005 |
int *optset; |
int *optset; |
4241 |
|
|
4242 |
/* ------------------------------------------------------------ */ |
/* ------------------------------------------------------------ */ |
4243 |
case '!': /* Negative lookahead */ |
case '!': /* Negative lookahead */ |
|
bravalue = OP_ASSERT_NOT; |
|
4244 |
ptr++; |
ptr++; |
4245 |
|
if (*ptr == ')') /* Optimize (?!) */ |
4246 |
|
{ |
4247 |
|
*code++ = OP_FAIL; |
4248 |
|
previous = NULL; |
4249 |
|
continue; |
4250 |
|
} |
4251 |
|
bravalue = OP_ASSERT_NOT; |
4252 |
break; |
break; |
4253 |
|
|
4254 |
|
|
4797 |
goto FAILED; |
goto FAILED; |
4798 |
} |
} |
4799 |
|
|
4800 |
/* In the pre-compile phase, update the length by the length of the nested |
/* In the pre-compile phase, update the length by the length of the group, |
4801 |
group, less the brackets at either end. Then reduce the compiled code to |
less the brackets at either end. Then reduce the compiled code to just a |
4802 |
just the brackets so that it doesn't use much memory if it is duplicated by |
set of non-capturing brackets so that it doesn't use much memory if it is |
4803 |
a quantifier. */ |
duplicated by a quantifier. */
4804 |
|
|
4805 |
if (lengthptr != NULL) |
if (lengthptr != NULL) |
4806 |
{ |
{ |
4810 |
goto FAILED; |
goto FAILED; |
4811 |
} |
} |
4812 |
*lengthptr += length_prevgroup - 2 - 2*LINK_SIZE; |
*lengthptr += length_prevgroup - 2 - 2*LINK_SIZE; |
4813 |
code++; |
*code++ = OP_BRA; |
4814 |
PUTINC(code, 0, 1 + LINK_SIZE); |
PUTINC(code, 0, 1 + LINK_SIZE); |
4815 |
*code++ = OP_KET; |
*code++ = OP_KET; |
4816 |
PUTINC(code, 0, 1 + LINK_SIZE); |
PUTINC(code, 0, 1 + LINK_SIZE); |
4817 |
|
break; /* No need to waste time with special character handling */ |
4818 |
} |
} |
4819 |
|
|
4820 |
/* Otherwise update the main code pointer to the end of the group. */ |
/* Otherwise update the main code pointer to the end of the group. */ |
4821 |
|
|
4822 |
else code = tempcode; |
code = tempcode; |
4823 |
|
|
4824 |
/* For a DEFINE group, required and first character settings are not |
/* For a DEFINE group, required and first character settings are not |
4825 |
relevant. */ |
relevant. */ |
5841 |
cd->hwm = cworkspace; |
cd->hwm = cworkspace; |
5842 |
cd->req_varyopt = 0; |
cd->req_varyopt = 0; |
5843 |
cd->nopartial = FALSE; |
cd->nopartial = FALSE; |
5844 |
|
cd->had_accept = FALSE; |
5845 |
|
|
5846 |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
5847 |
error, errorcode will be set non-zero, so we don't need to look at the result |
error, errorcode will be set non-zero, so we don't need to look at the result |
5856 |
re->top_backref = cd->top_backref; |
re->top_backref = cd->top_backref; |
5857 |
|
|
5858 |
if (cd->nopartial) re->options |= PCRE_NOPARTIAL; |
if (cd->nopartial) re->options |= PCRE_NOPARTIAL; |
5859 |
|
if (cd->had_accept) reqbyte = -1; /* Must disable after (*ACCEPT) */ |
5860 |
|
|
5861 |
/* If not reached end of pattern on success, there's an excess bracket. */ |
/* If not reached end of pattern on success, there's an excess bracket. */ |
5862 |
|
|