243 |
"repeating a DEFINE group is not allowed", |
"repeating a DEFINE group is not allowed", |
244 |
"inconsistent NEWLINE options", |
"inconsistent NEWLINE options", |
245 |
"\\g is not followed by a braced name or an optionally braced non-zero number", |
"\\g is not followed by a braced name or an optionally braced non-zero number", |
246 |
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number" |
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number" |
247 |
}; |
}; |
248 |
|
|
249 |
|
|
453 |
|
|
454 |
/* \g must be followed by a number, either plain or braced. If positive, it |
/* \g must be followed by a number, either plain or braced. If positive, it |
455 |
is an absolute backreference. If negative, it is a relative backreference. |
is an absolute backreference. If negative, it is a relative backreference. |
456 |
This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a |
This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a |
457 |
reference to a named group. This is part of Perl's movement towards a |
reference to a named group. This is part of Perl's movement towards a |
458 |
unified syntax for back references. As this is synonymous with \k{name}, we |
unified syntax for back references. As this is synonymous with \k{name}, we |
459 |
fudge it up by pretending it really was \k. */ |
fudge it up by pretending it really was \k. */ |
460 |
|
|
461 |
case 'g': |
case 'g': |
464 |
const uschar *p; |
const uschar *p; |
465 |
for (p = ptr+2; *p != 0 && *p != '}'; p++) |
for (p = ptr+2; *p != 0 && *p != '}'; p++) |
466 |
if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break; |
if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break; |
467 |
if (*p != 0 && *p != '}') |
if (*p != 0 && *p != '}') |
468 |
{ |
{ |
469 |
c = -ESC_k; |
c = -ESC_k; |
470 |
break; |
break; |
471 |
} |
} |
472 |
braced = TRUE; |
braced = TRUE; |
473 |
ptr++; |
ptr++; |
474 |
} |
} |
1381 |
const uschar *ccode; |
const uschar *ccode; |
1382 |
|
|
1383 |
c = *code; |
c = *code; |
1384 |
|
|
1385 |
/* Groups with zero repeats can of course be empty; skip them. */ |
/* Groups with zero repeats can of course be empty; skip them. */ |
1386 |
|
|
1387 |
if (c == OP_BRAZERO || c == OP_BRAMINZERO) |
if (c == OP_BRAZERO || c == OP_BRAMINZERO) |
1388 |
{ |
{ |
1389 |
|
code += _pcre_OP_lengths[c]; |
1390 |
do code += GET(code, 1); while (*code == OP_ALT); |
do code += GET(code, 1); while (*code == OP_ALT); |
1391 |
c = *code; |
c = *code; |
1392 |
continue; |
continue; |
1393 |
} |
} |
1394 |
|
|
1395 |
/* For other groups, scan the branches. */ |
/* For other groups, scan the branches. */ |
1396 |
|
|
1397 |
if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE) |
if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE) |
1398 |
{ |
{ |
1399 |
BOOL empty_branch; |
BOOL empty_branch; |
1410 |
} |
} |
1411 |
while (*code == OP_ALT); |
while (*code == OP_ALT); |
1412 |
if (!empty_branch) return FALSE; /* All branches are non-empty */ |
if (!empty_branch) return FALSE; /* All branches are non-empty */ |
1413 |
c = *code; |
c = *code; |
1414 |
continue; |
continue; |
1415 |
} |
} |
1416 |
|
|
2114 |
int class_lastchar; |
int class_lastchar; |
2115 |
int newoptions; |
int newoptions; |
2116 |
int recno; |
int recno; |
2117 |
int refsign; |
int refsign; |
2118 |
int skipbytes; |
int skipbytes; |
2119 |
int subreqbyte; |
int subreqbyte; |
2120 |
int subfirstbyte; |
int subfirstbyte; |
3641 |
|
|
3642 |
code[1+LINK_SIZE] = OP_CREF; |
code[1+LINK_SIZE] = OP_CREF; |
3643 |
skipbytes = 3; |
skipbytes = 3; |
3644 |
refsign = -1; |
refsign = -1; |
3645 |
|
|
3646 |
/* Check for a test for recursion in a named group. */ |
/* Check for a test for recursion in a named group. */ |
3647 |
|
|
3665 |
terminator = '\''; |
terminator = '\''; |
3666 |
ptr++; |
ptr++; |
3667 |
} |
} |
3668 |
else |
else |
3669 |
{ |
{ |
3670 |
terminator = 0; |
terminator = 0; |
3671 |
if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr); |
if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr); |
3672 |
} |
} |
3673 |
|
|
3674 |
/* We now expect to read a name; anything else is an error */ |
/* We now expect to read a name; anything else is an error */ |
3675 |
|
|
3707 |
/* In the real compile we do the work of looking for the actual |
/* In the real compile we do the work of looking for the actual |
3708 |
reference. If the string started with "+" or "-" we require the rest to |
reference. If the string started with "+" or "-" we require the rest to |
3709 |
be digits, in which case recno will be set. */ |
be digits, in which case recno will be set. */ |
3710 |
|
|
3711 |
if (refsign > 0) |
if (refsign > 0) |
3712 |
{ |
{ |
3713 |
if (recno <= 0) |
if (recno <= 0) |
3714 |
{ |
{ |
3715 |
*errorcodeptr = ERR58; |
*errorcodeptr = ERR58; |
3716 |
goto FAILED; |
goto FAILED; |
3717 |
} |
} |
3718 |
if (refsign == '-') |
if (refsign == '-') |
3719 |
{ |
{ |
3720 |
recno = cd->bracount - recno + 1; |
recno = cd->bracount - recno + 1; |
3721 |
if (recno <= 0) |
if (recno <= 0) |
3722 |
{ |
{ |
3723 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
3724 |
goto FAILED; |
goto FAILED; |
3725 |
} |
} |
3726 |
} |
} |
3727 |
else recno += cd->bracount; |
else recno += cd->bracount; |
3728 |
PUT2(code, 2+LINK_SIZE, recno); |
PUT2(code, 2+LINK_SIZE, recno); |
3729 |
break; |
break; |
3730 |
} |
} |
3731 |
|
|
3732 |
/* Otherwise (did not start with "+" or "-"), start by looking for the |
/* Otherwise (did not start with "+" or "-"), start by looking for the |
3733 |
name. */ |
name. */ |
3734 |
|
|
3735 |
slot = cd->name_table; |
slot = cd->name_table; |
3736 |
for (i = 0; i < cd->names_found; i++) |
for (i = 0; i < cd->names_found; i++) |
3737 |
{ |
{ |
4056 |
const uschar *called; |
const uschar *called; |
4057 |
|
|
4058 |
if ((refsign = *ptr) == '+') ptr++; |
if ((refsign = *ptr) == '+') ptr++; |
4059 |
else if (refsign == '-') |
else if (refsign == '-') |
4060 |
{ |
{ |
4061 |
if ((digitab[ptr[1]] & ctype_digit) == 0) |
if ((digitab[ptr[1]] & ctype_digit) == 0) |
4062 |
goto OTHER_CHAR_AFTER_QUERY; |
goto OTHER_CHAR_AFTER_QUERY; |
4063 |
ptr++; |
ptr++; |
4064 |
} |
} |
4065 |
|
|
4066 |
recno = 0; |
recno = 0; |
4067 |
while((digitab[*ptr] & ctype_digit) != 0) |
while((digitab[*ptr] & ctype_digit) != 0) |
4068 |
recno = recno * 10 + *ptr++ - '0'; |
recno = recno * 10 + *ptr++ - '0'; |
4072 |
*errorcodeptr = ERR29; |
*errorcodeptr = ERR29; |
4073 |
goto FAILED; |
goto FAILED; |
4074 |
} |
} |
4075 |
|
|
4076 |
if (refsign == '-') |
if (refsign == '-') |
4077 |
{ |
{ |
4078 |
if (recno == 0) |
if (recno == 0) |
4079 |
{ |
{ |
4080 |
*errorcodeptr = ERR58; |
*errorcodeptr = ERR58; |
4081 |
goto FAILED; |
goto FAILED; |
4082 |
} |
} |
4083 |
recno = cd->bracount - recno + 1; |
recno = cd->bracount - recno + 1; |
4084 |
if (recno <= 0) |
if (recno <= 0) |
4085 |
{ |
{ |
4086 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
4087 |
goto FAILED; |
goto FAILED; |
4088 |
} |
} |
4089 |
} |
} |
4090 |
else if (refsign == '+') |
else if (refsign == '+') |
4091 |
{ |
{ |
4093 |
{ |
{ |
4094 |
*errorcodeptr = ERR58; |
*errorcodeptr = ERR58; |
4095 |
goto FAILED; |
goto FAILED; |
4096 |
} |
} |
4097 |
recno += cd->bracount; |
recno += cd->bracount; |
4098 |
} |
} |
4099 |
|
|
4100 |
/* Come here from code above that handles a named recursion */ |
/* Come here from code above that handles a named recursion */ |
4101 |
|
|
4169 |
|
|
4170 |
/* ------------------------------------------------------------ */ |
/* ------------------------------------------------------------ */ |
4171 |
default: /* Other characters: check option setting */ |
default: /* Other characters: check option setting */ |
4172 |
OTHER_CHAR_AFTER_QUERY: |
OTHER_CHAR_AFTER_QUERY: |
4173 |
set = unset = 0; |
set = unset = 0; |
4174 |
optset = &set; |
optset = &set; |
4175 |
|
|
4320 |
is on the bracket. */ |
is on the bracket. */ |
4321 |
|
|
4322 |
/* If this is a conditional bracket, check that there are no more than |
/* If this is a conditional bracket, check that there are no more than |
4323 |
two branches in the group, or just one if it's a DEFINE group. */ |
two branches in the group, or just one if it's a DEFINE group. We do this |
4324 |
|
in the real compile phase, not in the pre-pass, where the whole group may |
4325 |
|
not be available. */ |
4326 |
|
|
4327 |
if (bravalue == OP_COND) |
if (bravalue == OP_COND && lengthptr == NULL) |
4328 |
{ |
{ |
4329 |
uschar *tc = code; |
uschar *tc = code; |
4330 |
int condcount = 0; |
int condcount = 0; |
4656 |
out the amount of memory needed, as well as during the real compile phase. The |
out the amount of memory needed, as well as during the real compile phase. The |
4657 |
value of lengthptr distinguishes the two phases. |
value of lengthptr distinguishes the two phases. |
4658 |
|
|
4659 |
Argument: |
Arguments: |
4660 |
options option bits, including any changes for this subpattern |
options option bits, including any changes for this subpattern |
4661 |
oldims previous settings of ims option bits |
oldims previous settings of ims option bits |
4662 |
codeptr -> the address of the current code pointer |
codeptr -> the address of the current code pointer |
4809 |
} |
} |
4810 |
} |
} |
4811 |
|
|
4812 |
/* Reached end of expression, either ')' or end of pattern. Go back through |
/* Reached end of expression, either ')' or end of pattern. In the real |
4813 |
the alternative branches and reverse the chain of offsets, with the field in |
compile phase, go back through the alternative branches and reverse the chain |
4814 |
the BRA item now becoming an offset to the first alternative. If there are |
of offsets, with the field in the BRA item now becoming an offset to the |
4815 |
no alternatives, it points to the end of the group. The length in the |
first alternative. If there are no alternatives, it points to the end of the |
4816 |
terminating ket is always the length of the whole bracketed item. If any of |
group. The length in the terminating ket is always the length of the whole |
4817 |
the ims options were changed inside the group, compile a resetting op-code |
bracketed item. If any of the ims options were changed inside the group, |
4818 |
following, except at the very end of the pattern. Return leaving the pointer |
compile a resetting op-code following, except at the very end of the pattern. |
4819 |
at the terminating char. */ |
Return leaving the pointer at the terminating char. */ |
4820 |
|
|
4821 |
if (*ptr != '|') |
if (*ptr != '|') |
4822 |
{ |
{ |
4823 |
int branch_length = code - last_branch; |
if (lengthptr == NULL) |
|
do |
|
4824 |
{ |
{ |
4825 |
int prev_length = GET(last_branch, 1); |
int branch_length = code - last_branch; |
4826 |
PUT(last_branch, 1, branch_length); |
do |
4827 |
branch_length = prev_length; |
{ |
4828 |
last_branch -= branch_length; |
int prev_length = GET(last_branch, 1); |
4829 |
|
PUT(last_branch, 1, branch_length); |
4830 |
|
branch_length = prev_length; |
4831 |
|
last_branch -= branch_length; |
4832 |
|
} |
4833 |
|
while (branch_length > 0); |
4834 |
} |
} |
|
while (branch_length > 0); |
|
4835 |
|
|
4836 |
/* Fill in the ket */ |
/* Fill in the ket */ |
4837 |
|
|
4858 |
return TRUE; |
return TRUE; |
4859 |
} |
} |
4860 |
|
|
4861 |
/* Another branch follows; insert an "or" node. Its length field points back |
/* Another branch follows. In the pre-compile phase, we can move the code |
4862 |
|
pointer back to where it was for the start of the first branch. (That is, |
4863 |
|
pretend that each branch is the only one.) |
4864 |
|
|
4865 |
|
In the real compile phase, insert an ALT node. Its length field points back |
4866 |
to the previous branch while the bracket remains open. At the end the chain |
to the previous branch while the bracket remains open. At the end the chain |
4867 |
is reversed. It's done like this so that the start of the bracket has a |
is reversed. It's done like this so that the start of the bracket has a |
4868 |
zero offset until it is closed, making it possible to detect recursion. */ |
zero offset until it is closed, making it possible to detect recursion. */ |
4869 |
|
|
4870 |
*code = OP_ALT; |
if (lengthptr != NULL) |
4871 |
PUT(code, 1, code - last_branch); |
{ |
4872 |
bc.current = last_branch = code; |
code = *codeptr + 1 + LINK_SIZE + skipbytes; |
4873 |
code += 1 + LINK_SIZE; |
length += 1 + LINK_SIZE; |
4874 |
|
} |
4875 |
|
else |
4876 |
|
{ |
4877 |
|
*code = OP_ALT; |
4878 |
|
PUT(code, 1, code - last_branch); |
4879 |
|
bc.current = last_branch = code; |
4880 |
|
code += 1 + LINK_SIZE; |
4881 |
|
} |
4882 |
|
|
4883 |
ptr++; |
ptr++; |
|
length += 1 + LINK_SIZE; |
|
4884 |
} |
} |
4885 |
/* Control never reaches here */ |
/* Control never reaches here */ |
4886 |
} |
} |