410 
"this version of PCRE is not compiled with PCRE_UCP support\0" 
"this version of PCRE is not compiled with PCRE_UCP support\0" 
411 
"\\c must be followed by an ASCII character\0" 
"\\c must be followed by an ASCII character\0" 
412 
"\\k is not followed by a braced, anglebracketed, or quoted name\0" 
"\\k is not followed by a braced, anglebracketed, or quoted name\0" 
413 

/* 70 */ 
414 

"internal error: unknown opcode in find_fixedlength()\0" 
415 
; 
; 
416 


417 
/* Table to identify digits and hex digits. This is used when compiling 
/* Table to identify digits and hex digits. This is used when compiling 
1530 
or -1 if there is no fixed length, 
or -1 if there is no fixed length, 
1531 
or -2 if \C was encountered 
or -2 if \C was encountered 
1532 
or -3 if an OP_RECURSE item was encountered and atend is FALSE 
or -3 if an OP_RECURSE item was encountered and atend is FALSE 
1533 

or -4 if an unknown opcode was encountered (internal error) 
1534 
*/ 
*/ 
1535 


1536 
static int 
static int 
1554 
/* We only need to continue for OP_CBRA (normal capturing bracket) and 
/* We only need to continue for OP_CBRA (normal capturing bracket) and 
1555 
OP_BRA (normal non-capturing bracket) because the other variants of these 
OP_BRA (normal non-capturing bracket) because the other variants of these 
1556 
opcodes are all concerned with unlimited repeated groups, which of course 
opcodes are all concerned with unlimited repeated groups, which of course 
1557 
are not of fixed length. They will cause a -1 response from the default 
are not of fixed length. */ 

case of this switch. */ 

1558 


1559 
case OP_CBRA: 
case OP_CBRA: 
1560 
case OP_BRA: 
case OP_BRA: 
1568 
cc += 1 + LINK_SIZE; 
cc += 1 + LINK_SIZE; 
1569 
break; 
break; 
1570 


1571 
/* Reached end of a branch; if it's a ket it is the end of a nested 
/* Reached end of a branch; if it's a ket it is the end of a nested call. 
1572 
call. If it's ALT it is an alternation in a nested call. If it is 
If it's ALT it is an alternation in a nested call. An ACCEPT is effectively 
1573 
END it's the end of the outer call. All can be handled by the same code. 
an ALT. If it is END it's the end of the outer call. All can be handled by 
1574 
Note that we must not include the OP_KETRxxx opcodes here, because they 
the same code. Note that we must not include the OP_KETRxxx opcodes here, 
1575 
all imply an unlimited repeat. */ 
because they all imply an unlimited repeat. */ 
1576 


1577 
case OP_ALT: 
case OP_ALT: 
1578 
case OP_KET: 
case OP_KET: 
1579 
case OP_END: 
case OP_END: 
1580 

case OP_ACCEPT: 
1581 

case OP_ASSERT_ACCEPT: 
1582 
if (length < 0) length = branchlength; 
if (length < 0) length = branchlength; 
1583 
else if (length != branchlength) return 1; 
else if (length != branchlength) return 1; 
1584 
if (*cc != OP_ALT) return length; 
if (*cc != OP_ALT) return length; 
1612 


1613 
/* Skip over things that don't match chars */ 
/* Skip over things that don't match chars */ 
1614 


1615 
case OP_REVERSE: 
case OP_MARK: 
1616 
case OP_CREF: 
case OP_PRUNE_ARG: 
1617 
case OP_NCREF: 
case OP_SKIP_ARG: 
1618 
case OP_RREF: 
case OP_THEN_ARG: 
1619 
case OP_NRREF: 
cc += cc[1] + _pcre_OP_lengths[*cc]; 
1620 
case OP_DEF: 
break; 
1621 


1622 
case OP_CALLOUT: 
case OP_CALLOUT: 

case OP_SOD: 


case OP_SOM: 


case OP_SET_SOM: 


case OP_EOD: 


case OP_EODN: 

1623 
case OP_CIRC: 
case OP_CIRC: 
1624 
case OP_CIRCM: 
case OP_CIRCM: 
1625 

case OP_CLOSE: 
1626 

case OP_COMMIT: 
1627 

case OP_CREF: 
1628 

case OP_DEF: 
1629 
case OP_DOLL: 
case OP_DOLL: 
1630 
case OP_DOLLM: 
case OP_DOLLM: 
1631 

case OP_EOD: 
1632 

case OP_EODN: 
1633 

case OP_FAIL: 
1634 

case OP_NCREF: 
1635 

case OP_NRREF: 
1636 
case OP_NOT_WORD_BOUNDARY: 
case OP_NOT_WORD_BOUNDARY: 
1637 

case OP_PRUNE: 
1638 

case OP_REVERSE: 
1639 

case OP_RREF: 
1640 

case OP_SET_SOM: 
1641 

case OP_SKIP: 
1642 

case OP_SOD: 
1643 

case OP_SOM: 
1644 

case OP_THEN: 
1645 
case OP_WORD_BOUNDARY: 
case OP_WORD_BOUNDARY: 
1646 
cc += _pcre_OP_lengths[*cc]; 
cc += _pcre_OP_lengths[*cc]; 
1647 
break; 
break; 
1663 
need to skip over a multibyte character in UTF8 mode. */ 
need to skip over a multibyte character in UTF8 mode. */ 
1664 


1665 
case OP_EXACT: 
case OP_EXACT: 
1666 

case OP_EXACTI: 
1667 

case OP_NOTEXACT: 
1668 

case OP_NOTEXACTI: 
1669 
branchlength += GET2(cc,1); 
branchlength += GET2(cc,1); 
1670 
cc += 4; 
cc += 4; 
1671 
#ifdef SUPPORT_UTF8 
#ifdef SUPPORT_UTF8 
1686 
cc += 2; 
cc += 2; 
1687 
/* Fall through */ 
/* Fall through */ 
1688 


1689 

case OP_HSPACE: 
1690 

case OP_VSPACE: 
1691 

case OP_NOT_HSPACE: 
1692 

case OP_NOT_VSPACE: 
1693 
case OP_NOT_DIGIT: 
case OP_NOT_DIGIT: 
1694 
case OP_DIGIT: 
case OP_DIGIT: 
1695 
case OP_NOT_WHITESPACE: 
case OP_NOT_WHITESPACE: 
1721 


1722 
switch (*cc) 
switch (*cc) 
1723 
{ 
{ 
1724 

case OP_CRPLUS: 
1725 

case OP_CRMINPLUS: 
1726 
case OP_CRSTAR: 
case OP_CRSTAR: 
1727 
case OP_CRMINSTAR: 
case OP_CRMINSTAR: 
1728 
case OP_CRQUERY: 
case OP_CRQUERY: 
1743 


1744 
/* Anything else is variable length */ 
/* Anything else is variable length */ 
1745 


1746 
default: 
case OP_ANYNL: 
1747 

case OP_BRAMINZERO: 
1748 

case OP_BRAPOS: 
1749 

case OP_BRAPOSZERO: 
1750 

case OP_BRAZERO: 
1751 

case OP_CBRAPOS: 
1752 

case OP_EXTUNI: 
1753 

case OP_KETRMAX: 
1754 

case OP_KETRMIN: 
1755 

case OP_KETRPOS: 
1756 

case OP_MINPLUS: 
1757 

case OP_MINPLUSI: 
1758 

case OP_MINQUERY: 
1759 

case OP_MINQUERYI: 
1760 

case OP_MINSTAR: 
1761 

case OP_MINSTARI: 
1762 

case OP_MINUPTO: 
1763 

case OP_MINUPTOI: 
1764 

case OP_NOTMINPLUS: 
1765 

case OP_NOTMINPLUSI: 
1766 

case OP_NOTMINQUERY: 
1767 

case OP_NOTMINQUERYI: 
1768 

case OP_NOTMINSTAR: 
1769 

case OP_NOTMINSTARI: 
1770 

case OP_NOTMINUPTO: 
1771 

case OP_NOTMINUPTOI: 
1772 

case OP_NOTPLUS: 
1773 

case OP_NOTPLUSI: 
1774 

case OP_NOTPOSPLUS: 
1775 

case OP_NOTPOSPLUSI: 
1776 

case OP_NOTPOSQUERY: 
1777 

case OP_NOTPOSQUERYI: 
1778 

case OP_NOTPOSSTAR: 
1779 

case OP_NOTPOSSTARI: 
1780 

case OP_NOTPOSUPTO: 
1781 

case OP_NOTPOSUPTOI: 
1782 

case OP_NOTQUERY: 
1783 

case OP_NOTQUERYI: 
1784 

case OP_NOTSTAR: 
1785 

case OP_NOTSTARI: 
1786 

case OP_NOTUPTO: 
1787 

case OP_NOTUPTOI: 
1788 

case OP_PLUS: 
1789 

case OP_PLUSI: 
1790 

case OP_POSPLUS: 
1791 

case OP_POSPLUSI: 
1792 

case OP_POSQUERY: 
1793 

case OP_POSQUERYI: 
1794 

case OP_POSSTAR: 
1795 

case OP_POSSTARI: 
1796 

case OP_POSUPTO: 
1797 

case OP_POSUPTOI: 
1798 

case OP_QUERY: 
1799 

case OP_QUERYI: 
1800 

case OP_REF: 
1801 

case OP_REFI: 
1802 

case OP_SBRA: 
1803 

case OP_SBRAPOS: 
1804 

case OP_SCBRA: 
1805 

case OP_SCBRAPOS: 
1806 

case OP_SCOND: 
1807 

case OP_SKIPZERO: 
1808 

case OP_STAR: 
1809 

case OP_STARI: 
1810 

case OP_TYPEMINPLUS: 
1811 

case OP_TYPEMINQUERY: 
1812 

case OP_TYPEMINSTAR: 
1813 

case OP_TYPEMINUPTO: 
1814 

case OP_TYPEPLUS: 
1815 

case OP_TYPEPOSPLUS: 
1816 

case OP_TYPEPOSQUERY: 
1817 

case OP_TYPEPOSSTAR: 
1818 

case OP_TYPEPOSUPTO: 
1819 

case OP_TYPEQUERY: 
1820 

case OP_TYPESTAR: 
1821 

case OP_TYPEUPTO: 
1822 

case OP_UPTO: 
1823 

case OP_UPTOI: 
1824 
return 1; 
return 1; 
1825 


1826 

/* Catch unrecognized opcodes so that when new ones are added they 
1827 

are not forgotten, as has happened in the past. */ 
1828 


1829 

default: 
1830 

return 4; 
1831 
} 
} 
1832 
} 
} 
1833 
/* Control never gets here */ 
/* Control never gets here */ 
3352 
} 
} 
3353 


3354 
*lengthptr += (int)(code  last_code); 
*lengthptr += (int)(code  last_code); 
3355 
DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code  last_code, c)); 
DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code  last_code), 
3356 

c)); 
3357 


3358 
/* If "previous" is set and it is not at the start of the work space, move 
/* If "previous" is set and it is not at the start of the work space, move 
3359 
it back to there, in order to avoid filling up the work space. Otherwise, 
it back to there, in order to avoid filling up the work space. Otherwise, 
4425 
past, but it no longer happens for non-repeated recursions. In fact, the 
past, but it no longer happens for non-repeated recursions. In fact, the 
4426 
repeated ones could be reimplemented independently so as not to need this, 
repeated ones could be reimplemented independently so as not to need this, 
4427 
but for the moment we rely on the code for repeating groups. */ 
but for the moment we rely on the code for repeating groups. */ 
4428 


4429 
if (*previous == OP_RECURSE) 
if (*previous == OP_RECURSE) 
4430 
{ 
{ 
4431 
memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE); 
memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE); 
4983 
ONCE brackets can be converted into non-capturing brackets, as the 
ONCE brackets can be converted into non-capturing brackets, as the 
4984 
behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to 
behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to 
4985 
deal with possessive ONCEs specially. 
deal with possessive ONCEs specially. 
4986 


4987 
Otherwise, if the quantifier was possessive, we convert the BRA code to 
Otherwise, when we are doing the actual compile phase, check to see 
4988 
the POS form, and the KET code to KETRPOS. (It turns out to be convenient 
whether this group is one that could match an empty string. If so, 
4989 
at runtime to detect this kind of subpattern at both the start and at the 
convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so 
4990 
end.) The use of special opcodes makes it possible to reduce greatly the 
that runtime checking can be done. [This check is also applied to ONCE 
4991 
stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO, 
groups at runtime, but in a different way.] 
4992 
convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that 

4993 
the default action below, of wrapping everything inside atomic brackets, 
Then, if the quantifier was possessive and the bracket is not a 
4994 
does not happen. 
conditional, we convert the BRA code to the POS form, and the KET code to 
4995 

KETRPOS. (It turns out to be convenient at runtime to detect this kind of 
4996 
Then, when we are doing the actual compile phase, check to see whether 
subpattern at both the start and at the end.) The use of special opcodes 
4997 
this group is one that could match an empty string. If so, convert the 
makes it possible to reduce greatly the stack usage in pcre_exec(). If 
4998 
initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime 
the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. 
4999 
checking can be done. [This check is also applied to ONCE groups at 

5000 
runtime, but in a different way.] */ 
Then, if the minimum number of matches is 1 or 0, cancel the possessive 
5001 

flag so that the default action below, of wrapping everything inside 
5002 

atomic brackets, does not happen. When the minimum is greater than 1, 
5003 

there will be earlier copies of the group, and so we still have to wrap 
5004 

the whole thing. */ 
5005 


5006 
else 
else 
5007 
{ 
{ 
5008 
uschar *ketcode = code  1  LINK_SIZE; 
uschar *ketcode = code  1  LINK_SIZE; 
5009 
uschar *bracode = ketcode  GET(ketcode, 1); 
uschar *bracode = ketcode  GET(ketcode, 1); 
5010 


5011 

/* Convert possessive ONCE brackets to non-capturing */ 
5012 


5013 
if ((*bracode == OP_ONCE  *bracode == OP_ONCE_NC) && 
if ((*bracode == OP_ONCE  *bracode == OP_ONCE_NC) && 
5014 
possessive_quantifier) *bracode = OP_BRA; 
possessive_quantifier) *bracode = OP_BRA; 
5015 


5016 

/* For non-possessive ONCE brackets, all we need to do is to 
5017 

set the KET. */ 
5018 


5019 
if (*bracode == OP_ONCE  *bracode == OP_ONCE_NC) 
if (*bracode == OP_ONCE  *bracode == OP_ONCE_NC) 
5020 
*ketcode = OP_KETRMAX + repeat_type; 
*ketcode = OP_KETRMAX + repeat_type; 
5021 


5022 

/* Handle non-ONCE brackets and possessive ONCEs (which have been 
5023 

converted to non-capturing above). */ 
5024 


5025 
else 
else 
5026 
{ 
{ 
5027 
if (possessive_quantifier) 
/* In the compile phase, check for empty string matching. */ 
5028 
{ 


*bracode += 1; /* Switch to xxxPOS opcodes */ 


*ketcode = OP_KETRPOS; 


if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; 


possessive_quantifier = FALSE; 


} 


else *ketcode = OP_KETRMAX + repeat_type; 




5029 
if (lengthptr == NULL) 
if (lengthptr == NULL) 
5030 
{ 
{ 
5031 
uschar *scode = bracode; 
uschar *scode = bracode; 
5040 
} 
} 
5041 
while (*scode == OP_ALT); 
while (*scode == OP_ALT); 
5042 
} 
} 
5043 


5044 

/* Handle possessive quantifiers. */ 
5045 


5046 

if (possessive_quantifier) 
5047 

{ 
5048 

/* For COND brackets, we wrap the whole thing in a possessively 
5049 

repeated non-capturing bracket, because we have not invented POS 
5050 

versions of the COND opcodes. Because we are moving code along, we 
5051 

must ensure that any pending recursive references are updated. */ 
5052 


5053 

if (*bracode == OP_COND  *bracode == OP_SCOND) 
5054 

{ 
5055 

int nlen = (int)(code  bracode); 
5056 

*code = OP_END; 
5057 

adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm); 
5058 

memmove(bracode + 1+LINK_SIZE, bracode, nlen); 
5059 

code += 1 + LINK_SIZE; 
5060 

nlen += 1 + LINK_SIZE; 
5061 

*bracode = OP_BRAPOS; 
5062 

*code++ = OP_KETRPOS; 
5063 

PUTINC(code, 0, nlen); 
5064 

PUT(bracode, 1, nlen); 
5065 

} 
5066 


5067 

/* For non-COND brackets, we modify the BRA code and use KETRPOS. */ 
5068 


5069 

else 
5070 

{ 
5071 

*bracode += 1; /* Switch to xxxPOS opcodes */ 
5072 

*ketcode = OP_KETRPOS; 
5073 

} 
5074 


5075 

/* If the minimum is zero, mark it as possessive, then unset the 
5076 

possessive flag when the minimum is 0 or 1. */ 
5077 


5078 

if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; 
5079 

if (repeat_min < 2) possessive_quantifier = FALSE; 
5080 

} 
5081 


5082 

/* Non-possessive quantifier */ 
5083 


5084 

else *ketcode = OP_KETRMAX + repeat_type; 
5085 
} 
} 
5086 
} 
} 
5087 
} 
} 
5108 
notation is just syntactic sugar, taken from Sun's Java package, but the 
notation is just syntactic sugar, taken from Sun's Java package, but the 
5109 
special opcodes can optimize it. 
special opcodes can optimize it. 
5110 


5111 
Possessively repeated subpatterns have already been handled in the code 
Some (but not all) possessively repeated subpatterns have already been 
5112 
just above, so possessive_quantifier is always FALSE for them at this 
completely handled in the code just above. For them, possessive_quantifier 
5113 
stage. 
is always FALSE at this stage. 
5114 


5115 
Note that the repeated item starts at tempcode, not at previous, which 
Note that the repeated item starts at tempcode, not at previous, which 
5116 
might be the first part of a string whose (former) last char we repeated. 
might be the first part of a string whose (former) last char we repeated. 
6772 
} 
} 
6773 
else if (fixed_length < 0) 
else if (fixed_length < 0) 
6774 
{ 
{ 
6775 
*errorcodeptr = (fixed_length == 2)? ERR36 : ERR25; 
*errorcodeptr = (fixed_length == 2)? ERR36 : 
6776 

(fixed_length == 4)? ERR70: ERR25; 
6777 
*ptrptr = ptr; 
*ptrptr = ptr; 
6778 
return FALSE; 
return FALSE; 
6779 
} 
} 
7572 
DPRINTF(("fixed length = %d\n", fixed_length)); 
DPRINTF(("fixed length = %d\n", fixed_length)); 
7573 
if (fixed_length < 0) 
if (fixed_length < 0) 
7574 
{ 
{ 
7575 
errorcode = (fixed_length == 2)? ERR36 : ERR25; 
errorcode = (fixed_length == 2)? ERR36 : 
7576 

(fixed_length == 4)? ERR70 : ERR25; 
7577 
break; 
break; 
7578 
} 
} 
7579 
PUT(cc, 1, fixed_length); 
PUT(cc, 1, fixed_length); 