/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 747 by ph10, Tue Nov 15 17:35:10 2011 UTC | revision 749 by ph10, Fri Nov 18 10:36:45 2011 UTC
# Line 4424  for (;; ptr++) Line 4424  for (;; ptr++)
4424      past, but it no longer happens for non-repeated recursions. In fact, the      past, but it no longer happens for non-repeated recursions. In fact, the
4425      repeated ones could be re-implemented independently so as not to need this,      repeated ones could be re-implemented independently so as not to need this,
4426      but for the moment we rely on the code for repeating groups. */      but for the moment we rely on the code for repeating groups. */
4427    
4428      if (*previous == OP_RECURSE)      if (*previous == OP_RECURSE)
4429        {        {
4430        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
# Line 4982  for (;; ptr++) Line 4982  for (;; ptr++)
4982        ONCE brackets can be converted into non-capturing brackets, as the        ONCE brackets can be converted into non-capturing brackets, as the
4983        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
4984        deal with possessive ONCEs specially.        deal with possessive ONCEs specially.
4985    
4986        Otherwise, if the quantifier was possessive, we convert the BRA code to        Otherwise, when we are doing the actual compile phase, check to see
4987        the POS form, and the KET code to KETRPOS. (It turns out to be convenient        whether this group is one that could match an empty string. If so,
4988        at runtime to detect this kind of subpattern at both the start and at the        convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
4989        end.) The use of special opcodes makes it possible to reduce greatly the        that runtime checking can be done. [This check is also applied to ONCE
4990        stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO,        groups at runtime, but in a different way.]
4991        convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that  
4992        the default action below, of wrapping everything inside atomic brackets,        Then, if the quantifier was possessive and the bracket is not a
4993        does not happen.        conditional, we convert the BRA code to the POS form, and the KET code to
4994          KETRPOS. (It turns out to be convenient at runtime to detect this kind of
4995        Then, when we are doing the actual compile phase, check to see whether        subpattern at both the start and at the end.) The use of special opcodes
4996        this group is one that could match an empty string. If so, convert the        makes it possible to reduce greatly the stack usage in pcre_exec(). If
4997        initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. Then
4998        checking can be done. [This check is also applied to ONCE groups at        cancel the possessive flag so that the default action below, of wrapping
4999        runtime, but in a different way.] */        everything inside atomic brackets, does not happen. */
5000    
5001        else        else
5002          {          {
5003          uschar *ketcode = code - 1 - LINK_SIZE;          uschar *ketcode = code - 1 - LINK_SIZE;
5004          uschar *bracode = ketcode - GET(ketcode, 1);          uschar *bracode = ketcode - GET(ketcode, 1);
5005    
5006            /* Convert possessive ONCE brackets to non-capturing */
5007    
5008          if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&          if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
5009              possessive_quantifier) *bracode = OP_BRA;              possessive_quantifier) *bracode = OP_BRA;
5010    
5011            /* For non-possessive ONCE brackets, all we need to do is to
5012            set the KET. */
5013    
5014          if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)          if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
5015            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
5016    
5017            /* Handle non-ONCE brackets and possessive ONCEs (which have been
5018            converted to non-capturing above). */
5019    
5020          else          else
5021            {            {
5022            if (possessive_quantifier)            /* In the compile phase, check for empty string matching. */
5023              {  
             *bracode += 1;                   /* Switch to xxxPOS opcodes */  
             *ketcode = OP_KETRPOS;  
             if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;  
             possessive_quantifier = FALSE;  
             }  
           else *ketcode = OP_KETRMAX + repeat_type;  
   
5024            if (lengthptr == NULL)            if (lengthptr == NULL)
5025              {              {
5026              uschar *scode = bracode;              uschar *scode = bracode;
# Line 5033  for (;; ptr++) Line 5035  for (;; ptr++)
5035                }                }
5036              while (*scode == OP_ALT);              while (*scode == OP_ALT);
5037              }              }
5038    
5039              /* Handle possessive quantifiers. */
5040    
5041              if (possessive_quantifier)
5042                {
5043                /* For COND brackets, we wrap the whole thing in a possessively
5044                repeated non-capturing bracket, because we have not invented POS
5045                versions of the COND opcodes. Because we are moving code along, we
5046                must ensure that any pending recursive references are updated. */
5047    
5048                if (*bracode == OP_COND || *bracode == OP_SCOND)
5049                  {
5050                  int nlen = (int)(code - bracode);
5051                  *code = OP_END;
5052                  adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5053                  memmove(bracode + 1+LINK_SIZE, bracode, nlen);
5054                  code += 1 + LINK_SIZE;
5055                  nlen += 1 + LINK_SIZE;
5056                  *bracode = OP_BRAPOS;
5057                  *code++ = OP_KETRPOS;
5058                  PUTINC(code, 0, nlen);
5059                  PUT(bracode, 1, nlen);
5060                  }
5061    
5062                /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
5063    
5064                else
5065                  {
5066                  *bracode += 1;              /* Switch to xxxPOS opcodes */
5067                  *ketcode = OP_KETRPOS;
5068                  }
5069    
5070                /* If the minimum is zero, mark it as possessive, then unset the
5071                possessive flag. */
5072    
5073                if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
5074                possessive_quantifier = FALSE;
5075                }
5076    
5077              /* Non-possessive quantifier */
5078    
5079              else *ketcode = OP_KETRMAX + repeat_type;
5080            }            }
5081          }          }
5082        }        }

Legend:
Removed from v.747  
changed lines
  Added in v.749

  ViewVC Help
Powered by ViewVC 1.1.5