/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 747 by ph10, Tue Nov 15 17:35:10 2011 UTC | revision 750 by ph10, Fri Nov 18 11:07:14 2011 UTC
# Line 4424  for (;; ptr++) Line 4424  for (;; ptr++)
4424      past, but it no longer happens for non-repeated recursions. In fact, the      past, but it no longer happens for non-repeated recursions. In fact, the
4425      repeated ones could be re-implemented independently so as not to need this,      repeated ones could be re-implemented independently so as not to need this,
4426      but for the moment we rely on the code for repeating groups. */      but for the moment we rely on the code for repeating groups. */
4427    
4428      if (*previous == OP_RECURSE)      if (*previous == OP_RECURSE)
4429        {        {
4430        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);        memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
# Line 4982  for (;; ptr++) Line 4982  for (;; ptr++)
4982        ONCE brackets can be converted into non-capturing brackets, as the        ONCE brackets can be converted into non-capturing brackets, as the
4983        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
4984        deal with possessive ONCEs specially.        deal with possessive ONCEs specially.
4985    
4986        Otherwise, if the quantifier was possessive, we convert the BRA code to        Otherwise, when we are doing the actual compile phase, check to see
4987        the POS form, and the KET code to KETRPOS. (It turns out to be convenient        whether this group is one that could match an empty string. If so,
4988        at runtime to detect this kind of subpattern at both the start and at the        convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
4989        end.) The use of special opcodes makes it possible to reduce greatly the        that runtime checking can be done. [This check is also applied to ONCE
4990        stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO,        groups at runtime, but in a different way.]
4991        convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that  
4992        the default action below, of wrapping everything inside atomic brackets,        Then, if the quantifier was possessive and the bracket is not a
4993        does not happen.        conditional, we convert the BRA code to the POS form, and the KET code to
4994          KETRPOS. (It turns out to be convenient at runtime to detect this kind of
4995        Then, when we are doing the actual compile phase, check to see whether        subpattern at both the start and at the end.) The use of special opcodes
4996        this group is one that could match an empty string. If so, convert the        makes it possible to reduce greatly the stack usage in pcre_exec(). If
4997        initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
4998        checking can be done. [This check is also applied to ONCE groups at  
4999        runtime, but in a different way.] */        Then, if the minimum number of matches is 1 or 0, cancel the possessive
5000          flag so that the default action below, of wrapping everything inside
5001          atomic brackets, does not happen. When the minimum is greater than 1,
5002          there will be earlier copies of the group, and so we still have to wrap
5003          the whole thing. */
5004    
5005        else        else
5006          {          {
5007          uschar *ketcode = code - 1 - LINK_SIZE;          uschar *ketcode = code - 1 - LINK_SIZE;
5008          uschar *bracode = ketcode - GET(ketcode, 1);          uschar *bracode = ketcode - GET(ketcode, 1);
5009    
5010            /* Convert possessive ONCE brackets to non-capturing */
5011    
5012          if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&          if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
5013              possessive_quantifier) *bracode = OP_BRA;              possessive_quantifier) *bracode = OP_BRA;
5014    
5015            /* For non-possessive ONCE brackets, all we need to do is to
5016            set the KET. */
5017    
5018          if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)          if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
5019            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
5020    
5021            /* Handle non-ONCE brackets and possessive ONCEs (which have been
5022            converted to non-capturing above). */
5023    
5024          else          else
5025            {            {
5026            if (possessive_quantifier)            /* In the compile phase, check for empty string matching. */
5027              {  
             *bracode += 1;                   /* Switch to xxxPOS opcodes */  
             *ketcode = OP_KETRPOS;  
             if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;  
             possessive_quantifier = FALSE;  
             }  
           else *ketcode = OP_KETRMAX + repeat_type;  
   
5028            if (lengthptr == NULL)            if (lengthptr == NULL)
5029              {              {
5030              uschar *scode = bracode;              uschar *scode = bracode;
# Line 5033  for (;; ptr++) Line 5039  for (;; ptr++)
5039                }                }
5040              while (*scode == OP_ALT);              while (*scode == OP_ALT);
5041              }              }
5042    
5043              /* Handle possessive quantifiers. */
5044    
5045              if (possessive_quantifier)
5046                {
5047                /* For COND brackets, we wrap the whole thing in a possessively
5048                repeated non-capturing bracket, because we have not invented POS
5049                versions of the COND opcodes. Because we are moving code along, we
5050                must ensure that any pending recursive references are updated. */
5051    
5052                if (*bracode == OP_COND || *bracode == OP_SCOND)
5053                  {
5054                  int nlen = (int)(code - bracode);
5055                  *code = OP_END;
5056                  adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
5057                  memmove(bracode + 1+LINK_SIZE, bracode, nlen);
5058                  code += 1 + LINK_SIZE;
5059                  nlen += 1 + LINK_SIZE;
5060                  *bracode = OP_BRAPOS;
5061                  *code++ = OP_KETRPOS;
5062                  PUTINC(code, 0, nlen);
5063                  PUT(bracode, 1, nlen);
5064                  }
5065    
5066                /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
5067    
5068                else
5069                  {
5070                  *bracode += 1;              /* Switch to xxxPOS opcodes */
5071                  *ketcode = OP_KETRPOS;
5072                  }
5073    
5074                /* If the minimum is zero, mark it as possessive, then unset the
5075                possessive flag when the minimum is 0 or 1. */
5076    
5077                if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
5078                if (repeat_min < 2) possessive_quantifier = FALSE;
5079                }
5080    
5081              /* Non-possessive quantifier */
5082    
5083              else *ketcode = OP_KETRMAX + repeat_type;
5084            }            }
5085          }          }
5086        }        }
# Line 5059  for (;; ptr++) Line 5107  for (;; ptr++)
5107      notation is just syntactic sugar, taken from Sun's Java package, but the      notation is just syntactic sugar, taken from Sun's Java package, but the
5108      special opcodes can optimize it.      special opcodes can optimize it.
5109    
5110      Possessively repeated subpatterns have already been handled in the code      Some (but not all) possessively repeated subpatterns have already been
5111      just above, so possessive_quantifier is always FALSE for them at this      completely handled in the code just above. For them, possessive_quantifier
5112      stage.      is always FALSE at this stage.
5113    
5114      Note that the repeated item starts at tempcode, not at previous, which      Note that the repeated item starts at tempcode, not at previous, which
5115      might be the first part of a string whose (former) last char we repeated.      might be the first part of a string whose (former) last char we repeated.

Legend:
Removed from v.747  
changed lines
  Added in v.750

  ViewVC Help
Powered by ViewVC 1.1.5