/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 333 by ph10, Thu Apr 10 19:55:57 2008 UTC revision 336 by ph10, Sat Apr 12 15:59:03 2008 UTC
# Line 302  static const char error_texts[] = Line 302  static const char error_texts[] =
302    "(*VERB) not recognized\0"    "(*VERB) not recognized\0"
303    "number is too big\0"    "number is too big\0"
304    "subpattern name expected\0"    "subpattern name expected\0"
305    "digit expected after (?+";    "digit expected after (?+\0"
306      "] is an invalid data character in JavaScript compatibility mode";
307    
308    
309  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 1567  for (code = first_significant_code(code Line 1568  for (code = first_significant_code(code
1568    
1569    /* Groups with zero repeats can of course be empty; skip them. */    /* Groups with zero repeats can of course be empty; skip them. */
1570    
1571    if (c == OP_BRAZERO || c == OP_BRAMINZERO)    if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO)
1572      {      {
1573      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1574      do code += GET(code, 1); while (*code == OP_ALT);      do code += GET(code, 1); while (*code == OP_ALT);
# Line 1847  return -1; Line 1848  return -1;
1848  that is referenced. This means that groups can be replicated for fixed  that is referenced. This means that groups can be replicated for fixed
1849  repetition simply by copying (because the recursion is allowed to refer to  repetition simply by copying (because the recursion is allowed to refer to
1850  earlier groups that are outside the current group). However, when a group is  earlier groups that are outside the current group). However, when a group is
1851  optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before  optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is
1852  it, after it has been compiled. This means that any OP_RECURSE items within it  inserted before it, after it has been compiled. This means that any OP_RECURSE
1853  that refer to the group itself or any contained groups have to have their  items within it that refer to the group itself or any contained groups have to
1854  offsets adjusted. That is one of the jobs of this function. Before it is called,  have their offsets adjusted. That is one of the jobs of this function. Before it
1855  the partially compiled regex must be temporarily terminated with OP_END.  is called, the partially compiled regex must be temporarily terminated with
1856    OP_END.
1857    
1858  This function has been extended with the possibility of forward references for  This function has been extended with the possibility of forward references for
1859  recursions and subroutine calls. It must also check the list of such references  recursions and subroutine calls. It must also check the list of such references
# Line 1882  while ((ptr = (uschar *)find_recurse(ptr Line 1884  while ((ptr = (uschar *)find_recurse(ptr
1884    
1885    /* See if this recursion is on the forward reference list. If so, adjust the    /* See if this recursion is on the forward reference list. If so, adjust the
1886    reference. */    reference. */
1887    
1888    for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)    for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)
1889      {      {
1890      offset = GET(hc, 0);      offset = GET(hc, 0);
# Line 2458  for (;; ptr++) Line 2460  for (;; ptr++)
2460    /* Get next byte in the pattern */    /* Get next byte in the pattern */
2461    
2462    c = *ptr;    c = *ptr;
2463    
2464    /* If we are in the pre-compile phase, accumulate the length used for the    /* If we are in the pre-compile phase, accumulate the length used for the
2465    previous cycle of this loop. */    previous cycle of this loop. */
2466    
# Line 2653  for (;; ptr++) Line 2655  for (;; ptr++)
2655      opcode is compiled. It may optionally have a bit map for characters < 256,      opcode is compiled. It may optionally have a bit map for characters < 256,
2656      but those above are explicitly listed afterwards. A flag byte tells      but those above are explicitly listed afterwards. A flag byte tells
2657      whether the bitmap is present, and whether this is a negated class or not.      whether the bitmap is present, and whether this is a negated class or not.
2658      */  
2659        In JavaScript compatibility mode, an isolated ']' causes an error. In
2660        default (Perl) mode, it is treated as a data character. */
2661    
2662        case ']':
2663        if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2664          {
2665          *errorcodeptr = ERR64;
2666          goto FAILED;
2667          }
2668        goto NORMAL_CHAR;
2669    
2670      case '[':      case '[':
2671      previous = code;      previous = code;
# Line 3842  we set the flag only if there is a liter Line 3854  we set the flag only if there is a liter
3854    
3855        if (repeat_min == 0)        if (repeat_min == 0)
3856          {          {
3857          /* If the maximum is also zero, we just omit the group from the output          /* If the maximum is also zero, we used to just omit the group from the
3858          altogether. */          output altogether, like this:
   
         if (repeat_max == 0)  
           {  
           code = previous;  
           goto END_REPEAT;  
           }  
3859    
3860          /* If the maximum is 1 or unlimited, we just have to stick in the          ** if (repeat_max == 0)
3861          BRAZERO and do no more at this point. However, we do need to adjust          **   {
3862          any OP_RECURSE calls inside the group that refer to the group itself or          **   code = previous;
3863          any internal or forward referenced group, because the offset is from          **   goto END_REPEAT;
3864          the start of the whole regex. Temporarily terminate the pattern while          **   }
3865          doing this. */  
3866            However, that fails when a group is referenced as a subroutine from
3867            elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it
3868            so that it is skipped on execution. As we don't have a list of which
3869            groups are referenced, we cannot do this selectively.
3870    
3871            If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
3872            and do no more at this point. However, we do need to adjust any
3873            OP_RECURSE calls inside the group that refer to the group itself or any
3874            internal or forward referenced group, because the offset is from the
3875            start of the whole regex. Temporarily terminate the pattern while doing
3876            this. */
3877    
3878          if (repeat_max <= 1)          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
3879            {            {
3880            *code = OP_END;            *code = OP_END;
3881            adjust_recurse(previous, 1, utf8, cd, save_hwm);            adjust_recurse(previous, 1, utf8, cd, save_hwm);
3882            memmove(previous+1, previous, len);            memmove(previous+1, previous, len);
3883            code++;            code++;
3884              if (repeat_max == 0)
3885                {
3886                *previous++ = OP_SKIPZERO;
3887                goto END_REPEAT;
3888                }
3889            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
3890            }            }
3891    
# Line 4138  we set the flag only if there is a liter Line 4160  we set the flag only if there is a liter
4160      bravalue = OP_CBRA;      bravalue = OP_CBRA;
4161      save_hwm = cd->hwm;      save_hwm = cd->hwm;
4162      reset_bracount = FALSE;      reset_bracount = FALSE;
4163    
4164      /* First deal with various "verbs" that can be introduced by '*'. */      /* First deal with various "verbs" that can be introduced by '*'. */
4165    
4166      if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)      if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
# Line 5127  we set the flag only if there is a liter Line 5149  we set the flag only if there is a liter
5149        -ESC_g is returned only for these cases. So we don't need to check for <        -ESC_g is returned only for these cases. So we don't need to check for <
5150        or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is        or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is
5151        -ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as        -ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as
5152        that is a synonym). */        that is a synonym for a named back reference). */
5153    
5154        if (-c == ESC_g)        if (-c == ESC_g)
5155          {          {
5156          const uschar *p;          const uschar *p;
5157            save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
5158          terminator = (*(++ptr) == '<')? '>' : '\'';          terminator = (*(++ptr) == '<')? '>' : '\'';
5159    
5160          /* These two statements stop the compiler from warning about possibly          /* These two statements stop the compiler from warning about possibly
# Line 6197  while (errorcode == 0 && cd->hwm > cwork Line 6220  while (errorcode == 0 && cd->hwm > cwork
6220    if (groupptr == NULL) errorcode = ERR53;    if (groupptr == NULL) errorcode = ERR53;
6221      else PUT(((uschar *)codestart), offset, groupptr - codestart);      else PUT(((uschar *)codestart), offset, groupptr - codestart);
6222    }    }
6223    
6224  /* Give an error if there's a back reference to a non-existent capturing  /* Give an error if there's a back reference to a non-existent capturing
6225  subpattern. */  subpattern. */
6226    

Legend:
Removed from v.333  
changed lines
  Added in v.336

  ViewVC Help
Powered by ViewVC 1.1.5