/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 621 by ph10, Mon Jul 18 10:14:09 2011 UTC revision 638 by ph10, Mon Jul 25 09:41:19 2011 UTC
# Line 393  static const char error_texts[] = Line 393  static const char error_texts[] =
393    "internal error: previously-checked referenced subpattern not found\0"    "internal error: previously-checked referenced subpattern not found\0"
394    "DEFINE group contains more than one branch\0"    "DEFINE group contains more than one branch\0"
395    /* 55 */    /* 55 */
396    "repeating a DEFINE group is not allowed\0"    "repeating a DEFINE group is not allowed\0"  /** DEAD **/
397    "inconsistent NEWLINE options\0"    "inconsistent NEWLINE options\0"
398    "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"    "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
399    "a numbered reference must not be zero\0"    "a numbered reference must not be zero\0"
# Line 409  static const char error_texts[] = Line 409  static const char error_texts[] =
409    "(*MARK) must have an argument\0"    "(*MARK) must have an argument\0"
410    "this version of PCRE is not compiled with PCRE_UCP support\0"    "this version of PCRE is not compiled with PCRE_UCP support\0"
411    "\\c must be followed by an ASCII character\0"    "\\c must be followed by an ASCII character\0"
412      "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
413    ;    ;
414    
415  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 1975  for (code = first_significant_code(code Line 1976  for (code = first_significant_code(code
1976      }      }
1977    
1978    /* For a recursion/subroutine call, if its end has been reached, which    /* For a recursion/subroutine call, if its end has been reached, which
1979    implies a subroutine call, we can scan it. */    implies a backward reference subroutine call, we can scan it. If it's a
1980      forward reference subroutine call, we can't. To detect a forward reference
1981      we have to scan up the list that is kept in the workspace. This function is
1982      called only when doing the real compile, not during the pre-compile that
1983      measures the size of the compiled pattern. */
1984    
1985    if (c == OP_RECURSE)    if (c == OP_RECURSE)
1986      {      {
1987      BOOL empty_branch = FALSE;      const uschar *scode;
1988      const uschar *scode = cd->start_code + GET(code, 1);      BOOL empty_branch;
1989    
1990        /* Test for forward reference */
1991    
1992        for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
1993          if (GET(scode, 0) == code + 1 - cd->start_code) return TRUE;
1994    
1995        /* Not a forward reference, test for completed backward reference */
1996    
1997        empty_branch = FALSE;
1998        scode = cd->start_code + GET(code, 1);
1999      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
2000    
2001        /* Completed backwards reference */
2002    
2003      do      do
2004        {        {
2005        if (could_be_empty_branch(scode, endcode, utf8, cd))        if (could_be_empty_branch(scode, endcode, utf8, cd))
# Line 1992  for (code = first_significant_code(code Line 2010  for (code = first_significant_code(code
2010        scode += GET(scode, 1);        scode += GET(scode, 1);
2011        }        }
2012      while (*scode == OP_ALT);      while (*scode == OP_ALT);
2013    
2014      if (!empty_branch) return FALSE;  /* All branches are non-empty */      if (!empty_branch) return FALSE;  /* All branches are non-empty */
2015      continue;      continue;
2016      }      }
# Line 2217  return TRUE; Line 2236  return TRUE;
2236  the current branch of the current pattern to see if it could match the empty  the current branch of the current pattern to see if it could match the empty
2237  string. If it could, we must look outwards for branches at other levels,  string. If it could, we must look outwards for branches at other levels,
2238  stopping when we pass beyond the bracket which is the subject of the recursion.  stopping when we pass beyond the bracket which is the subject of the recursion.
2239    This function is called only during the real compile, not during the
2240    pre-compile.
2241    
2242  Arguments:  Arguments:
2243    code        points to start of the recursion    code        points to start of the recursion
# Line 3018  int greedy_default, greedy_non_default; Line 3039  int greedy_default, greedy_non_default;
3039  int firstbyte, reqbyte;  int firstbyte, reqbyte;
3040  int zeroreqbyte, zerofirstbyte;  int zeroreqbyte, zerofirstbyte;
3041  int req_caseopt, reqvary, tempreqvary;  int req_caseopt, reqvary, tempreqvary;
3042  int options = *optionsptr;  int options = *optionsptr;               /* May change dynamically */
3043  int after_manual_callout = 0;  int after_manual_callout = 0;
3044  int length_prevgroup = 0;  int length_prevgroup = 0;
3045  register int c;  register int c;
# Line 3036  uschar *previous_callout = NULL; Line 3057  uschar *previous_callout = NULL;
3057  uschar *save_hwm = NULL;  uschar *save_hwm = NULL;
3058  uschar classbits[32];  uschar classbits[32];
3059    
3060    /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
3061    must not do this for other options (e.g. PCRE_EXTENDED) because they may change
3062    dynamically as we process the pattern. */
3063    
3064  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3065  BOOL class_utf8;  BOOL class_utf8;
3066  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
# Line 3216  for (;; ptr++) Line 3241  for (;; ptr++)
3241      previous_callout = NULL;      previous_callout = NULL;
3242      }      }
3243    
3244    /* In extended mode, skip white space and comments */    /* In extended mode, skip white space and comments. */
3245    
3246    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
3247      {      {
# Line 4185  for (;; ptr++) Line 4210  for (;; ptr++)
4210      op_type = 0;                    /* Default single-char op codes */      op_type = 0;                    /* Default single-char op codes */
4211      possessive_quantifier = FALSE;  /* Default not possessive quantifier */      possessive_quantifier = FALSE;  /* Default not possessive quantifier */
4212    
4213      /* Save start of previous item, in case we have to move it up to make space      /* Save start of previous item, in case we have to move it up in order to
4214      for an inserted OP_ONCE for the additional '+' extension. */      insert something before it. */
4215    
4216      tempcode = previous;      tempcode = previous;
4217    
# Line 4529  for (;; ptr++) Line 4554  for (;; ptr++)
4554        }        }
4555    
4556      /* If previous was a bracket group, we may have to replicate it in certain      /* If previous was a bracket group, we may have to replicate it in certain
4557      cases. Note that at this point we can encounter only the "basic" BRA and      cases. Note that at this point we can encounter only the "basic" bracket
4558      KET opcodes, as this is the place where they get converted into the more      opcodes such as BRA and CBRA, as this is the place where they get converted
4559      special varieties. */      into the more special varieties such as BRAPOS and SBRA. A test for >=
4560        OP_ASSERT and <= OP_COND includes ASSERT, ASSERT_NOT, ASSERTBACK,
4561        ASSERTBACK_NOT, ONCE, BRA, CBRA, and COND. Originally, PCRE did not allow
4562        repetition of assertions, but now it does, for Perl compatibility. */
4563    
4564      else if (*previous == OP_BRA  || *previous == OP_CBRA ||      else if (*previous >= OP_ASSERT && *previous <= OP_COND)
              *previous == OP_ONCE || *previous == OP_COND)  
4565        {        {
4566        register int i;        register int i;
4567        int len = (int)(code - previous);        int len = (int)(code - previous);
4568        uschar *bralink = NULL;        uschar *bralink = NULL;
4569        uschar *brazeroptr = NULL;        uschar *brazeroptr = NULL;
4570    
4571        /* Repeating a DEFINE group is pointless */        /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
4572          we just ignore the repeat. */
4573    
4574        if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)        if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF)
4575          {          goto END_REPEAT;
4576          *errorcodeptr = ERR55;  
4577          goto FAILED;        /* There is no sense in actually repeating assertions. The only potential
4578          }        use of repetition is in cases when the assertion is optional. Therefore,
4579          if the minimum is greater than zero, just ignore the repeat. If the
4580          maximum is not zero or one, set it to 1. */
4581    
4582          if (*previous < OP_ONCE)    /* Assertion */
4583            {
4584            if (repeat_min > 0) goto END_REPEAT;
4585            if (repeat_max < 0 || repeat_max > 1) repeat_max = 1;
4586            }
4587    
4588        /* The case of a zero minimum is special because of the need to stick        /* The case of a zero minimum is special because of the need to stick
4589        OP_BRAZERO in front of it, and because the group appears once in the        OP_BRAZERO in front of it, and because the group appears once in the
# Line 4567  for (;; ptr++) Line 4603  for (;; ptr++)
4603          **   goto END_REPEAT;          **   goto END_REPEAT;
4604          **   }          **   }
4605    
4606          However, that fails when a group is referenced as a subroutine from          However, that fails when a group or a subgroup within it is referenced
4607          elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it          as a subroutine from elsewhere in the pattern, so now we stick in
4608          so that it is skipped on execution. As we don't have a list of which          OP_SKIPZERO in front of it so that it is skipped on execution. As we
4609          groups are referenced, we cannot do this selectively.          don't have a list of which groups are referenced, we cannot do this
4610            selectively.
4611    
4612          If the maximum is 1 or unlimited, we just have to stick in the BRAZERO          If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
4613          and do no more at this point. However, we do need to adjust any          and do no more at this point. However, we do need to adjust any
# Line 5551  for (;; ptr++) Line 5588  for (;; ptr++)
5588    
5589            temp = cd->end_pattern;            temp = cd->end_pattern;
5590            cd->end_pattern = ptr;            cd->end_pattern = ptr;
5591            recno = find_parens(cd, name, namelen,            recno = find_parens(cd, name, namelen,
5592              (options & PCRE_EXTENDED) != 0, utf8);              (options & PCRE_EXTENDED) != 0, utf8);
5593            cd->end_pattern = temp;            cd->end_pattern = temp;
5594            if (recno < 0) recno = 0;    /* Forward ref; set dummy number */            if (recno < 0) recno = 0;    /* Forward ref; set dummy number */
# Line 5835  for (;; ptr++) Line 5872  for (;; ptr++)
5872        skipbytes = 2;        skipbytes = 2;
5873        }        }
5874    
5875      /* Process nested bracketed regex. Assertions may not be repeated, but      /* Process nested bracketed regex. Assertions used not to be repeatable,
5876      other kinds can be. All their opcodes are >= OP_ONCE. We copy code into a      but this was changed for Perl compatibility, so all kinds can now be
5877      non-register variable (tempcode) in order to be able to pass its address      repeated. We copy code into a non-register variable (tempcode) in order to
5878      because some compilers complain otherwise. */      be able to pass its address because some compilers complain otherwise. */
5879    
5880      previous = (bravalue >= OP_ONCE)? code : NULL;      previous = code;                   /* For handling repetition */
5881      *code = bravalue;      *code = bravalue;
5882      tempcode = code;      tempcode = code;
5883      tempreqvary = cd->req_varyopt;     /* Save value before bracket */      tempreqvary = cd->req_varyopt;     /* Save value before bracket */
# Line 6105  for (;; ptr++) Line 6142  for (;; ptr++)
6142          }          }
6143    
6144        /* \k<name> or \k'name' is a back reference by name (Perl syntax).        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
6145        We also support \k{name} (.NET syntax) */        We also support \k{name} (.NET syntax).  */
6146    
6147        if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||        if (-c == ESC_k)
           ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))  
6148          {          {
6149            if ((ptr[1] != CHAR_LESS_THAN_SIGN &&
6150              ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
6151              {
6152              *errorcodeptr = ERR69;
6153              break;
6154              }
6155          is_recurse = FALSE;          is_recurse = FALSE;
6156          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
6157            CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?            CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
6158            CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;            CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
6159          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
6160          }          }
6161    
6162        /* Back references are handled specially; must disable firstbyte if        /* Back references are handled specially; must disable firstbyte if
6163        not set to cope with cases like (?=(\w+))\1: which would otherwise set        not set to cope with cases like (?=(\w+))\1: which would otherwise set
# Line 7042  if ((options & PCRE_UCP) != 0) Line 7084  if ((options & PCRE_UCP) != 0)
7084    
7085  /* Check validity of \R options. */  /* Check validity of \R options. */
7086    
7087  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))  if ((options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) ==
7088         (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
7089    {    {
7090    case 0:    errorcode = ERR56;
7091    case PCRE_BSR_ANYCRLF:    goto PCRE_EARLY_ERROR_RETURN;
   case PCRE_BSR_UNICODE:  
   break;  
   default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;  
7092    }    }
7093    
7094  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The

Legend:
Removed from v.621  
changed lines
  Added in v.638

  ViewVC Help
Powered by ViewVC 1.1.5