/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 637 by ph10, Sun Jul 24 17:44:12 2011 UTC revision 642 by ph10, Thu Jul 28 18:59:40 2011 UTC
# Line 546  static const unsigned char ebcdic_charta Line 546  static const unsigned char ebcdic_charta
546  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
547    
548  static BOOL  static BOOL
549    compile_regex(int, uschar **, const uschar **, int *, BOOL, BOOL, int, int *,    compile_regex(int, uschar **, const uschar **, int *, BOOL, BOOL, int, int,
550      int *, branch_chain *, compile_data *, int *);      int *, int *, branch_chain *, compile_data *, int *);
551    
552    
553    
# Line 578  return s; Line 578  return s;
578    
579    
580  /*************************************************  /*************************************************
581    *            Check for counted repeat            *
582    *************************************************/
583    
584    /* This function is called when a '{' is encountered in a place where it might
585    start a quantifier. It looks ahead to see if it really is a quantifier or not.
586    It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
587    where the ddds are digits.
588    
589    Arguments:
590      p         pointer to the first char after '{'
591    
592    Returns:    TRUE or FALSE
593    */
594    
595    static BOOL
596    is_counted_repeat(const uschar *p)
597    {
598    if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
599    while ((digitab[*p] & ctype_digit) != 0) p++;
600    if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
601    
602    if (*p++ != CHAR_COMMA) return FALSE;
603    if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
604    
605    if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
606    while ((digitab[*p] & ctype_digit) != 0) p++;
607    
608    return (*p == CHAR_RIGHT_CURLY_BRACKET);
609    }
610    
611    
612    
613    /*************************************************
614  *            Handle escapes                      *  *            Handle escapes                      *
615  *************************************************/  *************************************************/
616    
# Line 648  else Line 681  else
681      *errorcodeptr = ERR37;      *errorcodeptr = ERR37;
682      break;      break;
683    
684      /* \g must be followed by one of a number of specific things:      /* In a character class, \g is just a literal "g". Outside a character
685        class, \g must be followed by one of a number of specific things:
686    
687      (1) A number, either plain or braced. If positive, it is an absolute      (1) A number, either plain or braced. If positive, it is an absolute
688      backreference. If negative, it is a relative backreference. This is a Perl      backreference. If negative, it is a relative backreference. This is a Perl
# Line 665  else Line 699  else
699      the -ESC_g code (cf \k). */      the -ESC_g code (cf \k). */
700    
701      case CHAR_g:      case CHAR_g:
702        if (isclass) break;
703      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
704        {        {
705        c = -ESC_g;        c = -ESC_g;
# Line 886  else Line 921  else
921    }    }
922    
923  /* Perl supports \N{name} for character names, as well as plain \N for "not  /* Perl supports \N{name} for character names, as well as plain \N for "not
924  newline". PCRE does not support \N{name}. */  newline". PCRE does not support \N{name}. However, it does support
925    quantification such as \N{2,3}. */
926    
927  if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET)  if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET &&
928         !is_counted_repeat(ptr+2))
929    *errorcodeptr = ERR37;    *errorcodeptr = ERR37;
930    
931  /* If PCRE_UCP is set, we change the values for \d etc. */  /* If PCRE_UCP is set, we change the values for \d etc. */
# Line 998  return -1; Line 1035  return -1;
1035    
1036    
1037  /*************************************************  /*************************************************
 *            Check for counted repeat            *  
 *************************************************/  
   
 /* This function is called when a '{' is encountered in a place where it might  
 start a quantifier. It looks ahead to see if it really is a quantifier or not.  
 It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}  
 where the ddds are digits.  
   
 Arguments:  
   p         pointer to the first char after '{'  
   
 Returns:    TRUE or FALSE  
 */  
   
 static BOOL  
 is_counted_repeat(const uschar *p)  
 {  
 if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  
 while ((digitab[*p] & ctype_digit) != 0) p++;  
 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;  
   
 if (*p++ != CHAR_COMMA) return FALSE;  
 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;  
   
 if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  
 while ((digitab[*p] & ctype_digit) != 0) p++;  
   
 return (*p == CHAR_RIGHT_CURLY_BRACKET);  
 }  
   
   
   
 /*************************************************  
1038  *         Read repeat counts                     *  *         Read repeat counts                     *
1039  *************************************************/  *************************************************/
1040    
# Line 2288  where Perl recognizes it as the POSIX cl Line 2292  where Perl recognizes it as the POSIX cl
2292  "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,  "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
2293  I think.  I think.
2294    
2295    A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
2296    It seems that the appearance of a nested POSIX class supersedes an apparent
2297    external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or
2298    a digit. Also, unescaped square brackets may appear as part of class
2299    names. For example, [:a[:abc]b:] gives unknown class "[:abc]b:]" in Perl.
2300    
2301  Arguments:  Arguments:
2302    ptr      pointer to the initial [    ptr      pointer to the initial [
2303    endptr   where to return the end pointer    endptr   where to return the end pointer
# Line 2302  int terminator;          /* Don't combin Line 2312  int terminator;          /* Don't combin
2312  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
2313  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != 0; ptr++)
2314    {    {
2315    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2316        ptr++;
2317      else
2318      {      {
     if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;  
2319      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2320        {        {
2321        *endptr = ptr;        *endptr = ptr;
2322        return TRUE;        return TRUE;
2323        }        }
2324        if (*ptr == CHAR_LEFT_SQUARE_BRACKET &&
2325             (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
2326              ptr[1] == CHAR_EQUALS_SIGN) &&
2327            check_posix_syntax(ptr, endptr))
2328          return FALSE;
2329      }      }
2330    }    }
2331  return FALSE;  return FALSE;
# Line 3019  Arguments: Line 3035  Arguments:
3035    firstbyteptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)    firstbyteptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
3036    reqbyteptr     set to the last literal character required, else < 0    reqbyteptr     set to the last literal character required, else < 0
3037    bcptr          points to current branch chain    bcptr          points to current branch chain
3038      cond_depth     conditional nesting depth
3039    cd             contains pointers to tables etc.    cd             contains pointers to tables etc.
3040    lengthptr      NULL during the real compile phase    lengthptr      NULL during the real compile phase
3041                   points to length accumulator during pre-compile phase                   points to length accumulator during pre-compile phase
# Line 3030  Returns:         TRUE on success Line 3047  Returns:         TRUE on success
3047  static BOOL  static BOOL
3048  compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,  compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,
3049    int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,    int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
3050    compile_data *cd, int *lengthptr)    int cond_depth, compile_data *cd, int *lengthptr)
3051  {  {
3052  int repeat_type, op_type;  int repeat_type, op_type;
3053  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */  int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
# Line 5743  for (;; ptr++) Line 5760  for (;; ptr++)
5760    
5761              /* If not a forward reference, and the subpattern is still open,              /* If not a forward reference, and the subpattern is still open,
5762              this is a recursive call. We check to see if this is a left              this is a recursive call. We check to see if this is a left
5763              recursion that could loop for ever, and diagnose that case. */              recursion that could loop for ever, and diagnose that case. We
5764                must not, however, do this check if we are in a conditional
5765                subpattern because the condition might be testing for recursion in
5766                a pattern such as /(?(R)a+|(?R)b)/, which is perfectly valid.
5767                Forever loops are also detected at runtime, so those that occur in
5768                conditional subpatterns will be picked up then. */
5769    
5770              else if (GET(called, 1) == 0 &&              else if (GET(called, 1) == 0 && cond_depth <= 0 &&
5771                       could_be_empty(called, code, bcptr, utf8, cd))                       could_be_empty(called, code, bcptr, utf8, cd))
5772                {                {
5773                *errorcodeptr = ERR40;                *errorcodeptr = ERR40;
# Line 5877  for (;; ptr++) Line 5899  for (;; ptr++)
5899      repeated. We copy code into a non-register variable (tempcode) in order to      repeated. We copy code into a non-register variable (tempcode) in order to
5900      be able to pass its address because some compilers complain otherwise. */      be able to pass its address because some compilers complain otherwise. */
5901    
5902      previous = code;                   /* For handling repetition */      previous = code;                      /* For handling repetition */
5903      *code = bravalue;      *code = bravalue;
5904      tempcode = code;      tempcode = code;
5905      tempreqvary = cd->req_varyopt;     /* Save value before bracket */      tempreqvary = cd->req_varyopt;        /* Save value before bracket */
5906      length_prevgroup = 0;              /* Initialize for pre-compile phase */      length_prevgroup = 0;                 /* Initialize for pre-compile phase */
5907    
5908      if (!compile_regex(      if (!compile_regex(
5909           newoptions,                   /* The complete new option state */           newoptions,                      /* The complete new option state */
5910           &tempcode,                    /* Where to put code (updated) */           &tempcode,                       /* Where to put code (updated) */
5911           &ptr,                         /* Input pointer (updated) */           &ptr,                            /* Input pointer (updated) */
5912           errorcodeptr,                 /* Where to put an error message */           errorcodeptr,                    /* Where to put an error message */
5913           (bravalue == OP_ASSERTBACK ||           (bravalue == OP_ASSERTBACK ||
5914            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
5915           reset_bracount,               /* True if (?| group */           reset_bracount,                  /* True if (?| group */
5916           skipbytes,                    /* Skip over bracket number */           skipbytes,                       /* Skip over bracket number */
5917           &subfirstbyte,                /* For possible first char */           cond_depth +
5918           &subreqbyte,                  /* For possible last char */             ((bravalue == OP_COND)?1:0),   /* Depth of condition subpatterns */
5919           bcptr,                        /* Current branch chain */           &subfirstbyte,                   /* For possible first char */
5920           cd,                           /* Tables block */           &subreqbyte,                     /* For possible last char */
5921           (lengthptr == NULL)? NULL :   /* Actual compile phase */           bcptr,                           /* Current branch chain */
5922             &length_prevgroup           /* Pre-compile phase */           cd,                              /* Tables block */
5923             (lengthptr == NULL)? NULL :      /* Actual compile phase */
5924               &length_prevgroup              /* Pre-compile phase */
5925           ))           ))
5926        goto FAILED;        goto FAILED;
5927    
# Line 6355  Arguments: Line 6379  Arguments:
6379    lookbehind     TRUE if this is a lookbehind assertion    lookbehind     TRUE if this is a lookbehind assertion
6380    reset_bracount TRUE to reset the count for each branch    reset_bracount TRUE to reset the count for each branch
6381    skipbytes      skip this many bytes at start (for brackets and OP_COND)    skipbytes      skip this many bytes at start (for brackets and OP_COND)
6382      cond_depth     depth of nesting for conditional subpatterns
6383    firstbyteptr   place to put the first required character, or a negative number    firstbyteptr   place to put the first required character, or a negative number
6384    reqbyteptr     place to put the last required character, or a negative number    reqbyteptr     place to put the last required character, or a negative number
6385    bcptr          pointer to the chain of currently open branches    bcptr          pointer to the chain of currently open branches
# Line 6368  Returns:         TRUE on success Line 6393  Returns:         TRUE on success
6393  static BOOL  static BOOL
6394  compile_regex(int options, uschar **codeptr, const uschar **ptrptr,  compile_regex(int options, uschar **codeptr, const uschar **ptrptr,
6395    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
6396    int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd,    int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
6397    int *lengthptr)    compile_data *cd, int *lengthptr)
6398  {  {
6399  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
6400  uschar *code = *codeptr;  uschar *code = *codeptr;
# Line 6448  for (;;) Line 6473  for (;;)
6473    into the length. */    into the length. */
6474    
6475    if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,    if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,
6476          &branchreqbyte, &bc, cd, (lengthptr == NULL)? NULL : &length))          &branchreqbyte, &bc, cond_depth, cd,
6477            (lengthptr == NULL)? NULL : &length))
6478      {      {
6479      *ptrptr = ptr;      *ptrptr = ptr;
6480      return FALSE;      return FALSE;
# Line 7084  if ((options & PCRE_UCP) != 0) Line 7110  if ((options & PCRE_UCP) != 0)
7110    
7111  /* Check validity of \R options. */  /* Check validity of \R options. */
7112    
7113  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))  if ((options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) ==
7114         (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
7115    {    {
7116    case 0:    errorcode = ERR56;
7117    case PCRE_BSR_ANYCRLF:    goto PCRE_EARLY_ERROR_RETURN;
   case PCRE_BSR_UNICODE:  
   break;  
   default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;  
7118    }    }
7119    
7120  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The
# Line 7176  ptr += skipatstart; Line 7200  ptr += skipatstart;
7200  code = cworkspace;  code = cworkspace;
7201  *code = OP_BRA;  *code = OP_BRA;
7202  (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,  (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
7203    FALSE, 0, &firstbyte, &reqbyte, NULL, cd, &length);    FALSE, 0, 0, &firstbyte, &reqbyte, NULL, cd, &length);
7204  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
7205    
7206  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
# Line 7249  of the function here. */ Line 7273  of the function here. */
7273  ptr = (const uschar *)pattern + skipatstart;  ptr = (const uschar *)pattern + skipatstart;
7274  code = (uschar *)codestart;  code = (uschar *)codestart;
7275  *code = OP_BRA;  *code = OP_BRA;
7276  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0,  (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
7277    &firstbyte, &reqbyte, NULL, cd, NULL);    &firstbyte, &reqbyte, NULL, cd, NULL);
7278  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
7279  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;

Legend:
Removed from v.637  
changed lines
  Added in v.642

  ViewVC Help
Powered by ViewVC 1.1.5