/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

Left column: revision 1398 by ph10, Tue Nov 12 15:20:26 2013 UTC | Right column: revision 1404 by ph10, Tue Nov 19 15:36:57 2013 UTC
# Line 532  static const char error_texts[] = Line 532  static const char error_texts[] =
532    "non-octal character in \\o{} (closing brace missing?)\0"    "non-octal character in \\o{} (closing brace missing?)\0"
533    "missing opening brace after \\o\0"    "missing opening brace after \\o\0"
534    "parentheses are too deeply nested\0"    "parentheses are too deeply nested\0"
535    "invalid range in character class\0"    "invalid range in character class\0"
536    "group name must start with a non-digit\0"    "group name must start with a non-digit\0"
537    ;    ;
538    
539  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 4446  for (;; ptr++) Line 4446  for (;; ptr++)
4446    /* Get next character in the pattern */    /* Get next character in the pattern */
4447    
4448    c = *ptr;    c = *ptr;
4449    
4450    /* If we are at the end of a nested substitution, revert to the outer level    /* If we are at the end of a nested substitution, revert to the outer level
4451    string. Nesting only happens one level deep. */    string. Nesting only happens one level deep. */
4452    
# Line 4548  for (;; ptr++) Line 4548  for (;; ptr++)
4548          }          }
4549        goto NORMAL_CHAR;        goto NORMAL_CHAR;
4550        }        }
4551      /* Control does not reach here. */      /* Control does not reach here. */
4552      }      }
4553    
4554    /* In extended mode, skip white space and comments. We need a loop in order    /* In extended mode, skip white space and comments. We need a loop in order
4555    to check for more white space and more comments after a comment. */    to check for more white space and more comments after a comment. */
4556    
4557    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
4558      {      {
4559      for (;;)      for (;;)
# Line 4564  for (;; ptr++) Line 4564  for (;; ptr++)
4564        while (*ptr != CHAR_NULL)        while (*ptr != CHAR_NULL)
4565          {          {
4566          if (IS_NEWLINE(ptr))         /* For non-fixed-length newline cases, */          if (IS_NEWLINE(ptr))         /* For non-fixed-length newline cases, */
4567            {                          /* IS_NEWLINE sets cd->nllen. */            {                          /* IS_NEWLINE sets cd->nllen. */
4568            ptr += cd->nllen;            ptr += cd->nllen;
4569            break;            break;
4570            }            }
4571          ptr++;          ptr++;
# Line 4574  for (;; ptr++) Line 4574  for (;; ptr++)
4574  #endif  #endif
4575          }          }
4576        c = *ptr;     /* Either NULL or the char after a newline */        c = *ptr;     /* Either NULL or the char after a newline */
4577        }        }
4578      }      }
4579    
4580    /* See if the next thing is a quantifier. */    /* See if the next thing is a quantifier. */
# Line 4602  for (;; ptr++) Line 4602  for (;; ptr++)
4602      previous_callout = code;      previous_callout = code;
4603      code = auto_callout(code, ptr, cd);      code = auto_callout(code, ptr, cd);
4604      }      }
4605    
4606    /* Process the next pattern item. */    /* Process the next pattern item. */
4607    
4608    switch(c)    switch(c)
# Line 4861  for (;; ptr++) Line 4861  for (;; ptr++)
4861            unsigned int ptype = 0;            unsigned int ptype = 0;
4862            int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);            int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
4863    
4864            /* The posix_substitutes table specifies which POSIX classes can be            /* The posix_substitutes table specifies which POSIX classes can be
4865            converted to \p or \P items. */            converted to \p or \P items. */
4866    
4867            if (posix_substitutes[pc] != NULL)            if (posix_substitutes[pc] != NULL)
4868              {              {
4869              nestptr = tempptr + 1;              nestptr = tempptr + 1;
4870              ptr = posix_substitutes[pc] - 1;              ptr = posix_substitutes[pc] - 1;
4871              continue;              continue;
4872              }              }
4873    
4874            /* There are three other classes that generate special property calls            /* There are three other classes that generate special property calls
4875            that are recognized only in an XCLASS. */            that are recognized only in an XCLASS. */
4876    
4877            else switch(posix_class)            else switch(posix_class)
4878              {              {
# Line 4889  for (;; ptr++) Line 4889  for (;; ptr++)
4889              *class_uchardata++ = 0;              *class_uchardata++ = 0;
4890              ptr = tempptr + 1;              ptr = tempptr + 1;
4891              continue;              continue;
4892    
4893              /* For all other POSIX classes, no special action is taken in UCP              /* For all other POSIX classes, no special action is taken in UCP
4894              mode. Fall through to the non_UCP case. */              mode. Fall through to the non_UCP case. */
4895    
4896              default:              default:
4897              break;              break;
4898              }              }
4899            }            }
4900  #endif  #endif
# Line 5153  for (;; ptr++) Line 5153  for (;; ptr++)
5153          else          else
5154  #endif  #endif
5155          d = *ptr;  /* Not UTF-8 mode */          d = *ptr;  /* Not UTF-8 mode */
5156    
5157          /* The second part of a range can be a single-character escape          /* The second part of a range can be a single-character escape
5158          sequence, but not any of the other escapes. Perl treats a hyphen as a          sequence, but not any of the other escapes. Perl treats a hyphen as a
5159          literal in such circumstances. However, in Perl's warning mode, a          literal in such circumstances. However, in Perl's warning mode, a
5160          warning is given, so PCRE now faults it as it is almost certainly a          warning is given, so PCRE now faults it as it is almost certainly a
5161          mistake on the user's part. */          mistake on the user's part. */
5162    
5163          if (!inescq)          if (!inescq)
5164            {            {
5165            if (d == CHAR_BACKSLASH)            if (d == CHAR_BACKSLASH)
5166              {              {
5167              int descape;              int descape;
5168              descape = check_escape(&ptr, &d, errorcodeptr, cd->bracount, options, TRUE);              descape = check_escape(&ptr, &d, errorcodeptr, cd->bracount, options, TRUE);
5169              if (*errorcodeptr != 0) goto FAILED;              if (*errorcodeptr != 0) goto FAILED;
5170    
5171              /* 0 means a character was put into d; \b is backspace; any other              /* 0 means a character was put into d; \b is backspace; any other
5172              special causes an error. */              special causes an error. */
5173    
5174              if (descape != 0)              if (descape != 0)
5175                {                {
5176                if (descape == ESC_b) d = CHAR_BS; else                if (descape == ESC_b) d = CHAR_BS; else
5177                  {                  {
5178                  *errorcodeptr = ERR83;                  *errorcodeptr = ERR83;
5179                  goto FAILED;                  goto FAILED;
5180                  }                  }
5181                }                }
5182              }              }
5183    
5184            /* A hyphen followed by a POSIX class is treated in the same way. */            /* A hyphen followed by a POSIX class is treated in the same way. */
5185    
5186            else if (d == CHAR_LEFT_SQUARE_BRACKET &&            else if (d == CHAR_LEFT_SQUARE_BRACKET &&
5187                     (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||                     (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
5188                      ptr[1] == CHAR_EQUALS_SIGN) &&                      ptr[1] == CHAR_EQUALS_SIGN) &&
5189                     check_posix_syntax(ptr, &tempptr))                     check_posix_syntax(ptr, &tempptr))
5190              {              {
5191              *errorcodeptr = ERR83;              *errorcodeptr = ERR83;
5192              goto FAILED;              goto FAILED;
5193              }              }
5194            }            }
5195    
5196          /* Check that the two values are in the correct order. Optimize          /* Check that the two values are in the correct order. Optimize
5197          one-character ranges. */          one-character ranges. */
# Line 5453  for (;; ptr++) Line 5453  for (;; ptr++)
5453      insert something before it. */      insert something before it. */
5454    
5455      tempcode = previous;      tempcode = previous;
5456    
5457      /* Before checking for a possessive quantifier, we must skip over      /* Before checking for a possessive quantifier, we must skip over
5458      whitespace and comments in extended mode because Perl allows white space at      whitespace and comments in extended mode because Perl allows white space at
5459      this point. */      this point. */
5460    
5461      if ((options & PCRE_EXTENDED) != 0)      if ((options & PCRE_EXTENDED) != 0)
5462        {        {
5463        const pcre_uchar *p = ptr + 1;        const pcre_uchar *p = ptr + 1;
# Line 5469  for (;; ptr++) Line 5469  for (;; ptr++)
5469          while (*p != CHAR_NULL)          while (*p != CHAR_NULL)
5470            {            {
5471            if (IS_NEWLINE(p))         /* For non-fixed-length newline cases, */            if (IS_NEWLINE(p))         /* For non-fixed-length newline cases, */
5472              {                        /* IS_NEWLINE sets cd->nllen. */              {                        /* IS_NEWLINE sets cd->nllen. */
5473              p += cd->nllen;              p += cd->nllen;
5474              break;              break;
5475              }              }
5476            p++;            p++;
# Line 6572  for (;; ptr++) Line 6572  for (;; ptr++)
6572            if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++;            if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++;
6573              else if (IS_DIGIT(*ptr)) refsign = 0;              else if (IS_DIGIT(*ptr)) refsign = 0;
6574            }            }
6575    
6576          /* Handle a number */          /* Handle a number */
6577    
6578          if (refsign >= 0)          if (refsign >= 0)
6579            {            {
6580            recno = 0;            recno = 0;
6581            while (IS_DIGIT(*ptr))            while (IS_DIGIT(*ptr))
6582              {              {
6583              recno = recno * 10 + (int)(*ptr - CHAR_0);              recno = recno * 10 + (int)(*ptr - CHAR_0);
6584              ptr++;              ptr++;
6585              }              }
6586            }            }
6587    
6588          /* Otherwise we expect to read a name; anything else is an error. When          /* Otherwise we expect to read a name; anything else is an error. When
6589          a name is one of a number of duplicates, a different opcode is used and          a name is one of a number of duplicates, a different opcode is used and
6590          it needs more memory. Unfortunately we cannot tell whether a name is a          it needs more memory. Unfortunately we cannot tell whether a name is a
# Line 6596  for (;; ptr++) Line 6596  for (;; ptr++)
6596              {              {
6597              *errorcodeptr = ERR84;              *errorcodeptr = ERR84;
6598              goto FAILED;              goto FAILED;
6599              }              }
6600            if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_word) == 0)            if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_word) == 0)
6601              {              {
6602              *errorcodeptr = ERR28;   /* Assertion expected */              *errorcodeptr = ERR28;   /* Assertion expected */
# Line 6626  for (;; ptr++) Line 6626  for (;; ptr++)
6626          if (lengthptr != NULL) break;          if (lengthptr != NULL) break;
6627    
6628          /* In the real compile we do the work of looking for the actual          /* In the real compile we do the work of looking for the actual
6629          reference. If refsign is not negative, it means we have a number in          reference. If refsign is not negative, it means we have a number in
6630          recno. */          recno. */
6631    
6632          if (refsign >= 0)          if (refsign >= 0)
6633            {            {
6634            if (recno <= 0)            if (recno <= 0)
# Line 6743  for (;; ptr++) Line 6743  for (;; ptr++)
6743          ptr++;          ptr++;
6744          break;          break;
6745    
6746          /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird          /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird
6747          thing to do, but Perl allows all assertions to be quantified, and when          thing to do, but Perl allows all assertions to be quantified, and when
6748          they contain capturing parentheses there may be a potential use for          they contain capturing parentheses there may be a potential use for
6749          this feature. Not that that applies to a quantified (?!) but we allow          this feature. Not that that applies to a quantified (?!) but we allow
6750          it for uniformity. */          it for uniformity. */
6751    
6752          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
6753          case CHAR_EXCLAMATION_MARK:            /* Negative lookahead */          case CHAR_EXCLAMATION_MARK:            /* Negative lookahead */
6754          ptr++;          ptr++;
6755          if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK &&          if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK &&
6756               ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK &&               ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK &&
6757              (ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2)))              (ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2)))
6758            {            {
# Line 6853  for (;; ptr++) Line 6853  for (;; ptr++)
6853          if (IS_DIGIT(*ptr))          if (IS_DIGIT(*ptr))
6854            {            {
6855            *errorcodeptr = ERR84;   /* Group name must start with non-digit */            *errorcodeptr = ERR84;   /* Group name must start with non-digit */
6856            goto FAILED;            goto FAILED;
6857            }            }
6858          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
6859          namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
6860    
# Line 6971  for (;; ptr++) Line 6971  for (;; ptr++)
6971          if (IS_DIGIT(*ptr))          if (IS_DIGIT(*ptr))
6972            {            {
6973            *errorcodeptr = ERR84;   /* Group name must start with non-digit */            *errorcodeptr = ERR84;   /* Group name must start with non-digit */
6974            goto FAILED;            goto FAILED;
6975            }            }
6976          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
6977          namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
6978    
# Line 7371  for (;; ptr++) Line 7371  for (;; ptr++)
7371      if ((cd->parens_depth += 1) > PARENS_NEST_LIMIT)      if ((cd->parens_depth += 1) > PARENS_NEST_LIMIT)
7372        {        {
7373        *errorcodeptr = ERR82;        *errorcodeptr = ERR82;
7374        goto FAILED;        goto FAILED;
7375        }        }
7376    
7377      /* Assertions used not to be repeatable, but this was changed for Perl      /* Assertions used not to be repeatable, but this was changed for Perl
7378      compatibility, so all kinds can now be repeated. We copy code into a      compatibility, so all kinds can now be repeated. We copy code into a
7379      non-register variable (tempcode) in order to be able to pass its address      non-register variable (tempcode) in order to be able to pass its address
# Line 7407  for (;; ptr++) Line 7407  for (;; ptr++)
7407             &length_prevgroup              /* Pre-compile phase */             &length_prevgroup              /* Pre-compile phase */
7408           ))           ))
7409        goto FAILED;        goto FAILED;
7410    
7411      cd->parens_depth -= 1;      cd->parens_depth -= 1;
7412    
7413      /* If this was an atomic group and there are no capturing groups within it,      /* If this was an atomic group and there are no capturing groups within it,
# Line 7625  for (;; ptr++) Line 7625  for (;; ptr++)
7625          {          {
7626          const pcre_uchar *p;          const pcre_uchar *p;
7627          pcre_uint32 cf;          pcre_uint32 cf;
7628    
7629          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
7630          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
7631            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
# Line 7647  for (;; ptr++) Line 7647  for (;; ptr++)
7647            goto NAMED_REF_OR_RECURSE;            goto NAMED_REF_OR_RECURSE;
7648            }            }
7649    
7650          /* Signed or unsigned number (cf = ptr[1]) is known to be plus or minus          /* Signed or unsigned number (cf = ptr[1]) is known to be plus or minus
7651          or a digit. */          or a digit. */
7652    
7653          p = ptr + 2;          p = ptr + 2;
# Line 8935  else Line 8935  else
8935      cd->nl[0] = newline;      cd->nl[0] = newline;
8936      }      }
8937    }    }
8938    
8939  /* Maximum back reference and backref bitmap. The bitmap records up to 31 back  /* Maximum back reference and backref bitmap. The bitmap records up to 31 back
8940  references to help in deciding whether (.*) can be treated as anchored or not.  references to help in deciding whether (.*) can be treated as anchored or not.
8941  */  */

Legend:
Removed from v.1398  
changed lines
  Added in v.1404

  ViewVC Help
Powered by ViewVC 1.1.5