/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1393 by ph10, Fri Nov 8 16:37:21 2013 UTC | revision 1394 by ph10, Sat Nov 9 09:17:20 2013 UTC
# Line 533  static const char error_texts[] = Line 533  static const char error_texts[] =
533    "missing opening brace after \\o\0"    "missing opening brace after \\o\0"
534    "parentheses are too deeply nested\0"    "parentheses are too deeply nested\0"
535    "invalid range in character class\0"    "invalid range in character class\0"
536      "group name must start with a non-digit\0"
537    ;    ;
538    
539  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 6465  for (;; ptr++) Line 6466  for (;; ptr++)
6466          tempptr = ptr;          tempptr = ptr;
6467    
6468          /* A condition can be an assertion, a number (referring to a numbered          /* A condition can be an assertion, a number (referring to a numbered
6469          group), a name (referring to a named group), or 'R', referring to          group's having been set), a name (referring to a named group), or 'R',
6470          recursion. R<digits> and R&name are also permitted for recursion tests.          referring to recursion. R<digits> and R&name are also permitted for
6471            recursion tests.
6472          There are several syntaxes for testing a named group: (?(name)) is used  
6473          by Python; Perl 5.10 onwards uses (?(<name>) or (?('name')).          There are ways of testing a named group: (?(name)) is used by Python;
6474            Perl 5.10 onwards uses (?(<name>) or (?('name')).
6475          There are two unfortunate ambiguities, caused by history. (a) 'R' can  
6476          be the recursive thing or the name 'R' (and similarly for 'R' followed          There is one unfortunate ambiguity, caused by history. 'R' can be the
6477          by digits), and (b) a number could be a name that consists of digits.          recursive thing or the name 'R' (and similarly for 'R' followed by
6478          In both cases, we look for a name first; if not found, we try the other          digits). We look for a name first; if not found, we try the other case.
         cases.  
6479    
6480          For compatibility with auto-callouts, we allow a callout to be          For compatibility with auto-callouts, we allow a callout to be
6481          specified before a condition that is an assertion. First, check for the          specified before a condition that is an assertion. First, check for the
# Line 6508  for (;; ptr++) Line 6508  for (;; ptr++)
6508    
6509          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
6510    
6511          if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND)          ptr++;
6512            if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
6513            {            {
6514            terminator = -1;            terminator = -1;
6515            ptr += 2;            ptr += 2;
# Line 6517  for (;; ptr++) Line 6518  for (;; ptr++)
6518    
6519          /* Check for a test for a named group's having been set, using the Perl          /* Check for a test for a named group's having been set, using the Perl
6520          syntax (?(<name>) or (?('name'), and also allow for the original PCRE          syntax (?(<name>) or (?('name'), and also allow for the original PCRE
6521          syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). As names may          syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). */
         consist entirely of digits, there is scope for ambiguity. */  
6522    
6523          else if (ptr[1] == CHAR_LESS_THAN_SIGN)          else if (*ptr == CHAR_LESS_THAN_SIGN)
6524            {            {
6525            terminator = CHAR_GREATER_THAN_SIGN;            terminator = CHAR_GREATER_THAN_SIGN;
6526            ptr++;            ptr++;
6527            }            }
6528          else if (ptr[1] == CHAR_APOSTROPHE)          else if (*ptr == CHAR_APOSTROPHE)
6529            {            {
6530            terminator = CHAR_APOSTROPHE;            terminator = CHAR_APOSTROPHE;
6531            ptr++;            ptr++;
# Line 6533  for (;; ptr++) Line 6533  for (;; ptr++)
6533          else          else
6534            {            {
6535            terminator = CHAR_NULL;            terminator = CHAR_NULL;
6536            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);            if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++;
6537                else if (IS_DIGIT(*ptr)) refsign = 0;
6538            }            }
6539    
6540          /* When a name is one of a number of duplicates, a different opcode is          /* Handle a number */
6541          used and it needs more memory. Unfortunately we cannot tell whether a  
6542          name is a duplicate in the first pass, so we have to allow for more          if (refsign >= 0)
         memory except when we know it is a relative numerical reference. */  
   
         if (refsign < 0 && lengthptr != NULL) *lengthptr += IMM2_SIZE;  
   
         /* We now expect to read a name (possibly all digits); any thing else  
         is an error. In the case of all digits, also get it as a number. */  
   
         if (!MAX_255(ptr[1]) || (cd->ctypes[ptr[1]] & ctype_word) == 0)  
6543            {            {
6544            ptr += 1;  /* To get the right offset */            recno = 0;
6545            *errorcodeptr = ERR28;            while (IS_DIGIT(*ptr))
6546            goto FAILED;              {
6547            }              recno = recno * 10 + (int)(*ptr - CHAR_0);
6548                ptr++;
6549                }
6550              }
6551    
6552            /* Otherwise we expect to read a name; anything else is an error. When
6553            a name is one of a number of duplicates, a different opcode is used and
6554            it needs more memory. Unfortunately we cannot tell whether a name is a
6555            duplicate in the first pass, so we have to allow for more memory. */
6556    
6557          recno = 0;          else
         name = ++ptr;  
         while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)  
6558            {            {
6559            if (recno >= 0)            if (IS_DIGIT(*ptr))
6560              recno = (IS_DIGIT(*ptr))? recno * 10 + (int)(*ptr - CHAR_0) : -1;              {
6561            ptr++;              *errorcodeptr = ERR84;
6562                goto FAILED;
6563                }
6564              if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_word) == 0)
6565                {
6566                *errorcodeptr = ERR28;   /* Assertion expected */
6567                goto FAILED;
6568                }
6569              name = ptr++;
6570              while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)
6571                {
6572                ptr++;
6573                }
6574              namelen = (int)(ptr - name);
6575              if (lengthptr != NULL) *lengthptr += IMM2_SIZE;
6576            }            }
         namelen = (int)(ptr - name);  
6577    
6578          /* Check the terminator */          /* Check the terminator */
6579    
6580          if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||          if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||
6581              *ptr++ != CHAR_RIGHT_PARENTHESIS)              *ptr++ != CHAR_RIGHT_PARENTHESIS)
6582            {            {
6583            ptr--;      /* Error offset */            ptr--;                  /* Error offset */
6584            *errorcodeptr = ERR26;            *errorcodeptr = ERR26;  /* Malformed number or name */
6585            goto FAILED;            goto FAILED;
6586            }            }
6587    
# Line 6578  for (;; ptr++) Line 6590  for (;; ptr++)
6590          if (lengthptr != NULL) break;          if (lengthptr != NULL) break;
6591    
6592          /* In the real compile we do the work of looking for the actual          /* In the real compile we do the work of looking for the actual
6593          reference. If the string started with "+" or "-" we require the rest to          reference. If refsign is not negative, it means we have a number in
6594          be digits, in which case recno will be set. */          recno. */
6595    
6596          if (refsign > 0)          if (refsign >= 0)
6597            {            {
6598            if (recno <= 0)            if (recno <= 0)
6599              {              {
6600              *errorcodeptr = ERR58;              *errorcodeptr = ERR35;
6601              goto FAILED;              goto FAILED;
6602              }              }
6603            recno = (refsign == CHAR_MINUS)?            if (refsign != 0) recno = (refsign == CHAR_MINUS)?
6604              cd->bracount - recno + 1 : recno +cd->bracount;              cd->bracount - recno + 1 : recno + cd->bracount;
6605            if (recno <= 0 || recno > cd->final_bracount)            if (recno <= 0 || recno > cd->final_bracount)
6606              {              {
6607              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
# Line 6599  for (;; ptr++) Line 6611  for (;; ptr++)
6611            break;            break;
6612            }            }
6613    
6614          /* Otherwise (did not start with "+" or "-"), start by looking for the          /* Otherwise look for the name. */
         name. */  
6615    
6616          slot = cd->name_table;          slot = cd->name_table;
6617          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
# Line 6641  for (;; ptr++) Line 6652  for (;; ptr++)
6652          /* If terminator == CHAR_NULL it means that the name followed directly          /* If terminator == CHAR_NULL it means that the name followed directly
6653          after the opening parenthesis [e.g. (?(abc)...] and in this case there          after the opening parenthesis [e.g. (?(abc)...] and in this case there
6654          are some further alternatives to try. For the cases where terminator !=          are some further alternatives to try. For the cases where terminator !=
6655          0 [things like (?(<name>... or (?('name')... or (?(R&name)... ] we have          CHAR_NULL [things like (?(<name>... or (?('name')... or (?(R&name)... ]
6656          now checked all the possibilities, so give an error. */          we have now checked all the possibilities, so give an error. */
6657    
6658          else if (terminator != CHAR_NULL)          else if (terminator != CHAR_NULL)
6659            {            {
# Line 6679  for (;; ptr++) Line 6690  for (;; ptr++)
6690            skipbytes = 1;            skipbytes = 1;
6691            }            }
6692    
6693          /* Check for the "name" actually being a subpattern number. We are          /* Reference to an unidentified subpattern. */
         in the second pass here, so final_bracount is set. */  
   
         else if (recno > 0 && recno <= cd->final_bracount)  
           {  
           PUT2(code, 2+LINK_SIZE, recno);  
           }  
   
         /* Either an unidentified subpattern, or a reference to (?(0) */  
6694    
6695          else          else
6696            {            {
6697            *errorcodeptr = (recno == 0)? ERR35: ERR15;            *errorcodeptr = ERR15;
6698            goto FAILED;            goto FAILED;
6699            }            }
6700          break;          break;
# Line 6811  for (;; ptr++) Line 6814  for (;; ptr++)
6814          terminator = (*ptr == CHAR_LESS_THAN_SIGN)?          terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
6815            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
6816          name = ++ptr;          name = ++ptr;
6817            if (IS_DIGIT(*ptr))
6818              {
6819              *errorcodeptr = ERR84;   /* Group name must start with non-digit */
6820              goto FAILED;
6821              }
6822          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
6823          namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
6824    
# Line 6925  for (;; ptr++) Line 6932  for (;; ptr++)
6932    
6933          NAMED_REF_OR_RECURSE:          NAMED_REF_OR_RECURSE:
6934          name = ++ptr;          name = ++ptr;
6935            if (IS_DIGIT(*ptr))
6936              {
6937              *errorcodeptr = ERR84;   /* Group name must start with non-digit */
6938              goto FAILED;
6939              }
6940          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
6941          namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
6942    
# Line 7576  for (;; ptr++) Line 7588  for (;; ptr++)
7588        if (escape == ESC_g)        if (escape == ESC_g)
7589          {          {
7590          const pcre_uchar *p;          const pcre_uchar *p;
7591            pcre_uint32 cf;
7592    
7593          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
7594          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
7595            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
7596    
7597          /* These two statements stop the compiler from warning about possibly          /* These two statements stop the compiler from warning about possibly
7598          unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In          unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
7599          fact, because we actually check for a number below, the paths that          fact, because we do the check for a number below, the paths that
7600          would actually be in error are never taken. */          would actually be in error are never taken. */
7601    
7602          skipbytes = 0;          skipbytes = 0;
7603          reset_bracount = FALSE;          reset_bracount = FALSE;
7604    
7605          /* Test for a name */          /* If it's not a signed or unsigned number, treat it as a name. */
7606    
7607          if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)          cf = ptr[1];
7608            if (cf != CHAR_PLUS && cf != CHAR_MINUS && !IS_DIGIT(cf))
7609            {            {
           BOOL is_a_number = TRUE;  
           for (p = ptr + 1; *p != CHAR_NULL && *p != (pcre_uchar)terminator; p++)  
             {  
             if (!MAX_255(*p)) { is_a_number = FALSE; break; }  
             if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE;  
             if ((cd->ctypes[*p] & ctype_word) == 0) break;  
             }  
           if (*p != (pcre_uchar)terminator)  
             {  
             *errorcodeptr = ERR57;  
             break;  
             }  
           if (is_a_number)  
             {  
             ptr++;  
             goto HANDLE_NUMERICAL_RECURSION;  
             }  
7610            is_recurse = TRUE;            is_recurse = TRUE;
7611            goto NAMED_REF_OR_RECURSE;            goto NAMED_REF_OR_RECURSE;
7612            }            }
7613    
7614          /* Test a signed number in angle brackets or quotes. */          /* Signed or unsigned number (cf = ptr[1]) is known to be plus or minus
7615            or a digit. */
7616    
7617          p = ptr + 2;          p = ptr + 2;
7618          while (IS_DIGIT(*p)) p++;          while (IS_DIGIT(*p)) p++;

Legend:
Removed from v.1393  
changed lines
  Added in v.1394

  ViewVC Help
Powered by ViewVC 1.1.5