/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1352 by ph10, Mon Jul 29 15:49:21 2013 UTC revision 1359 by ph10, Tue Sep 3 10:10:59 2013 UTC
# Line 115  kicks in at the same number of forward r Line 115  kicks in at the same number of forward r
115  #define COMPILE_WORK_SIZE (2048*LINK_SIZE)  #define COMPILE_WORK_SIZE (2048*LINK_SIZE)
116  #define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE)  #define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE)
117    
118    /* This value determines the size of the initial vector that is used for
119    remembering named groups during the pre-compile. It is allocated on the stack,
120    but if it is too small, it is expanded using malloc(), in a similar way to the
121    workspace. The value is the number of slots in the list. */
122    
123    #define NAMED_GROUP_LIST_SIZE  20
124    
125  /* The overrun tests check for a slightly smaller size so that they detect the  /* The overrun tests check for a slightly smaller size so that they detect the
126  overrun before it actually does run off the end of the data block. */  overrun before it actually does run off the end of the data block. */
127    
# Line 1358  return p; Line 1365  return p;
1365    
1366    
1367  /*************************************************  /*************************************************
 *  Subroutine for finding forward reference      *  
 *************************************************/  
   
 /* This recursive function is called only from find_parens() below. The  
 top-level call starts at the beginning of the pattern. All other calls must  
 start at a parenthesis. It scans along a pattern's text looking for capturing  
 subpatterns, and counting them. If it finds a named pattern that matches the  
 name it is given, it returns its number. Alternatively, if the name is NULL, it  
 returns when it reaches a given numbered subpattern. Recursion is used to keep  
 track of subpatterns that reset the capturing group numbers - the (?| feature.  
   
 This function was originally called only from the second pass, in which we know  
 that if (?< or (?' or (?P< is encountered, the name will be correctly  
 terminated because that is checked in the first pass. There is now one call to  
 this function in the first pass, to check for a recursive back reference by  
 name (so that we can make the whole group atomic). In this case, we need check  
 only up to the current position in the pattern, and that is still OK because  
 any previous occurrences will have been checked. To make this work, the test  
 for "end of pattern" is a check against cd->end_pattern in the main loop,  
 instead of looking for a binary zero. This means that the special first-pass  
 call can adjust cd->end_pattern temporarily. (Checks for binary zero while  
 processing items within the loop are OK, because afterwards the main loop will  
 terminate.)  
   
 Arguments:  
   ptrptr       address of the current character pointer (updated)  
   cd           compile background data  
   name         name to seek, or NULL if seeking a numbered subpattern  
   lorn         name length, or subpattern number if name is NULL  
   xmode        TRUE if we are in /x mode  
   utf          TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode  
   count        pointer to the current capturing subpattern number (updated)  
   
 Returns:       the number of the named subpattern, or -1 if not found  
 */  
   
 static int  
 find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn,  
   BOOL xmode, BOOL utf, int *count)  
 {  
 pcre_uchar *ptr = *ptrptr;  
 int start_count = *count;  
 int hwm_count = start_count;  
 BOOL dup_parens = FALSE;  
   
 /* If the first character is a parenthesis, check on the type of group we are  
 dealing with. The very first call may not start with a parenthesis. */  
   
 if (ptr[0] == CHAR_LEFT_PARENTHESIS)  
   {  
   /* Handle specials such as (*SKIP) or (*UTF8) etc. */  
   
   if (ptr[1] == CHAR_ASTERISK)  
     {  
     ptr += 2;  
     while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;  
     }  
   
   /* Handle a normal, unnamed capturing parenthesis. */  
   
   else if (ptr[1] != CHAR_QUESTION_MARK)  
     {  
     *count += 1;  
     if (name == NULL && *count == lorn) return *count;  
     ptr++;  
     }  
   
   /* All cases now have (? at the start. Remember when we are in a group  
   where the parenthesis numbers are duplicated. */  
   
   else if (ptr[2] == CHAR_VERTICAL_LINE)  
     {  
     ptr += 3;  
     dup_parens = TRUE;  
     }  
   
   /* Handle comments; all characters are allowed until a ket is reached. */  
   
   else if (ptr[2] == CHAR_NUMBER_SIGN)  
     {  
     for (ptr += 3; *ptr != CHAR_NULL; ptr++)  
       if (*ptr == CHAR_RIGHT_PARENTHESIS) break;  
     goto FAIL_EXIT;  
     }  
   
   /* Handle a condition. If it is an assertion, just carry on so that it  
   is processed as normal. If not, skip to the closing parenthesis of the  
   condition (there can't be any nested parens). */  
   
   else if (ptr[2] == CHAR_LEFT_PARENTHESIS)  
     {  
     ptr += 2;  
     if (ptr[1] != CHAR_QUESTION_MARK)  
       {  
       while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;  
       if (*ptr != CHAR_NULL) ptr++;  
       }  
     }  
   
   /* Start with (? but not a condition. */  
   
   else  
     {  
     ptr += 2;  
     if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */  
   
     /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */  
   
     if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&  
         ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)  
       {  
       pcre_uchar term;  
       const pcre_uchar *thisname;  
       *count += 1;  
       if (name == NULL && *count == lorn) return *count;  
       term = *ptr++;  
       if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;  
       thisname = ptr;  
       while (*ptr != term) ptr++;  
       if (name != NULL && lorn == (int)(ptr - thisname) &&  
           STRNCMP_UC_UC(name, thisname, (unsigned int)lorn) == 0)  
         return *count;  
       term++;  
       }  
     }  
   }  
   
 /* Past any initial parenthesis handling, scan for parentheses or vertical  
 bars. Stop if we get to cd->end_pattern. Note that this is important for the  
 first-pass call when this value is temporarily adjusted to stop at the current  
 position. So DO NOT change this to a test for binary zero. */  
   
 for (; ptr < cd->end_pattern; ptr++)  
   {  
   /* Skip over backslashed characters and also entire \Q...\E */  
   
   if (*ptr == CHAR_BACKSLASH)  
     {  
     if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT;  
     if (*ptr == CHAR_Q) for (;;)  
       {  
       while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {};  
       if (*ptr == CHAR_NULL) goto FAIL_EXIT;  
       if (*(++ptr) == CHAR_E) break;  
       }  
     continue;  
     }  
   
   /* Skip over character classes; this logic must be similar to the way they  
   are handled for real. If the first character is '^', skip it. Also, if the  
   first few characters (either before or after ^) are \Q\E or \E we skip them  
   too. This makes for compatibility with Perl. Note the use of STR macros to  
   encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */  
   
   if (*ptr == CHAR_LEFT_SQUARE_BRACKET)  
     {  
     BOOL negate_class = FALSE;  
     for (;;)  
       {  
       if (ptr[1] == CHAR_BACKSLASH)  
         {  
         if (ptr[2] == CHAR_E)  
           ptr+= 2;  
         else if (STRNCMP_UC_C8(ptr + 2,  
                  STR_Q STR_BACKSLASH STR_E, 3) == 0)  
           ptr += 4;  
         else  
           break;  
         }  
       else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)  
         {  
         negate_class = TRUE;  
         ptr++;  
         }  
       else break;  
       }  
   
     /* If the next character is ']', it is a data character that must be  
     skipped, except in JavaScript compatibility mode. */  
   
     if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&  
         (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)  
       ptr++;  
   
     while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)  
       {  
       if (*ptr == CHAR_NULL) return -1;  
       if (*ptr == CHAR_BACKSLASH)  
         {  
         if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT;  
         if (*ptr == CHAR_Q) for (;;)  
           {  
           while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {};  
           if (*ptr == CHAR_NULL) goto FAIL_EXIT;  
           if (*(++ptr) == CHAR_E) break;  
           }  
         continue;  
         }  
       }  
     continue;  
     }  
   
   /* Skip comments in /x mode */  
   
   if (xmode && *ptr == CHAR_NUMBER_SIGN)  
     {  
     ptr++;  
     while (*ptr != CHAR_NULL)  
       {  
       if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }  
       ptr++;  
 #ifdef SUPPORT_UTF  
       if (utf) FORWARDCHAR(ptr);  
 #endif  
       }  
     if (*ptr == CHAR_NULL) goto FAIL_EXIT;  
     continue;  
     }  
   
   /* Check for the special metacharacters */  
   
   if (*ptr == CHAR_LEFT_PARENTHESIS)  
     {  
     int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count);  
     if (rc > 0) return rc;  
     if (*ptr == CHAR_NULL) goto FAIL_EXIT;  
     }  
   
   else if (*ptr == CHAR_RIGHT_PARENTHESIS)  
     {  
     if (dup_parens && *count < hwm_count) *count = hwm_count;  
     goto FAIL_EXIT;  
     }  
   
   else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)  
     {  
     if (*count > hwm_count) hwm_count = *count;  
     *count = start_count;  
     }  
   }  
   
 FAIL_EXIT:  
 *ptrptr = ptr;  
 return -1;  
 }  
   
   
   
   
 /*************************************************  
 *       Find forward referenced subpattern       *  
 *************************************************/  
   
 /* This function scans along a pattern's text looking for capturing  
 subpatterns, and counting them. If it finds a named pattern that matches the  
 name it is given, it returns its number. Alternatively, if the name is NULL, it  
 returns when it reaches a given numbered subpattern. This is used for forward  
 references to subpatterns. We used to be able to start this scan from the  
 current compiling point, using the current count value from cd->bracount, and  
 do it all in a single loop, but the addition of the possibility of duplicate  
 subpattern numbers means that we have to scan from the very start, in order to  
 take account of such duplicates, and to use a recursive function to keep track  
 of the different types of group.  
   
 Arguments:  
   cd           compile background data  
   name         name to seek, or NULL if seeking a numbered subpattern  
   lorn         name length, or subpattern number if name is NULL  
   xmode        TRUE if we are in /x mode  
   utf          TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode  
   
 Returns:       the number of the found subpattern, or -1 if not found  
 */  
   
 static int  
 find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode,  
   BOOL utf)  
 {  
 pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern;  
 int count = 0;  
 int rc;  
   
 /* If the pattern does not start with an opening parenthesis, the first call  
 to find_parens_sub() will scan right to the end (if necessary). However, if it  
 does start with a parenthesis, find_parens_sub() will return when it hits the  
 matching closing parens. That is why we have to have a loop. */  
   
 for (;;)  
   {  
   rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count);  
   if (rc > 0 || *ptr++ == CHAR_NULL) break;  
   }  
   
 return rc;  
 }  
   
   
   
   
 /*************************************************  
1368  *      Find first significant op code            *  *      Find first significant op code            *
1369  *************************************************/  *************************************************/
1370    
# Line 5949  for (;; ptr++) Line 5656  for (;; ptr++)
5656            slot += cd->name_entry_size;            slot += cd->name_entry_size;
5657            }            }
5658    
5659          /* Found a previous named subpattern */          /* Found the named subpattern */
5660    
5661          if (i < cd->names_found)          if (i < cd->names_found)
5662            {            {
# Line 5958  for (;; ptr++) Line 5665  for (;; ptr++)
5665            code[1+LINK_SIZE]++;            code[1+LINK_SIZE]++;
5666            }            }
5667    
         /* Search the pattern for a forward reference */  
   
         else if ((i = find_parens(cd, name, namelen,  
                         (options & PCRE_EXTENDED) != 0, utf)) > 0)  
           {  
           PUT2(code, 2+LINK_SIZE, i);  
           code[1+LINK_SIZE]++;  
           }  
   
5668          /* If terminator == CHAR_NULL it means that the name followed directly          /* If terminator == CHAR_NULL it means that the name followed directly
5669          after the opening parenthesis [e.g. (?(abc)...] and in this case there          after the opening parenthesis [e.g. (?(abc)...] and in this case there
5670          are some further alternatives to try. For the cases where terminator !=          are some further alternatives to try. For the cases where terminator !=
# Line 6130  for (;; ptr++) Line 5828  for (;; ptr++)
5828          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5829          DEFINE_NAME:    /* Come here from (?< handling */          DEFINE_NAME:    /* Come here from (?< handling */
5830          case CHAR_APOSTROPHE:          case CHAR_APOSTROPHE:
5831            {          terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
5832            terminator = (*ptr == CHAR_LESS_THAN_SIGN)?            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
5833              CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;          name = ++ptr;
           name = ++ptr;  
5834    
5835            while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
5836            namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
5837    
5838            /* In the pre-compile phase, just do a syntax check. */          /* In the pre-compile phase, do a syntax check, remember the longest
5839            name, and then remember the group in a vector, expanding it if
5840            necessary. Duplicates for the same number are skipped; other duplicates
5841            are checked for validity. In the actual compile, there is nothing to
5842            do. */
5843    
5844            if (lengthptr != NULL)          if (lengthptr != NULL)
5845              {
5846              named_group *ng;
5847              pcre_uint32 number = cd->bracount + 1;
5848    
5849              if (*ptr != (pcre_uchar)terminator)
5850              {              {
5851              if (*ptr != (pcre_uchar)terminator)              *errorcodeptr = ERR42;
5852                {              goto FAILED;
5853                *errorcodeptr = ERR42;              }
5854                goto FAILED;  
5855                }            if (cd->names_found >= MAX_NAME_COUNT)
5856              if (cd->names_found >= MAX_NAME_COUNT)              {
5857                *errorcodeptr = ERR49;
5858                goto FAILED;
5859                }
5860    
5861              if (namelen + IMM2_SIZE + 1 > cd->name_entry_size)
5862                {
5863                cd->name_entry_size = namelen + IMM2_SIZE + 1;
5864                if (namelen > MAX_NAME_SIZE)
5865                {                {
5866                *errorcodeptr = ERR49;                *errorcodeptr = ERR48;
5867                goto FAILED;                goto FAILED;
5868                }                }
             if (namelen + IMM2_SIZE + 1 > cd->name_entry_size)  
               {  
               cd->name_entry_size = namelen + IMM2_SIZE + 1;  
               if (namelen > MAX_NAME_SIZE)  
                 {  
                 *errorcodeptr = ERR48;  
                 goto FAILED;  
                 }  
               }  
5869              }              }
5870    
5871            /* In the real compile, create the entry in the table, maintaining            /* Scan the list to check for duplicates. For duplicate names, if the
5872            alphabetical order. Duplicate names for different numbers are            number is the same, break the loop, which causes the name to be
5873            permitted only if PCRE_DUPNAMES is set. Duplicate names for the same            discarded; otherwise, if DUPNAMES is not set, give an error.
5874            number are always OK. (An existing number can be re-used if (?|            If it is set, allow the name with a different number, but continue
5875            appears in the pattern.) In either event, a duplicate name results in            scanning in case this is a duplicate with the same number. For
5876            a duplicate entry in the table, even if the number is the same. This            non-duplicate names, give an error if the number is duplicated. */
5877            is because the number of names, and hence the table size, is computed  
5878            in the pre-compile, and it affects various numbers and pointers which            ng = cd->named_groups;
5879            would all have to be modified, and the compiled code moved down, if            for (i = 0; i < cd->names_found; i++, ng++)
5880            duplicates with the same number were omitted from the table. This              {
5881            doesn't seem worth the hassle. However, *different* names for the              if (namelen == ng->length &&
5882            same number are not permitted. */                  STRNCMP_UC_UC(name, ng->name, namelen) == 0)
5883                  {
5884            else                if (ng->number == number) break;
5885              {                if ((options & PCRE_DUPNAMES) == 0)
             BOOL dupname = FALSE;  
             slot = cd->name_table;  
   
             for (i = 0; i < cd->names_found; i++)  
               {  
               int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(namelen));  
               if (crc == 0)  
                 {  
                 if (slot[IMM2_SIZE+namelen] == 0)  
                   {  
                   if (GET2(slot, 0) != cd->bracount + 1 &&  
                       (options & PCRE_DUPNAMES) == 0)  
                     {  
                     *errorcodeptr = ERR43;  
                     goto FAILED;  
                     }  
                   else dupname = TRUE;  
                   }  
                 else crc = -1;      /* Current name is a substring */  
                 }  
   
               /* Make space in the table and break the loop for an earlier  
               name. For a duplicate or later name, carry on. We do this for  
               duplicates so that in the simple case (when (?| is not used) they  
               are in order of their numbers. */  
   
               if (crc < 0)  
5886                  {                  {
5887                  memmove(slot + cd->name_entry_size, slot,                  *errorcodeptr = ERR43;
5888                    IN_UCHARS((cd->names_found - i) * cd->name_entry_size));                  goto FAILED;
5889                  break;                  }
5890                  }                }
5891                else if (ng->number == number)
5892                /* Continue the loop for a later or duplicate name */                {
5893                  *errorcodeptr = ERR65;
5894                slot += cd->name_entry_size;                goto FAILED;
5895                }                }
5896                }
             /* For non-duplicate names, check for a duplicate number before  
             adding the new name. */  
5897    
5898              if (!dupname)            if (i >= cd->names_found)     /* Not a duplicate with same number */
5899                {
5900                /* Increase the list size if necessary */
5901    
5902                if (cd->names_found >= cd->named_group_list_size)
5903                {                {
5904                pcre_uchar *cslot = cd->name_table;                int newsize = cd->named_group_list_size * 2;
5905                for (i = 0; i < cd->names_found; i++)                named_group *newspace = (PUBL(malloc))
5906                    (newsize * sizeof(named_group));
5907    
5908                  if (newspace == NULL)
5909                  {                  {
5910                  if (cslot != slot)                  *errorcodeptr = ERR21;
5911                    {                  goto FAILED;
5912                    if (GET2(cslot, 0) == cd->bracount + 1)                  }
5913                      {  
5914                      *errorcodeptr = ERR65;                memcpy(newspace, cd->named_groups,
5915                      goto FAILED;                  cd->named_group_list_size * sizeof(named_group));
5916                      }                if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
5917                    }                  (PUBL(free))((void *)cd->named_groups);
5918                  else i--;                cd->named_groups = newspace;
5919                  cslot += cd->name_entry_size;                cd->named_group_list_size = newsize;
5920                  }                }
5921                }  
5922                cd->named_groups[cd->names_found].name = name;
5923              PUT2(slot, 0, cd->bracount + 1);              cd->named_groups[cd->names_found].length = namelen;
5924              memcpy(slot + IMM2_SIZE, name, IN_UCHARS(namelen));              cd->named_groups[cd->names_found].number = number;
5925              slot[IMM2_SIZE + namelen] = 0;              cd->names_found++;
5926              }              }
5927            }            }
5928    
5929          /* In both pre-compile and compile, count the number of names we've          ptr++;                    /* Move past > or ' in both passes. */
         encountered. */  
   
         cd->names_found++;  
         ptr++;                    /* Move past > or ' */  
5930          goto NUMBERED_GROUP;          goto NUMBERED_GROUP;
5931    
5932    
# Line 6277  for (;; ptr++) Line 5956  for (;; ptr++)
5956    
5957          if (lengthptr != NULL)          if (lengthptr != NULL)
5958            {            {
5959            const pcre_uchar *temp;            named_group *ng;
5960    
5961            if (namelen == 0)            if (namelen == 0)
5962              {              {
5963              *errorcodeptr = ERR62;              *errorcodeptr = ERR62;
# Line 6295  for (;; ptr++) Line 5974  for (;; ptr++)
5974              goto FAILED;              goto FAILED;
5975              }              }
5976    
5977            /* The name table does not exist in the first pass, so we cannot            /* The name table does not exist in the first pass; instead we must
5978            do a simple search as in the code below. Instead, we have to scan the            scan the list of names encountered so far in order to get the
5979            pattern to find the number. It is important that we scan it only as            number. The number may be negative if it is for a name that may be
5980            far as we have got because the syntax of named subpatterns has not            duplicated. If the name is not found, set the value to 0 for a
5981            been checked for the rest of the pattern, and find_parens() assumes            forward reference. */
5982            correct syntax. In any case, it's a waste of resources to scan  
5983            further. We stop the scan at the current point by temporarily            ng = cd->named_groups;
5984            adjusting the value of cd->endpattern. */            for (i = 0; i < cd->names_found; i++, ng++)
5985                {
5986            temp = cd->end_pattern;              if (namelen == ng->length &&
5987            cd->end_pattern = ptr;                  STRNCMP_UC_UC(name, ng->name, namelen) == 0)
5988            recno = find_parens(cd, name, namelen,                break;
5989              (options & PCRE_EXTENDED) != 0, utf);              }
5990            cd->end_pattern = temp;            recno = (i < cd->names_found)? ng->number : 0;
           if (recno < 0) recno = 0;    /* Forward ref; set dummy number */  
5991            }            }
5992    
5993          /* In the real compile, seek the name in the table. We check the name          /* In the real compile, search the name table. We check the name
5994          first, and then check that we have reached the end of the name in the          first, and then check that we have reached the end of the name in the
5995          table. That way, if the name that is longer than any in the table,          table. That way, if the name is longer than any in the table, the
5996          the comparison will fail without reading beyond the table entry. */          comparison will fail without reading beyond the table entry. */
5997    
5998          else          else
5999            {            {
# Line 6328  for (;; ptr++) Line 6006  for (;; ptr++)
6006              slot += cd->name_entry_size;              slot += cd->name_entry_size;
6007              }              }
6008    
6009            if (i < cd->names_found)         /* Back reference */            if (i < cd->names_found)
6010              {              {
6011              recno = GET2(slot, 0);              recno = GET2(slot, 0);
6012              }              }
6013            else if ((recno =                /* Forward back reference */            else
                     find_parens(cd, name, namelen,  
                       (options & PCRE_EXTENDED) != 0, utf)) <= 0)  
6014              {              {
6015              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
6016              goto FAILED;              goto FAILED;
# Line 6444  for (;; ptr++) Line 6120  for (;; ptr++)
6120    
6121              if (called == NULL)              if (called == NULL)
6122                {                {
6123                if (find_parens(cd, NULL, recno,                if (recno > cd->final_bracount)
                     (options & PCRE_EXTENDED) != 0, utf) < 0)  
6124                  {                  {
6125                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
6126                  goto FAILED;                  goto FAILED;
# Line 7790  return c; Line 7465  return c;
7465    
7466    
7467  /*************************************************  /*************************************************
7468    *     Add an entry to the name/number table      *
7469    *************************************************/
7470    
7471    /* This function is called between compiling passes to add an entry to the
7472    name/number table, maintaining alphabetical order. Checking for permitted
7473    and forbidden duplicates has already been done.
7474    
7475    Arguments:
7476      cd           the compile data block
7477      name         the name to add
7478      length       the length of the name
7479      groupno      the group number
7480    
7481    Returns:       nothing
7482    */
7483    
7484    static void
7485    add_name(compile_data *cd, const pcre_uchar *name, int length,
7486      unsigned int groupno)
7487    {
7488    int i;
7489    pcre_uchar *slot = cd->name_table;
7490    
7491    for (i = 0; i < cd->names_found; i++)
7492      {
7493      int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(length));
7494      if (crc == 0 && slot[IMM2_SIZE+length] != 0)
7495        crc = -1; /* Current name is a substring */
7496    
7497      /* Make space in the table and break the loop for an earlier name. For a
7498      duplicate or later name, carry on. We do this for duplicates so that in the
7499      simple case (when ?(| is not used) they are in order of their numbers. In all
7500      cases they are in the order in which they appear in the pattern. */
7501    
7502      if (crc < 0)
7503        {
7504        memmove(slot + cd->name_entry_size, slot,
7505          IN_UCHARS((cd->names_found - i) * cd->name_entry_size));
7506        break;
7507        }
7508    
7509      /* Continue the loop for a later or duplicate name */
7510    
7511      slot += cd->name_entry_size;
7512      }
7513    
7514    PUT2(slot, 0, groupno);
7515    memcpy(slot + IMM2_SIZE, name, IN_UCHARS(length));
7516    slot[IMM2_SIZE + length] = 0;
7517    cd->names_found++;
7518    }
7519    
7520    
7521    
7522    /*************************************************
7523  *        Compile a Regular Expression            *  *        Compile a Regular Expression            *
7524  *************************************************/  *************************************************/
7525    
# Line 7876  new memory is obtained from malloc(). */ Line 7606  new memory is obtained from malloc(). */
7606    
7607  pcre_uchar cworkspace[COMPILE_WORK_SIZE];  pcre_uchar cworkspace[COMPILE_WORK_SIZE];
7608    
7609    /* This vector is used for remembering named groups during the pre-compile. In a
7610    similar way to cworkspace, it can be expanded using malloc() if necessary. */
7611    
7612    named_group named_groups[NAMED_GROUP_LIST_SIZE];
7613    
7614  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
7615    
7616  ptr = (const pcre_uchar *)pattern;  ptr = (const pcre_uchar *)pattern;
# Line 8142  cd->start_code = cworkspace; Line 7877  cd->start_code = cworkspace;
7877  cd->hwm = cworkspace;  cd->hwm = cworkspace;
7878  cd->start_workspace = cworkspace;  cd->start_workspace = cworkspace;
7879  cd->workspace_size = COMPILE_WORK_SIZE;  cd->workspace_size = COMPILE_WORK_SIZE;
7880    cd->named_groups = named_groups;
7881    cd->named_group_list_size = NAMED_GROUP_LIST_SIZE;
7882  cd->start_pattern = (const pcre_uchar *)pattern;  cd->start_pattern = (const pcre_uchar *)pattern;
7883  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
7884  cd->req_varyopt = 0;  cd->req_varyopt = 0;
# Line 8224  cd->final_bracount = cd->bracount;  /* S Line 7961  cd->final_bracount = cd->bracount;  /* S
7961  cd->assert_depth = 0;  cd->assert_depth = 0;
7962  cd->bracount = 0;  cd->bracount = 0;
7963  cd->max_lookbehind = 0;  cd->max_lookbehind = 0;
 cd->names_found = 0;  
7964  cd->name_table = (pcre_uchar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
7965  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
7966  cd->start_code = codestart;  cd->start_code = codestart;
# Line 8235  cd->had_pruneorskip = FALSE; Line 7971  cd->had_pruneorskip = FALSE;
7971  cd->check_lookbehind = FALSE;  cd->check_lookbehind = FALSE;
7972  cd->open_caps = NULL;  cd->open_caps = NULL;
7973    
7974    /* If any named groups were found, create the name/number table from the list
7975    created in the first pass. */
7976    
7977    if (cd->names_found > 0)
7978      {
7979      int i = cd->names_found;
7980      named_group *ng = cd->named_groups;
7981      cd->names_found = 0;
7982      for (; i > 0; i--, ng++)
7983        add_name(cd, ng->name, ng->length, ng->number);
7984      if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
7985        (PUBL(free))((void *)cd->named_groups);
7986      }
7987    
7988  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
7989  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
7990  of the function here. */  of the function here. */

Legend:
Removed from v.1352  
changed lines
  Added in v.1359

  ViewVC Help
Powered by ViewVC 1.1.5