/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1348 by ph10, Fri Jul 5 10:38:37 2013 UTC revision 1361 by ph10, Fri Sep 6 17:47:32 2013 UTC
# Line 115  kicks in at the same number of forward r Line 115  kicks in at the same number of forward r
115  #define COMPILE_WORK_SIZE (2048*LINK_SIZE)  #define COMPILE_WORK_SIZE (2048*LINK_SIZE)
116  #define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE)  #define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE)
117    
118    /* This value determines the size of the initial vector that is used for
119    remembering named groups during the pre-compile. It is allocated on the stack,
120    but if it is too small, it is expanded using malloc(), in a similar way to the
121    workspace. The value is the number of slots in the list. */
122    
123    #define NAMED_GROUP_LIST_SIZE  20
124    
125  /* The overrun tests check for a slightly smaller size so that they detect the  /* The overrun tests check for a slightly smaller size so that they detect the
126  overrun before it actually does run off the end of the data block. */  overrun before it actually does run off the end of the data block. */
127    
# Line 1358  return p; Line 1365  return p;
1365    
1366    
1367  /*************************************************  /*************************************************
 *  Subroutine for finding forward reference      *  
 *************************************************/  
   
 /* This recursive function is called only from find_parens() below. The  
 top-level call starts at the beginning of the pattern. All other calls must  
 start at a parenthesis. It scans along a pattern's text looking for capturing  
 subpatterns, and counting them. If it finds a named pattern that matches the  
 name it is given, it returns its number. Alternatively, if the name is NULL, it  
 returns when it reaches a given numbered subpattern. Recursion is used to keep  
 track of subpatterns that reset the capturing group numbers - the (?| feature.  
   
 This function was originally called only from the second pass, in which we know  
 that if (?< or (?' or (?P< is encountered, the name will be correctly  
 terminated because that is checked in the first pass. There is now one call to  
 this function in the first pass, to check for a recursive back reference by  
 name (so that we can make the whole group atomic). In this case, we need check  
 only up to the current position in the pattern, and that is still OK because  
 any previous occurrences will have been checked. To make this work, the test  
 for "end of pattern" is a check against cd->end_pattern in the main loop,  
 instead of looking for a binary zero. This means that the special first-pass  
 call can adjust cd->end_pattern temporarily. (Checks for binary zero while  
 processing items within the loop are OK, because afterwards the main loop will  
 terminate.)  
   
 Arguments:  
   ptrptr       address of the current character pointer (updated)  
   cd           compile background data  
   name         name to seek, or NULL if seeking a numbered subpattern  
   lorn         name length, or subpattern number if name is NULL  
   xmode        TRUE if we are in /x mode  
   utf          TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode  
   count        pointer to the current capturing subpattern number (updated)  
   
 Returns:       the number of the named subpattern, or -1 if not found  
 */  
   
 static int  
 find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn,  
   BOOL xmode, BOOL utf, int *count)  
 {  
 pcre_uchar *ptr = *ptrptr;  
 int start_count = *count;  
 int hwm_count = start_count;  
 BOOL dup_parens = FALSE;  
   
 /* If the first character is a parenthesis, check on the type of group we are  
 dealing with. The very first call may not start with a parenthesis. */  
   
 if (ptr[0] == CHAR_LEFT_PARENTHESIS)  
   {  
   /* Handle specials such as (*SKIP) or (*UTF8) etc. */  
   
   if (ptr[1] == CHAR_ASTERISK)  
     {  
     ptr += 2;  
     while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;  
     }  
   
   /* Handle a normal, unnamed capturing parenthesis. */  
   
   else if (ptr[1] != CHAR_QUESTION_MARK)  
     {  
     *count += 1;  
     if (name == NULL && *count == lorn) return *count;  
     ptr++;  
     }  
   
   /* All cases now have (? at the start. Remember when we are in a group  
   where the parenthesis numbers are duplicated. */  
   
   else if (ptr[2] == CHAR_VERTICAL_LINE)  
     {  
     ptr += 3;  
     dup_parens = TRUE;  
     }  
   
   /* Handle comments; all characters are allowed until a ket is reached. */  
   
   else if (ptr[2] == CHAR_NUMBER_SIGN)  
     {  
     for (ptr += 3; *ptr != CHAR_NULL; ptr++)  
       if (*ptr == CHAR_RIGHT_PARENTHESIS) break;  
     goto FAIL_EXIT;  
     }  
   
   /* Handle a condition. If it is an assertion, just carry on so that it  
   is processed as normal. If not, skip to the closing parenthesis of the  
   condition (there can't be any nested parens). */  
   
   else if (ptr[2] == CHAR_LEFT_PARENTHESIS)  
     {  
     ptr += 2;  
     if (ptr[1] != CHAR_QUESTION_MARK)  
       {  
       while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;  
       if (*ptr != CHAR_NULL) ptr++;  
       }  
     }  
   
   /* Start with (? but not a condition. */  
   
   else  
     {  
     ptr += 2;  
     if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */  
   
     /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */  
   
     if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&  
         ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)  
       {  
       pcre_uchar term;  
       const pcre_uchar *thisname;  
       *count += 1;  
       if (name == NULL && *count == lorn) return *count;  
       term = *ptr++;  
       if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;  
       thisname = ptr;  
       while (*ptr != term) ptr++;  
       if (name != NULL && lorn == (int)(ptr - thisname) &&  
           STRNCMP_UC_UC(name, thisname, (unsigned int)lorn) == 0)  
         return *count;  
       term++;  
       }  
     }  
   }  
   
 /* Past any initial parenthesis handling, scan for parentheses or vertical  
 bars. Stop if we get to cd->end_pattern. Note that this is important for the  
 first-pass call when this value is temporarily adjusted to stop at the current  
 position. So DO NOT change this to a test for binary zero. */  
   
 for (; ptr < cd->end_pattern; ptr++)  
   {  
   /* Skip over backslashed characters and also entire \Q...\E */  
   
   if (*ptr == CHAR_BACKSLASH)  
     {  
     if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT;  
     if (*ptr == CHAR_Q) for (;;)  
       {  
       while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {};  
       if (*ptr == CHAR_NULL) goto FAIL_EXIT;  
       if (*(++ptr) == CHAR_E) break;  
       }  
     continue;  
     }  
   
   /* Skip over character classes; this logic must be similar to the way they  
   are handled for real. If the first character is '^', skip it. Also, if the  
   first few characters (either before or after ^) are \Q\E or \E we skip them  
   too. This makes for compatibility with Perl. Note the use of STR macros to  
   encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */  
   
   if (*ptr == CHAR_LEFT_SQUARE_BRACKET)  
     {  
     BOOL negate_class = FALSE;  
     for (;;)  
       {  
       if (ptr[1] == CHAR_BACKSLASH)  
         {  
         if (ptr[2] == CHAR_E)  
           ptr+= 2;  
         else if (STRNCMP_UC_C8(ptr + 2,  
                  STR_Q STR_BACKSLASH STR_E, 3) == 0)  
           ptr += 4;  
         else  
           break;  
         }  
       else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)  
         {  
         negate_class = TRUE;  
         ptr++;  
         }  
       else break;  
       }  
   
     /* If the next character is ']', it is a data character that must be  
     skipped, except in JavaScript compatibility mode. */  
   
     if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&  
         (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)  
       ptr++;  
   
     while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)  
       {  
       if (*ptr == CHAR_NULL) return -1;  
       if (*ptr == CHAR_BACKSLASH)  
         {  
         if (*(++ptr) == CHAR_NULL) goto FAIL_EXIT;  
         if (*ptr == CHAR_Q) for (;;)  
           {  
           while (*(++ptr) != CHAR_NULL && *ptr != CHAR_BACKSLASH) {};  
           if (*ptr == CHAR_NULL) goto FAIL_EXIT;  
           if (*(++ptr) == CHAR_E) break;  
           }  
         continue;  
         }  
       }  
     continue;  
     }  
   
   /* Skip comments in /x mode */  
   
   if (xmode && *ptr == CHAR_NUMBER_SIGN)  
     {  
     ptr++;  
     while (*ptr != CHAR_NULL)  
       {  
       if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }  
       ptr++;  
 #ifdef SUPPORT_UTF  
       if (utf) FORWARDCHAR(ptr);  
 #endif  
       }  
     if (*ptr == CHAR_NULL) goto FAIL_EXIT;  
     continue;  
     }  
   
   /* Check for the special metacharacters */  
   
   if (*ptr == CHAR_LEFT_PARENTHESIS)  
     {  
     int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count);  
     if (rc > 0) return rc;  
     if (*ptr == CHAR_NULL) goto FAIL_EXIT;  
     }  
   
   else if (*ptr == CHAR_RIGHT_PARENTHESIS)  
     {  
     if (dup_parens && *count < hwm_count) *count = hwm_count;  
     goto FAIL_EXIT;  
     }  
   
   else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)  
     {  
     if (*count > hwm_count) hwm_count = *count;  
     *count = start_count;  
     }  
   }  
   
 FAIL_EXIT:  
 *ptrptr = ptr;  
 return -1;  
 }  
   
   
   
   
 /*************************************************  
 *       Find forward referenced subpattern       *  
 *************************************************/  
   
 /* This function scans along a pattern's text looking for capturing  
 subpatterns, and counting them. If it finds a named pattern that matches the  
 name it is given, it returns its number. Alternatively, if the name is NULL, it  
 returns when it reaches a given numbered subpattern. This is used for forward  
 references to subpatterns. We used to be able to start this scan from the  
 current compiling point, using the current count value from cd->bracount, and  
 do it all in a single loop, but the addition of the possibility of duplicate  
 subpattern numbers means that we have to scan from the very start, in order to  
 take account of such duplicates, and to use a recursive function to keep track  
 of the different types of group.  
   
 Arguments:  
   cd           compile background data  
   name         name to seek, or NULL if seeking a numbered subpattern  
   lorn         name length, or subpattern number if name is NULL  
   xmode        TRUE if we are in /x mode  
   utf          TRUE if we are in UTF-8 / UTF-16 / UTF-32 mode  
   
 Returns:       the number of the found subpattern, or -1 if not found  
 */  
   
 static int  
 find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode,  
   BOOL utf)  
 {  
 pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern;  
 int count = 0;  
 int rc;  
   
 /* If the pattern does not start with an opening parenthesis, the first call  
 to find_parens_sub() will scan right to the end (if necessary). However, if it  
 does start with a parenthesis, find_parens_sub() will return when it hits the  
 matching closing parens. That is why we have to have a loop. */  
   
 for (;;)  
   {  
   rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count);  
   if (rc > 0 || *ptr++ == CHAR_NULL) break;  
   }  
   
 return rc;  
 }  
   
   
   
   
 /*************************************************  
1368  *      Find first significant op code            *  *      Find first significant op code            *
1369  *************************************************/  *************************************************/
1370    
# Line 2015  for (;;) Line 1722  for (;;)
1722      case OP_QUERYI:      case OP_QUERYI:
1723      case OP_REF:      case OP_REF:
1724      case OP_REFI:      case OP_REFI:
1725        case OP_DNREF:
1726        case OP_DNREFI:
1727      case OP_SBRA:      case OP_SBRA:
1728      case OP_SBRAPOS:      case OP_SBRAPOS:
1729      case OP_SCBRA:      case OP_SCBRA:
# Line 2361  Returns:      TRUE if what is matched co Line 2070  Returns:      TRUE if what is matched co
2070  typedef struct recurse_check {  typedef struct recurse_check {
2071    struct recurse_check *prev;    struct recurse_check *prev;
2072    const pcre_uchar *group;    const pcre_uchar *group;
2073  } recurse_check;  } recurse_check;
2074    
2075  static BOOL  static BOOL
2076  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
# Line 2377  for (code = first_significant_code(code Line 2086  for (code = first_significant_code(code
2086    const pcre_uchar *ccode;    const pcre_uchar *ccode;
2087    
2088    c = *code;    c = *code;
2089    
2090    /* Skip over forward assertions; the other assertions are skipped by    /* Skip over forward assertions; the other assertions are skipped by
2091    first_significant_code() with a TRUE final argument. */    first_significant_code() with a TRUE final argument. */
2092    
# Line 2405  for (code = first_significant_code(code Line 2114  for (code = first_significant_code(code
2114      NULL. */      NULL. */
2115    
2116      if (cd->start_workspace != NULL)      if (cd->start_workspace != NULL)
2117        {        {
2118        const pcre_uchar *tcode;        const pcre_uchar *tcode;
2119        for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)        for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
2120          if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;          if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
2121        if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */        if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
2122        }        }
2123    
2124      /* If we are scanning a completed pattern, there are no forward references      /* If we are scanning a completed pattern, there are no forward references
2125      and all groups are complete. We need to detect whether this is a recursive      and all groups are complete. We need to detect whether this is a recursive
2126      call, as otherwise there will be an infinite loop. If it is a recursion,      call, as otherwise there will be an infinite loop. If it is a recursion,
2127      just skip over it. Simple recursions are easily detected. For mutual      just skip over it. Simple recursions are easily detected. For mutual
2128      recursions we keep a chain on the stack. */      recursions we keep a chain on the stack. */
2129    
2130      else      else
2131        {        {
2132        recurse_check *r = recurses;        recurse_check *r = recurses;
2133        const pcre_uchar *endgroup = scode;        const pcre_uchar *endgroup = scode;
2134    
2135        do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);        do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
2136        if (code >= scode && code <= endgroup) continue;  /* Simple recursion */        if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
2137    
2138        for (r = recurses; r != NULL; r = r->prev)        for (r = recurses; r != NULL; r = r->prev)
2139          if (r->group == scode) break;          if (r->group == scode) break;
2140        if (r != NULL) continue;   /* Mutual recursion */        if (r != NULL) continue;   /* Mutual recursion */
# Line 2436  for (code = first_significant_code(code Line 2145  for (code = first_significant_code(code
2145    
2146      empty_branch = FALSE;      empty_branch = FALSE;
2147      this_recurse.prev = recurses;      this_recurse.prev = recurses;
2148      this_recurse.group = scode;      this_recurse.group = scode;
2149    
2150      do      do
2151        {        {
2152        if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))        if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
# Line 2557  for (code = first_significant_code(code Line 2266  for (code = first_significant_code(code
2266      case OP_ANY:      case OP_ANY:
2267      case OP_ALLANY:      case OP_ALLANY:
2268      case OP_ANYBYTE:      case OP_ANYBYTE:
2269    
2270      case OP_PROP:      case OP_PROP:
2271      case OP_NOTPROP:      case OP_NOTPROP:
2272      case OP_ANYNL:      case OP_ANYNL:
2273    
2274      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
2275      case OP_HSPACE:      case OP_HSPACE:
2276      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
2277      case OP_VSPACE:      case OP_VSPACE:
2278      case OP_EXTUNI:      case OP_EXTUNI:
2279    
2280      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
2281      case OP_DIGIT:      case OP_DIGIT:
2282      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
2283      case OP_WHITESPACE:      case OP_WHITESPACE:
2284      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
2285      case OP_WORDCHAR:      case OP_WORDCHAR:
2286    
2287      case OP_CHAR:      case OP_CHAR:
2288      case OP_CHARI:      case OP_CHARI:
2289      case OP_NOT:      case OP_NOT:
2290      case OP_NOTI:      case OP_NOTI:
2291    
2292      case OP_PLUS:      case OP_PLUS:
2293      case OP_PLUSI:      case OP_PLUSI:
2294      case OP_MINPLUS:      case OP_MINPLUS:
# Line 2589  for (code = first_significant_code(code Line 2298  for (code = first_significant_code(code
2298      case OP_NOTPLUSI:      case OP_NOTPLUSI:
2299      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
2300      case OP_NOTMINPLUSI:      case OP_NOTMINPLUSI:
2301    
2302      case OP_POSPLUS:      case OP_POSPLUS:
2303      case OP_POSPLUSI:      case OP_POSPLUSI:
2304      case OP_NOTPOSPLUS:      case OP_NOTPOSPLUS:
2305      case OP_NOTPOSPLUSI:      case OP_NOTPOSPLUSI:
2306    
2307      case OP_EXACT:      case OP_EXACT:
2308      case OP_EXACTI:      case OP_EXACTI:
2309      case OP_NOTEXACT:      case OP_NOTEXACT:
2310      case OP_NOTEXACTI:      case OP_NOTEXACTI:
2311    
2312      case OP_TYPEPLUS:      case OP_TYPEPLUS:
2313      case OP_TYPEMINPLUS:      case OP_TYPEMINPLUS:
2314      case OP_TYPEPOSPLUS:      case OP_TYPEPOSPLUS:
2315      case OP_TYPEEXACT:      case OP_TYPEEXACT:
2316    
2317      return FALSE;      return FALSE;
2318    
2319      /* These are going to continue, as they may be empty, but we have to      /* These are going to continue, as they may be empty, but we have to
# Line 2644  for (code = first_significant_code(code Line 2353  for (code = first_significant_code(code
2353  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2354      case OP_STAR:      case OP_STAR:
2355      case OP_STARI:      case OP_STARI:
2356      case OP_NOTSTAR:      case OP_NOTSTAR:
2357      case OP_NOTSTARI:      case OP_NOTSTARI:
2358    
2359      case OP_MINSTAR:      case OP_MINSTAR:
2360      case OP_MINSTARI:      case OP_MINSTARI:
2361      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2362      case OP_NOTMINSTARI:      case OP_NOTMINSTARI:
2363    
2364      case OP_POSSTAR:      case OP_POSSTAR:
2365      case OP_POSSTARI:      case OP_POSSTARI:
2366      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
2367      case OP_NOTPOSSTARI:      case OP_NOTPOSSTARI:
2368    
2369      case OP_QUERY:      case OP_QUERY:
2370      case OP_QUERYI:      case OP_QUERYI:
2371      case OP_NOTQUERY:      case OP_NOTQUERY:
2372      case OP_NOTQUERYI:      case OP_NOTQUERYI:
2373    
2374      case OP_MINQUERY:      case OP_MINQUERY:
2375      case OP_MINQUERYI:      case OP_MINQUERYI:
2376      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
2377      case OP_NOTMINQUERYI:      case OP_NOTMINQUERYI:
2378    
2379      case OP_POSQUERY:      case OP_POSQUERY:
2380      case OP_POSQUERYI:      case OP_POSQUERYI:
2381      case OP_NOTPOSQUERY:      case OP_NOTPOSQUERY:
2382      case OP_NOTPOSQUERYI:      case OP_NOTPOSQUERYI:
2383    
2384      if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);      if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
2385      break;      break;
2386    
2387      case OP_UPTO:      case OP_UPTO:
2388      case OP_UPTOI:      case OP_UPTOI:
2389      case OP_NOTUPTO:      case OP_NOTUPTO:
2390      case OP_NOTUPTOI:      case OP_NOTUPTOI:
2391    
2392      case OP_MINUPTO:      case OP_MINUPTO:
2393      case OP_MINUPTOI:      case OP_MINUPTOI:
2394      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
2395      case OP_NOTMINUPTOI:      case OP_NOTMINUPTOI:
2396    
2397      case OP_POSUPTO:      case OP_POSUPTO:
2398      case OP_POSUPTOI:      case OP_POSUPTOI:
2399      case OP_NOTPOSUPTO:      case OP_NOTPOSUPTO:
2400      case OP_NOTPOSUPTOI:      case OP_NOTPOSUPTOI:
2401    
2402      if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);      if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
2403      break;      break;
2404  #endif  #endif
# Line 3754  to find out the amount of memory needed, Line 3463  to find out the amount of memory needed,
3463  phase. The value of lengthptr distinguishes the two phases.  phase. The value of lengthptr distinguishes the two phases.
3464    
3465  Arguments:  Arguments:
3466    optionsptr     pointer to the option bits    optionsptr        pointer to the option bits
3467    codeptr        points to the pointer to the current code point    codeptr           points to the pointer to the current code point
3468    ptrptr         points to the current pattern pointer    ptrptr            points to the current pattern pointer
3469    errorcodeptr   points to error code variable    errorcodeptr      points to error code variable
3470    firstcharptr    place to put the first required character    firstcharptr      place to put the first required character
3471    firstcharflagsptr place to put the first character flags, or a negative number    firstcharflagsptr place to put the first character flags, or a negative number
3472    reqcharptr     place to put the last required character    reqcharptr        place to put the last required character
3473    reqcharflagsptr place to put the last required character flags, or a negative number    reqcharflagsptr   place to put the last required character flags, or a negative number
3474    bcptr          points to current branch chain    bcptr             points to current branch chain
3475    cond_depth     conditional nesting depth    cond_depth        conditional nesting depth
3476    cd             contains pointers to tables etc.    cd                contains pointers to tables etc.
3477    lengthptr      NULL during the real compile phase    lengthptr         NULL during the real compile phase
3478                   points to length accumulator during pre-compile phase                      points to length accumulator during pre-compile phase
3479    
3480  Returns:         TRUE on success  Returns:            TRUE on success
3481                   FALSE, with *errorcodeptr set non-zero on error                      FALSE, with *errorcodeptr set non-zero on error
3482  */  */
3483    
3484  static BOOL  static BOOL
# Line 5119  for (;; ptr++) Line 4828  for (;; ptr++)
4828      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
4829      stuff after it, but just skip the item if the repeat was {0,0}. */      stuff after it, but just skip the item if the repeat was {0,0}. */
4830    
4831      else if (*previous == OP_CLASS ||      else if (*previous == OP_CLASS || *previous == OP_NCLASS ||
              *previous == OP_NCLASS ||  
4832  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4833               *previous == OP_XCLASS ||               *previous == OP_XCLASS ||
4834  #endif  #endif
4835               *previous == OP_REF ||               *previous == OP_REF   || *previous == OP_REFI ||
4836               *previous == OP_REFI)               *previous == OP_DNREF || *previous == OP_DNREFI)
4837        {        {
4838        if (repeat_max == 0)        if (repeat_max == 0)
4839          {          {
# Line 5949  for (;; ptr++) Line 5657  for (;; ptr++)
5657            slot += cd->name_entry_size;            slot += cd->name_entry_size;
5658            }            }
5659    
5660          /* Found a previous named subpattern */          /* Found the named subpattern */
5661    
5662          if (i < cd->names_found)          if (i < cd->names_found)
5663            {            {
# Line 5958  for (;; ptr++) Line 5666  for (;; ptr++)
5666            code[1+LINK_SIZE]++;            code[1+LINK_SIZE]++;
5667            }            }
5668    
         /* Search the pattern for a forward reference */  
   
         else if ((i = find_parens(cd, name, namelen,  
                         (options & PCRE_EXTENDED) != 0, utf)) > 0)  
           {  
           PUT2(code, 2+LINK_SIZE, i);  
           code[1+LINK_SIZE]++;  
           }  
   
5669          /* If terminator == CHAR_NULL it means that the name followed directly          /* If terminator == CHAR_NULL it means that the name followed directly
5670          after the opening parenthesis [e.g. (?(abc)...] and in this case there          after the opening parenthesis [e.g. (?(abc)...] and in this case there
5671          are some further alternatives to try. For the cases where terminator !=          are some further alternatives to try. For the cases where terminator !=
# Line 6130  for (;; ptr++) Line 5829  for (;; ptr++)
5829          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5830          DEFINE_NAME:    /* Come here from (?< handling */          DEFINE_NAME:    /* Come here from (?< handling */
5831          case CHAR_APOSTROPHE:          case CHAR_APOSTROPHE:
5832            {          terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
5833            terminator = (*ptr == CHAR_LESS_THAN_SIGN)?            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
5834              CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;          name = ++ptr;
           name = ++ptr;  
5835    
5836            while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
5837            namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
5838    
5839            /* In the pre-compile phase, just do a syntax check. */          /* In the pre-compile phase, do a syntax check, remember the longest
5840            name, and then remember the group in a vector, expanding it if
5841            necessary. Duplicates for the same number are skipped; other duplicates
5842            are checked for validity. In the actual compile, there is nothing to
5843            do. */
5844    
5845            if (lengthptr != NULL)          if (lengthptr != NULL)
5846              {
5847              named_group *ng;
5848              pcre_uint32 number = cd->bracount + 1;
5849    
5850              if (*ptr != (pcre_uchar)terminator)
5851              {              {
5852              if (*ptr != (pcre_uchar)terminator)              *errorcodeptr = ERR42;
5853                {              goto FAILED;
5854                *errorcodeptr = ERR42;              }
5855                goto FAILED;  
5856                }            if (cd->names_found >= MAX_NAME_COUNT)
5857              if (cd->names_found >= MAX_NAME_COUNT)              {
5858                *errorcodeptr = ERR49;
5859                goto FAILED;
5860                }
5861    
5862              if (namelen + IMM2_SIZE + 1 > cd->name_entry_size)
5863                {
5864                cd->name_entry_size = namelen + IMM2_SIZE + 1;
5865                if (namelen > MAX_NAME_SIZE)
5866                {                {
5867                *errorcodeptr = ERR49;                *errorcodeptr = ERR48;
5868                goto FAILED;                goto FAILED;
5869                }                }
             if (namelen + IMM2_SIZE + 1 > cd->name_entry_size)  
               {  
               cd->name_entry_size = namelen + IMM2_SIZE + 1;  
               if (namelen > MAX_NAME_SIZE)  
                 {  
                 *errorcodeptr = ERR48;  
                 goto FAILED;  
                 }  
               }  
5870              }              }
5871    
5872            /* In the real compile, create the entry in the table, maintaining            /* Scan the list to check for duplicates. For duplicate names, if the
5873            alphabetical order. Duplicate names for different numbers are            number is the same, break the loop, which causes the name to be
5874            permitted only if PCRE_DUPNAMES is set. Duplicate names for the same            discarded; otherwise, if DUPNAMES is not set, give an error.
5875            number are always OK. (An existing number can be re-used if (?|            If it is set, allow the name with a different number, but continue
5876            appears in the pattern.) In either event, a duplicate name results in            scanning in case this is a duplicate with the same number. For
5877            a duplicate entry in the table, even if the number is the same. This            non-duplicate names, give an error if the number is duplicated. */
5878            is because the number of names, and hence the table size, is computed  
5879            in the pre-compile, and it affects various numbers and pointers which            ng = cd->named_groups;
5880            would all have to be modified, and the compiled code moved down, if            for (i = 0; i < cd->names_found; i++, ng++)
5881            duplicates with the same number were omitted from the table. This              {
5882            doesn't seem worth the hassle. However, *different* names for the              if (namelen == ng->length &&
5883            same number are not permitted. */                  STRNCMP_UC_UC(name, ng->name, namelen) == 0)
5884                  {
5885            else                if (ng->number == number) break;
5886              {                if ((options & PCRE_DUPNAMES) == 0)
             BOOL dupname = FALSE;  
             slot = cd->name_table;  
   
             for (i = 0; i < cd->names_found; i++)  
               {  
               int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(namelen));  
               if (crc == 0)  
5887                  {                  {
5888                  if (slot[IMM2_SIZE+namelen] == 0)                  *errorcodeptr = ERR43;
5889                    {                  goto FAILED;
                   if (GET2(slot, 0) != cd->bracount + 1 &&  
                       (options & PCRE_DUPNAMES) == 0)  
                     {  
                     *errorcodeptr = ERR43;  
                     goto FAILED;  
                     }  
                   else dupname = TRUE;  
                   }  
                 else crc = -1;      /* Current name is a substring */  
                 }  
   
               /* Make space in the table and break the loop for an earlier  
               name. For a duplicate or later name, carry on. We do this for  
               duplicates so that in the simple case (when (?| is not used) they  
               are in order of their numbers. */  
   
               if (crc < 0)  
                 {  
                 memmove(slot + cd->name_entry_size, slot,  
                   IN_UCHARS((cd->names_found - i) * cd->name_entry_size));  
                 break;  
5890                  }                  }
5891                  cd->dupnames = TRUE;  /* Duplicate names exist */
5892                  }
5893                else if (ng->number == number)
5894                  {
5895                  *errorcodeptr = ERR65;
5896                  goto FAILED;
5897                  }
5898                }
5899    
5900                /* Continue the loop for a later or duplicate name */            if (i >= cd->names_found)     /* Not a duplicate with same number */
5901                {
5902                slot += cd->name_entry_size;              /* Increase the list size if necessary */
5903                }  
5904                if (cd->names_found >= cd->named_group_list_size)
             /* For non-duplicate names, check for a duplicate number before  
             adding the new name. */  
   
             if (!dupname)  
5905                {                {
5906                pcre_uchar *cslot = cd->name_table;                int newsize = cd->named_group_list_size * 2;
5907                for (i = 0; i < cd->names_found; i++)                named_group *newspace = (PUBL(malloc))
5908                    (newsize * sizeof(named_group));
5909    
5910                  if (newspace == NULL)
5911                  {                  {
5912                  if (cslot != slot)                  *errorcodeptr = ERR21;
5913                    {                  goto FAILED;
5914                    if (GET2(cslot, 0) == cd->bracount + 1)                  }
5915                      {  
5916                      *errorcodeptr = ERR65;                memcpy(newspace, cd->named_groups,
5917                      goto FAILED;                  cd->named_group_list_size * sizeof(named_group));
5918                      }                if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
5919                    }                  (PUBL(free))((void *)cd->named_groups);
5920                  else i--;                cd->named_groups = newspace;
5921                  cslot += cd->name_entry_size;                cd->named_group_list_size = newsize;
5922                  }                }
5923                }  
5924                cd->named_groups[cd->names_found].name = name;
5925              PUT2(slot, 0, cd->bracount + 1);              cd->named_groups[cd->names_found].length = namelen;
5926              memcpy(slot + IMM2_SIZE, name, IN_UCHARS(namelen));              cd->named_groups[cd->names_found].number = number;
5927              slot[IMM2_SIZE + namelen] = 0;              cd->names_found++;
5928              }              }
5929            }            }
5930    
5931          /* In both pre-compile and compile, count the number of names we've          ptr++;                    /* Move past > or ' in both passes. */
         encountered. */  
   
         cd->names_found++;  
         ptr++;                    /* Move past > or ' */  
5932          goto NUMBERED_GROUP;          goto NUMBERED_GROUP;
5933    
5934    
# Line 6277  for (;; ptr++) Line 5958  for (;; ptr++)
5958    
5959          if (lengthptr != NULL)          if (lengthptr != NULL)
5960            {            {
5961            const pcre_uchar *temp;            named_group *ng;
5962    
5963            if (namelen == 0)            if (namelen == 0)
5964              {              {
5965              *errorcodeptr = ERR62;              *errorcodeptr = ERR62;
# Line 6295  for (;; ptr++) Line 5976  for (;; ptr++)
5976              goto FAILED;              goto FAILED;
5977              }              }
5978    
5979            /* The name table does not exist in the first pass, so we cannot            /* The name table does not exist in the first pass; instead we must
5980            do a simple search as in the code below. Instead, we have to scan the            scan the list of names encountered so far in order to get the
5981            pattern to find the number. It is important that we scan it only as            number. If the name is not found, set the value to 0 for a forward
5982            far as we have got because the syntax of named subpatterns has not            reference. */
5983            been checked for the rest of the pattern, and find_parens() assumes  
5984            correct syntax. In any case, it's a waste of resources to scan            ng = cd->named_groups;
5985            further. We stop the scan at the current point by temporarily            for (i = 0; i < cd->names_found; i++, ng++)
5986            adjusting the value of cd->endpattern. */              {
5987                if (namelen == ng->length &&
5988            temp = cd->end_pattern;                  STRNCMP_UC_UC(name, ng->name, namelen) == 0)
5989            cd->end_pattern = ptr;                break;
5990            recno = find_parens(cd, name, namelen,              }
5991              (options & PCRE_EXTENDED) != 0, utf);            recno = (i < cd->names_found)? ng->number : 0;
5992            cd->end_pattern = temp;  
5993            if (recno < 0) recno = 0;    /* Forward ref; set dummy number */            /* Count named back references. */
5994    
5995              if (!is_recurse) cd->namedrefcount++;
5996            }            }
5997    
5998          /* In the real compile, seek the name in the table. We check the name          /* In the real compile, search the name table. We check the name
5999          first, and then check that we have reached the end of the name in the          first, and then check that we have reached the end of the name in the
6000          table. That way, if the name that is longer than any in the table,          table. That way, if the name is longer than any in the table, the
6001          the comparison will fail without reading beyond the table entry. */          comparison will fail without reading beyond the table entry. */
6002    
6003          else          else
6004            {            {
# Line 6328  for (;; ptr++) Line 6011  for (;; ptr++)
6011              slot += cd->name_entry_size;              slot += cd->name_entry_size;
6012              }              }
6013    
6014            if (i < cd->names_found)         /* Back reference */            if (i < cd->names_found)
6015              {              {
6016              recno = GET2(slot, 0);              recno = GET2(slot, 0);
6017              }              }
6018            else if ((recno =                /* Forward back reference */            else
                     find_parens(cd, name, namelen,  
                       (options & PCRE_EXTENDED) != 0, utf)) <= 0)  
6019              {              {
6020              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
6021              goto FAILED;              goto FAILED;
6022              }              }
6023            }            }
6024    
6025          /* In both phases, we can now go to the code that handles numerical          /* In both phases, for recursions, we can now go to the code that
6026          recursion or backreferences. */          handles numerical recursion. */
6027    
6028          if (is_recurse) goto HANDLE_RECURSION;          if (is_recurse) goto HANDLE_RECURSION;
6029            else goto HANDLE_REFERENCE;  
6030            /* In the second pass we must see if the name is duplicated. If so, we
6031            generate a different opcode. */
6032    
6033            if (lengthptr == NULL && cd->dupnames)
6034              {
6035              int count = 1;
6036              unsigned int index = i;
6037              pcre_uchar *cslot = slot + cd->name_entry_size;
6038    
6039              for (i++; i < cd->names_found; i++)
6040                {
6041                if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
6042                count++;
6043                cslot += cd->name_entry_size;
6044                }
6045    
6046              if (count > 1)
6047                {
6048                if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
6049                previous = code;
6050                *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
6051                PUT2INC(code, 0, index);
6052                PUT2INC(code, 0, count);
6053    
6054                /* Process each potentially referenced group. */
6055    
6056                for (; slot < cslot; slot += cd->name_entry_size)
6057                  {
6058                  open_capitem *oc;
6059                  recno = GET2(slot, 0);
6060                  cd->backref_map |= (recno < 32)? (1 << recno) : 1;
6061                  if (recno > cd->top_backref) cd->top_backref = recno;
6062    
6063                  /* Check to see if this back reference is recursive, that is, it
6064                  is inside the group that it references. A flag is set so that the
6065                  group can be made atomic. */
6066    
6067                  for (oc = cd->open_caps; oc != NULL; oc = oc->next)
6068                    {
6069                    if (oc->number == recno)
6070                      {
6071                      oc->flag = TRUE;
6072                      break;
6073                      }
6074                    }
6075                  }
6076    
6077                continue;  /* End of back ref handling */
6078                }
6079              }
6080    
6081            /* First pass, or a non-duplicated name. */
6082    
6083            goto HANDLE_REFERENCE;
6084    
6085    
6086          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
6087          case CHAR_R:              /* Recursion */          case CHAR_R:              /* Recursion */
# Line 6444  for (;; ptr++) Line 6179  for (;; ptr++)
6179    
6180              if (called == NULL)              if (called == NULL)
6181                {                {
6182                if (find_parens(cd, NULL, recno,                if (recno > cd->final_bracount)
                     (options & PCRE_EXTENDED) != 0, utf) < 0)  
6183                  {                  {
6184                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
6185                  goto FAILED;                  goto FAILED;
# Line 6928  for (;; ptr++) Line 6662  for (;; ptr++)
6662          {          {
6663          open_capitem *oc;          open_capitem *oc;
6664          recno = -escape;          recno = -escape;
6665    
6666            /* Come here from named backref handling when the reference is to a
6667            single group (i.e. not to a duplicated name). */
6668    
6669          HANDLE_REFERENCE:    /* Come here from named backref handling */          HANDLE_REFERENCE:
6670          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
6671          previous = code;          previous = code;
6672          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
# Line 7058  for (;; ptr++) Line 6795  for (;; ptr++)
6795          *code++ = OP_PROP;          *code++ = OP_PROP;
6796          *code++ = PT_CLIST;          *code++ = PT_CLIST;
6797          *code++ = c;          *code++ = c;
6798          if (firstcharflags == REQ_UNSET) firstcharflags = zerofirstcharflags = REQ_NONE;          if (firstcharflags == REQ_UNSET)
6799              firstcharflags = zerofirstcharflags = REQ_NONE;
6800          break;          break;
6801          }          }
6802        }        }
# Line 7147  out the amount of memory needed, as well Line 6885  out the amount of memory needed, as well
6885  value of lengthptr distinguishes the two phases.  value of lengthptr distinguishes the two phases.
6886    
6887  Arguments:  Arguments:
6888    options        option bits, including any changes for this subpattern    options           option bits, including any changes for this subpattern
6889    codeptr        -> the address of the current code pointer    codeptr           -> the address of the current code pointer
6890    ptrptr         -> the address of the current pattern pointer    ptrptr            -> the address of the current pattern pointer
6891    errorcodeptr   -> pointer to error code variable    errorcodeptr      -> pointer to error code variable
6892    lookbehind     TRUE if this is a lookbehind assertion    lookbehind        TRUE if this is a lookbehind assertion
6893    reset_bracount TRUE to reset the count for each branch    reset_bracount    TRUE to reset the count for each branch
6894    skipbytes      skip this many bytes at start (for brackets and OP_COND)    skipbytes         skip this many bytes at start (for brackets and OP_COND)
6895    cond_depth     depth of nesting for conditional subpatterns    cond_depth        depth of nesting for conditional subpatterns
6896    firstcharptr    place to put the first required character    firstcharptr      place to put the first required character
6897    firstcharflagsptr place to put the first character flags, or a negative number    firstcharflagsptr place to put the first character flags, or a negative number
6898    reqcharptr     place to put the last required character    reqcharptr        place to put the last required character
6899    reqcharflagsptr place to put the last required character flags, or a negative number    reqcharflagsptr   place to put the last required character flags, or a negative number
6900    bcptr          pointer to the chain of currently open branches    bcptr             pointer to the chain of currently open branches
6901    cd             points to the data block with tables pointers etc.    cd                points to the data block with tables pointers etc.
6902    lengthptr      NULL during the real compile phase    lengthptr         NULL during the real compile phase
6903                   points to length accumulator during pre-compile phase                      points to length accumulator during pre-compile phase
6904    
6905  Returns:         TRUE on success  Returns:            TRUE on success
6906  */  */
6907    
6908  static BOOL  static BOOL
# Line 7701  return TRUE; Line 7439  return TRUE;
7439  discarded, because they can cause conflicts with actual literals that follow.  discarded, because they can cause conflicts with actual literals that follow.
7440  However, if we end up without a first char setting for an unanchored pattern,  However, if we end up without a first char setting for an unanchored pattern,
7441  it is worth scanning the regex to see if there is an initial asserted first  it is worth scanning the regex to see if there is an initial asserted first
7442  char. If all branches start with the same asserted char, or with a bracket all  char. If all branches start with the same asserted char, or with a
7443  of whose alternatives start with the same asserted char (recurse ad lib), then  non-conditional bracket all of whose alternatives start with the same asserted
7444  we return that char, otherwise -1.  char (recurse ad lib), then we return that char, with the flags set to zero or
7445    REQ_CASELESS; otherwise return zero with REQ_NONE in the flags.
7446    
7447  Arguments:  Arguments:
7448    code       points to start of expression (the bracket)    code       points to start of expression (the bracket)
7449    flags       points to the first char flags, or to REQ_NONE    flags      points to the first char flags, or to REQ_NONE
7450    inassert   TRUE if in an assertion    inassert   TRUE if in an assertion
7451    
7452  Returns:     the fixed first char, or 0 with REQ_NONE in flags  Returns:     the fixed first char, or 0 with REQ_NONE in flags
# Line 7744  do { Line 7483  do {
7483       case OP_ASSERT:       case OP_ASSERT:
7484       case OP_ONCE:       case OP_ONCE:
7485       case OP_ONCE_NC:       case OP_ONCE_NC:
      case OP_COND:  
7486       d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT);       d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT);
7487       if (dflags < 0)       if (dflags < 0)
7488         return 0;         return 0;
# Line 7789  return c; Line 7527  return c;
7527    
7528    
7529  /*************************************************  /*************************************************
7530    *     Add an entry to the name/number table      *
7531    *************************************************/
7532    
7533    /* This function is called between compiling passes to add an entry to the
7534    name/number table, maintaining alphabetical order. Checking for permitted
7535    and forbidden duplicates has already been done.
7536    
7537    Arguments:
7538      cd           the compile data block
7539      name         the name to add
7540      length       the length of the name
7541      groupno      the group number
7542    
7543    Returns:       nothing
7544    */
7545    
7546    static void
7547    add_name(compile_data *cd, const pcre_uchar *name, int length,
7548      unsigned int groupno)
7549    {
7550    int i;
7551    pcre_uchar *slot = cd->name_table;
7552    
7553    for (i = 0; i < cd->names_found; i++)
7554      {
7555      int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(length));
7556      if (crc == 0 && slot[IMM2_SIZE+length] != 0)
7557        crc = -1; /* Current name is a substring */
7558    
7559      /* Make space in the table and break the loop for an earlier name. For a
7560      duplicate or later name, carry on. We do this for duplicates so that in the
7561      simple case (when ?(| is not used) they are in order of their numbers. In all
7562      cases they are in the order in which they appear in the pattern. */
7563    
7564      if (crc < 0)
7565        {
7566        memmove(slot + cd->name_entry_size, slot,
7567          IN_UCHARS((cd->names_found - i) * cd->name_entry_size));
7568        break;
7569        }
7570    
7571      /* Continue the loop for a later or duplicate name */
7572    
7573      slot += cd->name_entry_size;
7574      }
7575    
7576    PUT2(slot, 0, groupno);
7577    memcpy(slot + IMM2_SIZE, name, IN_UCHARS(length));
7578    slot[IMM2_SIZE + length] = 0;
7579    cd->names_found++;
7580    }
7581    
7582    
7583    
7584    /*************************************************
7585  *        Compile a Regular Expression            *  *        Compile a Regular Expression            *
7586  *************************************************/  *************************************************/
7587    
# Line 7875  new memory is obtained from malloc(). */ Line 7668  new memory is obtained from malloc(). */
7668    
7669  pcre_uchar cworkspace[COMPILE_WORK_SIZE];  pcre_uchar cworkspace[COMPILE_WORK_SIZE];
7670    
7671    /* This vector is used for remembering named groups during the pre-compile. In a
7672    similar way to cworkspace, it can be expanded using malloc() if necessary. */
7673    
7674    named_group named_groups[NAMED_GROUP_LIST_SIZE];
7675    
7676  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
7677    
7678  ptr = (const pcre_uchar *)pattern;  ptr = (const pcre_uchar *)pattern;
# Line 8137  cd->bracount = cd->final_bracount = 0; Line 7935  cd->bracount = cd->final_bracount = 0;
7935  cd->names_found = 0;  cd->names_found = 0;
7936  cd->name_entry_size = 0;  cd->name_entry_size = 0;
7937  cd->name_table = NULL;  cd->name_table = NULL;
7938    cd->dupnames = FALSE;
7939    cd->namedrefcount = 0;
7940  cd->start_code = cworkspace;  cd->start_code = cworkspace;
7941  cd->hwm = cworkspace;  cd->hwm = cworkspace;
7942  cd->start_workspace = cworkspace;  cd->start_workspace = cworkspace;
7943  cd->workspace_size = COMPILE_WORK_SIZE;  cd->workspace_size = COMPILE_WORK_SIZE;
7944    cd->named_groups = named_groups;
7945    cd->named_group_list_size = NAMED_GROUP_LIST_SIZE;
7946  cd->start_pattern = (const pcre_uchar *)pattern;  cd->start_pattern = (const pcre_uchar *)pattern;
7947  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
7948  cd->req_varyopt = 0;  cd->req_varyopt = 0;
# Line 8172  if (length > MAX_PATTERN_SIZE) Line 7974  if (length > MAX_PATTERN_SIZE)
7974    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN;
7975    }    }
7976    
7977  /* Compute the size of data block needed and get it, either from malloc or  /* If there are groups with duplicate names and there are also references by
7978  externally provided function. Integer overflow should no longer be possible  name, we must allow for the possibility of named references to duplicated
7979  because nowadays we limit the maximum value of cd->names_found and  groups. These require an extra data item each. */
 cd->name_entry_size. */  
7980    
7981  size = sizeof(REAL_PCRE) + (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);  if (cd->dupnames && cd->namedrefcount > 0)
7982  re = (REAL_PCRE *)(PUBL(malloc))(size);    length += cd->namedrefcount * IMM2_SIZE * sizeof(pcre_uchar);
7983    
7984    /* Compute the size of the data block for storing the compiled pattern. Integer
7985    overflow should no longer be possible because nowadays we limit the maximum
7986    value of cd->names_found and cd->name_entry_size. */
7987    
7988    size = sizeof(REAL_PCRE) +
7989      (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);
7990    
7991    /* Get the memory. */
7992    
7993    re = (REAL_PCRE *)(PUBL(malloc))(size);
7994  if (re == NULL)  if (re == NULL)
7995    {    {
7996    errorcode = ERR21;    errorcode = ERR21;
# Line 8223  cd->final_bracount = cd->bracount;  /* S Line 8034  cd->final_bracount = cd->bracount;  /* S
8034  cd->assert_depth = 0;  cd->assert_depth = 0;
8035  cd->bracount = 0;  cd->bracount = 0;
8036  cd->max_lookbehind = 0;  cd->max_lookbehind = 0;
 cd->names_found = 0;  
8037  cd->name_table = (pcre_uchar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
8038  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
8039  cd->start_code = codestart;  cd->start_code = codestart;
# Line 8234  cd->had_pruneorskip = FALSE; Line 8044  cd->had_pruneorskip = FALSE;
8044  cd->check_lookbehind = FALSE;  cd->check_lookbehind = FALSE;
8045  cd->open_caps = NULL;  cd->open_caps = NULL;
8046    
8047    /* If any named groups were found, create the name/number table from the list
8048    created in the first pass. */
8049    
8050    if (cd->names_found > 0)
8051      {
8052      int i = cd->names_found;
8053      named_group *ng = cd->named_groups;
8054      cd->names_found = 0;
8055      for (; i > 0; i--, ng++)
8056        add_name(cd, ng->name, ng->length, ng->number);
8057      if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE)
8058        (PUBL(free))((void *)cd->named_groups);
8059      }
8060    
8061  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
8062  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
8063  of the function here. */  of the function here. */
# Line 8297  if (cd->hwm > cd->start_workspace) Line 8121  if (cd->hwm > cd->start_workspace)
8121      }      }
8122    }    }
8123    
8124  /* If the workspace had to be expanded, free the new memory. Set the pointer to  /* If the workspace had to be expanded, free the new memory. Set the pointer to
8125  NULL to indicate that forward references have been filled in. */  NULL to indicate that forward references have been filled in. */
8126    
8127  if (cd->workspace_size > COMPILE_WORK_SIZE)  if (cd->workspace_size > COMPILE_WORK_SIZE)
8128    (PUBL(free))((void *)cd->start_workspace);    (PUBL(free))((void *)cd->start_workspace);
8129  cd->start_workspace = NULL;  cd->start_workspace = NULL;
8130    
8131  /* Give an error if there's a back reference to a non-existent capturing  /* Give an error if there's a back reference to a non-existent capturing
8132  subpattern. */  subpattern. */
# Line 8506  if (code - codestart > length) Line 8330  if (code - codestart > length)
8330    }    }
8331  #endif   /* PCRE_DEBUG */  #endif   /* PCRE_DEBUG */
8332    
8333  /* Check for a pattern that can match an empty string, so that this information  /* Check for a pattern that can match an empty string, so that this information
8334  can be provided to applications. */  can be provided to applications. */
8335    
8336  do  do
# Line 8515  do Line 8339  do
8339      {      {
8340      re->flags |= PCRE_MATCH_EMPTY;      re->flags |= PCRE_MATCH_EMPTY;
8341      break;      break;
8342      }      }
8343    codestart += GET(codestart, 1);    codestart += GET(codestart, 1);
8344    }    }
8345  while (*codestart == OP_ALT);  while (*codestart == OP_ALT);

Legend:
Removed from v.1348  
changed lines
  Added in v.1361

  ViewVC Help
Powered by ViewVC 1.1.5