/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1414 by zherczeg, Sun Dec 22 16:27:35 2013 UTC revision 1538 by ph10, Sun Mar 29 11:22:24 2015 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2013 University of Cambridge             Copyright (c) 1997-2014 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 47  supporting internal functions that are n Line 47  supporting internal functions that are n
47  #endif  #endif
48    
49  #define NLBLOCK cd             /* Block containing newline information */  #define NLBLOCK cd             /* Block containing newline information */
50  #define PSSTART start_pattern  /* Field containing processed string start */  #define PSSTART start_pattern  /* Field containing pattern start */
51  #define PSEND   end_pattern    /* Field containing processed string end */  #define PSEND   end_pattern    /* Field containing pattern end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
# Line 547  static const char error_texts[] = Line 547  static const char error_texts[] =
547    "parentheses are too deeply nested\0"    "parentheses are too deeply nested\0"
548    "invalid range in character class\0"    "invalid range in character class\0"
549    "group name must start with a non-digit\0"    "group name must start with a non-digit\0"
550      /* 85 */
551      "parentheses are too deeply nested (stack check)\0"
552      "digits missing in \\x{} or \\o{}\0"
553    ;    ;
554    
555  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 1257  else Line 1260  else
1260    
1261      case CHAR_o:      case CHAR_o:
1262      if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else      if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else
1263        if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR86; else
1264        {        {
1265        ptr += 2;        ptr += 2;
1266        c = 0;        c = 0;
# Line 1326  else Line 1330  else
1330        if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)        if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
1331          {          {
1332          ptr += 2;          ptr += 2;
1333            if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
1334              {
1335              *errorcodeptr = ERR86;
1336              break;
1337              }
1338          c = 0;          c = 0;
1339          overflow = FALSE;          overflow = FALSE;
1340          while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0)          while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0)
# Line 1581  read_repeat_counts(const pcre_uchar *p, Line 1590  read_repeat_counts(const pcre_uchar *p,
1590  int min = 0;  int min = 0;
1591  int max = -1;  int max = -1;
1592    
1593  /* Read the minimum value and do a paranoid check: a negative value indicates  while (IS_DIGIT(*p))
 an integer overflow. */  
   
 while (IS_DIGIT(*p)) min = min * 10 + (int)(*p++ - CHAR_0);  
 if (min < 0 || min > 65535)  
1594    {    {
1595    *errorcodeptr = ERR5;    min = min * 10 + (int)(*p++ - CHAR_0);
1596    return p;    if (min > 65535)
1597        {
1598        *errorcodeptr = ERR5;
1599        return p;
1600        }
1601    }    }
1602    
 /* Read the maximum value if there is one, and again do a paranoid on its size.  
 Also, max must not be less than min. */  
   
1603  if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else  if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
1604    {    {
1605    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
1606      {      {
1607      max = 0;      max = 0;
1608      while(IS_DIGIT(*p)) max = max * 10 + (int)(*p++ - CHAR_0);      while(IS_DIGIT(*p))
     if (max < 0 || max > 65535)  
1609        {        {
1610        *errorcodeptr = ERR5;        max = max * 10 + (int)(*p++ - CHAR_0);
1611        return p;        if (max > 65535)
1612            {
1613            *errorcodeptr = ERR5;
1614            return p;
1615            }
1616        }        }
1617      if (max < min)      if (max < min)
1618        {        {
# Line 1613  if (*p == CHAR_RIGHT_CURLY_BRACKET) max Line 1622  if (*p == CHAR_RIGHT_CURLY_BRACKET) max
1622      }      }
1623    }    }
1624    
 /* Fill in the required variables, and pass back the pointer to the terminating  
 '}'. */  
   
1625  *minp = min;  *minp = min;
1626  *maxp = max;  *maxp = max;
1627  return p;  return p;
# Line 2123  for (;;) Line 2129  for (;;)
2129        {        {
2130        case OP_CHAR:        case OP_CHAR:
2131        case OP_CHARI:        case OP_CHARI:
2132          case OP_NOT:
2133          case OP_NOTI:
2134        case OP_EXACT:        case OP_EXACT:
2135        case OP_EXACTI:        case OP_EXACTI:
2136          case OP_NOTEXACT:
2137          case OP_NOTEXACTI:
2138        case OP_UPTO:        case OP_UPTO:
2139        case OP_UPTOI:        case OP_UPTOI:
2140          case OP_NOTUPTO:
2141          case OP_NOTUPTOI:
2142        case OP_MINUPTO:        case OP_MINUPTO:
2143        case OP_MINUPTOI:        case OP_MINUPTOI:
2144          case OP_NOTMINUPTO:
2145          case OP_NOTMINUPTOI:
2146        case OP_POSUPTO:        case OP_POSUPTO:
2147        case OP_POSUPTOI:        case OP_POSUPTOI:
2148          case OP_NOTPOSUPTO:
2149          case OP_NOTPOSUPTOI:
2150        case OP_STAR:        case OP_STAR:
2151        case OP_STARI:        case OP_STARI:
2152          case OP_NOTSTAR:
2153          case OP_NOTSTARI:
2154        case OP_MINSTAR:        case OP_MINSTAR:
2155        case OP_MINSTARI:        case OP_MINSTARI:
2156          case OP_NOTMINSTAR:
2157          case OP_NOTMINSTARI:
2158        case OP_POSSTAR:        case OP_POSSTAR:
2159        case OP_POSSTARI:        case OP_POSSTARI:
2160          case OP_NOTPOSSTAR:
2161          case OP_NOTPOSSTARI:
2162        case OP_PLUS:        case OP_PLUS:
2163        case OP_PLUSI:        case OP_PLUSI:
2164          case OP_NOTPLUS:
2165          case OP_NOTPLUSI:
2166        case OP_MINPLUS:        case OP_MINPLUS:
2167        case OP_MINPLUSI:        case OP_MINPLUSI:
2168          case OP_NOTMINPLUS:
2169          case OP_NOTMINPLUSI:
2170        case OP_POSPLUS:        case OP_POSPLUS:
2171        case OP_POSPLUSI:        case OP_POSPLUSI:
2172          case OP_NOTPOSPLUS:
2173          case OP_NOTPOSPLUSI:
2174        case OP_QUERY:        case OP_QUERY:
2175        case OP_QUERYI:        case OP_QUERYI:
2176          case OP_NOTQUERY:
2177          case OP_NOTQUERYI:
2178        case OP_MINQUERY:        case OP_MINQUERY:
2179        case OP_MINQUERYI:        case OP_MINQUERYI:
2180          case OP_NOTMINQUERY:
2181          case OP_NOTMINQUERYI:
2182        case OP_POSQUERY:        case OP_POSQUERY:
2183        case OP_POSQUERYI:        case OP_POSQUERYI:
2184          case OP_NOTPOSQUERY:
2185          case OP_NOTPOSQUERYI:
2186        if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);        if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
2187        break;        break;
2188        }        }
# Line 2368  for (code = first_significant_code(code Line 2402  for (code = first_significant_code(code
2402    if (c == OP_RECURSE)    if (c == OP_RECURSE)
2403      {      {
2404      const pcre_uchar *scode = cd->start_code + GET(code, 1);      const pcre_uchar *scode = cd->start_code + GET(code, 1);
2405        const pcre_uchar *endgroup = scode;
2406      BOOL empty_branch;      BOOL empty_branch;
2407    
2408      /* Test for forward reference or uncompleted reference. This is disabled      /* Test for forward reference or uncompleted reference. This is disabled
# Line 2382  for (code = first_significant_code(code Line 2417  for (code = first_significant_code(code
2417        if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */        if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
2418        }        }
2419    
2420      /* If we are scanning a completed pattern, there are no forward references      /* If the reference is to a completed group, we need to detect whether this
2421      and all groups are complete. We need to detect whether this is a recursive      is a recursive call, as otherwise there will be an infinite loop. If it is
2422      call, as otherwise there will be an infinite loop. If it is a recursion,      a recursion, just skip over it. Simple recursions are easily detected. For
2423      just skip over it. Simple recursions are easily detected. For mutual      mutual recursions we keep a chain on the stack. */
     recursions we keep a chain on the stack. */  
2424    
2425        do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
2426        if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
2427      else      else
2428        {        {
2429        recurse_check *r = recurses;        recurse_check *r = recurses;
       const pcre_uchar *endgroup = scode;  
   
       do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);  
       if (code >= scode && code <= endgroup) continue;  /* Simple recursion */  
   
2430        for (r = recurses; r != NULL; r = r->prev)        for (r = recurses; r != NULL; r = r->prev)
2431          if (r->group == scode) break;          if (r->group == scode) break;
2432        if (r != NULL) continue;   /* Mutual recursion */        if (r != NULL) continue;   /* Mutual recursion */
# Line 2466  for (code = first_significant_code(code Line 2497  for (code = first_significant_code(code
2497        empty_branch = FALSE;        empty_branch = FALSE;
2498        do        do
2499          {          {
2500          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd,
2501            empty_branch = TRUE;            recurses)) empty_branch = TRUE;
2502          code += GET(code, 1);          code += GET(code, 1);
2503          }          }
2504        while (*code == OP_ALT);        while (*code == OP_ALT);
# Line 3036  switch(c) Line 3067  switch(c)
3067      end += 1 + 2 * IMM2_SIZE;      end += 1 + 2 * IMM2_SIZE;
3068      break;      break;
3069      }      }
3070    list[2] = end - code;    list[2] = (pcre_uint32)(end - code);
3071    return end;    return end;
3072    }    }
3073  return NULL;    /* Opcode not accepted */  return NULL;    /* Opcode not accepted */
# Line 3062  Returns:      TRUE if the auto-possessif Line 3093  Returns:      TRUE if the auto-possessif
3093    
3094  static BOOL  static BOOL
3095  compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,  compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,
3096    const pcre_uint32 *base_list, const pcre_uchar *base_end)    const pcre_uint32 *base_list, const pcre_uchar *base_end, int *rec_limit)
3097  {  {
3098  pcre_uchar c;  pcre_uchar c;
3099  pcre_uint32 list[8];  pcre_uint32 list[8];
# Line 3070  const pcre_uint32 *chr_ptr; Line 3101  const pcre_uint32 *chr_ptr;
3101  const pcre_uint32 *ochr_ptr;  const pcre_uint32 *ochr_ptr;
3102  const pcre_uint32 *list_ptr;  const pcre_uint32 *list_ptr;
3103  const pcre_uchar *next_code;  const pcre_uchar *next_code;
3104    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3105    const pcre_uchar *xclass_flags;
3106    #endif
3107  const pcre_uint8 *class_bitset;  const pcre_uint8 *class_bitset;
3108  const pcre_uint32 *set1, *set2, *set_end;  const pcre_uint8 *set1, *set2, *set_end;
3109  pcre_uint32 chr;  pcre_uint32 chr;
3110  BOOL accepted, invert_bits;  BOOL accepted, invert_bits;
3111    BOOL entered_a_group = FALSE;
3112    
3113    if (*rec_limit == 0) return FALSE;
3114    --(*rec_limit);
3115    
3116  /* Note: the base_list[1] contains whether the current opcode has greedy  /* Note: the base_list[1] contains whether the current opcode has greedy
3117  (represented by a non-zero value) quantifier. This is different from  (represented by a non-zero value) quantifier. This is different from
# Line 3127  for(;;) Line 3165  for(;;)
3165        case OP_ONCE:        case OP_ONCE:
3166        case OP_ONCE_NC:        case OP_ONCE_NC:
3167        /* Atomic sub-patterns and assertions can always auto-possessify their        /* Atomic sub-patterns and assertions can always auto-possessify their
3168        last iterator. */        last iterator. However, if the group was entered as a result of checking
3169        return TRUE;        a previous iterator, this is not possible. */
3170    
3171          return !entered_a_group;
3172        }        }
3173    
3174      code += PRIV(OP_lengths)[c];      code += PRIV(OP_lengths)[c];
# Line 3143  for(;;) Line 3183  for(;;)
3183    
3184      while (*next_code == OP_ALT)      while (*next_code == OP_ALT)
3185        {        {
3186        if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE;        if (!compare_opcodes(code, utf, cd, base_list, base_end, rec_limit))
3187            return FALSE;
3188        code = next_code + 1 + LINK_SIZE;        code = next_code + 1 + LINK_SIZE;
3189        next_code += GET(next_code, 1);        next_code += GET(next_code, 1);
3190        }        }
3191    
3192        entered_a_group = TRUE;
3193      continue;      continue;
3194    
3195      case OP_BRAZERO:      case OP_BRAZERO:
# Line 3161  for(;;) Line 3204  for(;;)
3204      /* The bracket content will be checked by the      /* The bracket content will be checked by the
3205      OP_BRA/OP_CBRA case above. */      OP_BRA/OP_CBRA case above. */
3206      next_code += 1 + LINK_SIZE;      next_code += 1 + LINK_SIZE;
3207      if (!compare_opcodes(next_code, utf, cd, base_list, base_end))      if (!compare_opcodes(next_code, utf, cd, base_list, base_end, rec_limit))
3208        return FALSE;        return FALSE;
3209    
3210      code += PRIV(OP_lengths)[c];      code += PRIV(OP_lengths)[c];
3211      continue;      continue;
3212    
3213        default:
3214        break;
3215      }      }
3216    
3217    /* Check for a supported opcode, and load its properties. */    /* Check for a supported opcode, and load its properties. */
# Line 3202  for(;;) Line 3248  for(;;)
3248      if (base_list[0] == OP_CLASS)      if (base_list[0] == OP_CLASS)
3249  #endif  #endif
3250        {        {
3251        set1 = (pcre_uint32 *)(base_end - base_list[2]);        set1 = (pcre_uint8 *)(base_end - base_list[2]);
3252        list_ptr = list;        list_ptr = list;
3253        }        }
3254      else      else
3255        {        {
3256        set1 = (pcre_uint32 *)(code - list[2]);        set1 = (pcre_uint8 *)(code - list[2]);
3257        list_ptr = base_list;        list_ptr = base_list;
3258        }        }
3259    
# Line 3216  for(;;) Line 3262  for(;;)
3262        {        {
3263        case OP_CLASS:        case OP_CLASS:
3264        case OP_NCLASS:        case OP_NCLASS:
3265        set2 = (pcre_uint32 *)        set2 = (pcre_uint8 *)
3266          ((list_ptr == list ? code : base_end) - list_ptr[2]);          ((list_ptr == list ? code : base_end) - list_ptr[2]);
3267        break;        break;
3268    
3269        /* OP_XCLASS cannot be supported here, because its bitset  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3270        is not necessarily complete. E.g: [a-\0x{200}] is stored        case OP_XCLASS:
3271        as a character range, and the appropriate bits are not set. */        xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
3272          if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
3273          if ((*xclass_flags & XCL_MAP) == 0)
3274            {
3275            /* No bits are set for characters < 256. */
3276            if (list[1] == 0) return TRUE;
3277            /* Might be an empty repeat. */
3278            continue;
3279            }
3280          set2 = (pcre_uint8 *)(xclass_flags + 1);
3281          break;
3282    #endif
3283    
3284        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
3285          invert_bits = TRUE;        invert_bits = TRUE;
3286          /* Fall through */        /* Fall through */
3287        case OP_DIGIT:        case OP_DIGIT:
3288          set2 = (pcre_uint32 *)(cd->cbits + cbit_digit);        set2 = (pcre_uint8 *)(cd->cbits + cbit_digit);
3289          break;        break;
3290    
3291        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
3292          invert_bits = TRUE;        invert_bits = TRUE;
3293          /* Fall through */        /* Fall through */
3294        case OP_WHITESPACE:        case OP_WHITESPACE:
3295          set2 = (pcre_uint32 *)(cd->cbits + cbit_space);        set2 = (pcre_uint8 *)(cd->cbits + cbit_space);
3296          break;        break;
3297    
3298        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
3299          invert_bits = TRUE;        invert_bits = TRUE;
3300          /* Fall through */        /* Fall through */
3301        case OP_WORDCHAR:        case OP_WORDCHAR:
3302          set2 = (pcre_uint32 *)(cd->cbits + cbit_word);        set2 = (pcre_uint8 *)(cd->cbits + cbit_word);
3303          break;        break;
3304    
3305        default:        default:
3306        return FALSE;        return FALSE;
3307        }        }
3308    
3309      /* Compare 4 bytes to improve speed. */      /* Because the sets are unaligned, we need
3310      set_end = set1 + (32 / 4);      to perform byte comparison here. */
3311        set_end = set1 + 32;
3312      if (invert_bits)      if (invert_bits)
3313        {        {
3314        do        do
# Line 3392  for(;;) Line 3450  for(;;)
3450             rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&             rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
3451             autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];             autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
3452    
3453      if (!accepted)      if (!accepted) return FALSE;
       return FALSE;  
3454    
3455      if (list[1] == 0) return TRUE;      if (list[1] == 0) return TRUE;
3456      /* Might be an empty repeat. */      /* Might be an empty repeat. */
# Line 3551  for(;;) Line 3608  for(;;)
3608    if (list[1] == 0) return TRUE;    if (list[1] == 0) return TRUE;
3609    }    }
3610    
3611  return FALSE;  /* Control never reaches here. There used to be a fail-safe return FALSE; here,
3612    but some compilers complain about an unreachable statement. */
3613    
3614  }  }
3615    
3616    
# Line 3578  register pcre_uchar c; Line 3637  register pcre_uchar c;
3637  const pcre_uchar *end;  const pcre_uchar *end;
3638  pcre_uchar *repeat_opcode;  pcre_uchar *repeat_opcode;
3639  pcre_uint32 list[8];  pcre_uint32 list[8];
3640    int rec_limit;
3641    
3642  for (;;)  for (;;)
3643    {    {
3644    c = *code;    c = *code;
3645    
3646      /* When a pattern with bad UTF-8 encoding is compiled with NO_UTF_CHECK,
3647      it may compile without complaining, but may get into a loop here if the code
3648      pointer points to a bad value. This is, of course a documentated possibility,
3649      when NO_UTF_CHECK is set, so it isn't a bug, but we can detect this case and
3650      just give up on this optimization. */
3651    
3652      if (c >= OP_TABLE_LENGTH) return;
3653    
3654    if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)    if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
3655      {      {
3656      c -= get_repeat_base(c) - OP_STAR;      c -= get_repeat_base(c) - OP_STAR;
# Line 3590  for (;;) Line 3658  for (;;)
3658        get_chr_property_list(code, utf, cd->fcc, list) : NULL;        get_chr_property_list(code, utf, cd->fcc, list) : NULL;
3659      list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;      list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
3660    
3661      if (end != NULL && compare_opcodes(end, utf, cd, list, end))      rec_limit = 10000;
3662        if (end != NULL && compare_opcodes(end, utf, cd, list, end, &rec_limit))
3663        {        {
3664        switch(c)        switch(c)
3665          {          {
# Line 3623  for (;;) Line 3692  for (;;)
3692          break;          break;
3693    
3694          case OP_MINUPTO:          case OP_MINUPTO:
3695          *code += OP_MINUPTO - OP_UPTO;          *code += OP_POSUPTO - OP_MINUPTO;
3696          break;          break;
3697          }          }
3698        }        }
# Line 3646  for (;;) Line 3715  for (;;)
3715    
3716        list[1] = (c & 1) == 0;        list[1] = (c & 1) == 0;
3717    
3718        if (compare_opcodes(end, utf, cd, list, end))        rec_limit = 10000;
3719          if (compare_opcodes(end, utf, cd, list, end, &rec_limit))
3720          {          {
3721          switch (c)          switch (c)
3722            {            {
# Line 3920  Arguments: Line 3990  Arguments:
3990    adjust     the amount by which the group is to be moved    adjust     the amount by which the group is to be moved
3991    utf        TRUE in UTF-8 / UTF-16 / UTF-32 mode    utf        TRUE in UTF-8 / UTF-16 / UTF-32 mode
3992    cd         contains pointers to tables etc.    cd         contains pointers to tables etc.
3993    save_hwm   the hwm forward reference pointer at the start of the group    save_hwm_offset   the hwm forward reference offset at the start of the group
3994    
3995  Returns:     nothing  Returns:     nothing
3996  */  */
3997    
3998  static void  static void
3999  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
4000    pcre_uchar *save_hwm)    size_t save_hwm_offset)
4001  {  {
4002  pcre_uchar *ptr = group;  pcre_uchar *ptr = group;
4003    
# Line 3939  while ((ptr = (pcre_uchar *)find_recurse Line 4009  while ((ptr = (pcre_uchar *)find_recurse
4009    /* See if this recursion is on the forward reference list. If so, adjust the    /* See if this recursion is on the forward reference list. If so, adjust the
4010    reference. */    reference. */
4011    
4012    for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)    for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
4013           hc += LINK_SIZE)
4014      {      {
4015      offset = (int)GET(hc, 0);      offset = (int)GET(hc, 0);
4016      if (cd->start_code + offset == ptr + 1)      if (cd->start_code + offset == ptr + 1)
# Line 4062  for (c = *cptr; c <= d; c++) Line 4133  for (c = *cptr; c <= d; c++)
4133    
4134  if (c > d) return -1;  /* Reached end of range */  if (c > d) return -1;  /* Reached end of range */
4135    
4136    /* Found a character that has a single other case. Search for the end of the
4137    range, which is either the end of the input range, or a character that has zero
4138    or more than one other cases. */
4139    
4140  *ocptr = othercase;  *ocptr = othercase;
4141  next = othercase + 1;  next = othercase + 1;
4142    
4143  for (++c; c <= d; c++)  for (++c; c <= d; c++)
4144    {    {
4145    if (UCD_OTHERCASE(c) != next) break;    if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break;
4146    next++;    next++;
4147    }    }
4148    
# Line 4140  if ((options & PCRE_CASELESS) != 0) Line 4215  if ((options & PCRE_CASELESS) != 0)
4215        range. Otherwise, use a recursive call to add the additional range. */        range. Otherwise, use a recursive call to add the additional range. */
4216    
4217        else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */        else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
4218        else if (od > end && oc <= end + 1) end = od;       /* Extend upwards */        else if (od > end && oc <= end + 1)
4219            {
4220            end = od;       /* Extend upwards */
4221            if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff);
4222            }
4223        else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od);        else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od);
4224        }        }
4225      }      }
# Line 4186  for (c = start; c <= classbits_end; c++) Line 4265  for (c = start; c <= classbits_end; c++)
4265  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4266  if (start <= 0xff) start = 0xff + 1;  if (start <= 0xff) start = 0xff + 1;
4267    
4268  if (end >= start) {  if (end >= start)
4269      {
4270    pcre_uchar *uchardata = *uchardptr;    pcre_uchar *uchardata = *uchardptr;
   
4271  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4272    if ((options & PCRE_UTF8) != 0)  /* All UTFs use the same flag bit */    if ((options & PCRE_UTF8) != 0)  /* All UTFs use the same flag bit */
4273      {      {
# Line 4380  const pcre_uchar *tempptr; Line 4459  const pcre_uchar *tempptr;
4459  const pcre_uchar *nestptr = NULL;  const pcre_uchar *nestptr = NULL;
4460  pcre_uchar *previous = NULL;  pcre_uchar *previous = NULL;
4461  pcre_uchar *previous_callout = NULL;  pcre_uchar *previous_callout = NULL;
4462  pcre_uchar *save_hwm = NULL;  size_t save_hwm_offset = 0;
4463  pcre_uint8 classbits[32];  pcre_uint8 classbits[32];
4464    
4465  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
# Line 4660  for (;; ptr++) Line 4739  for (;; ptr++)
4739      previous = NULL;      previous = NULL;
4740      if ((options & PCRE_MULTILINE) != 0)      if ((options & PCRE_MULTILINE) != 0)
4741        {        {
4742        if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;        if (firstcharflags == REQ_UNSET)
4743            zerofirstcharflags = firstcharflags = REQ_NONE;
4744        *code++ = OP_CIRCM;        *code++ = OP_CIRCM;
4745        }        }
4746      else *code++ = OP_CIRC;      else *code++ = OP_CIRC;
# Line 4840  for (;; ptr++) Line 4920  for (;; ptr++)
4920        if (lengthptr != NULL && class_uchardata > class_uchardata_base)        if (lengthptr != NULL && class_uchardata > class_uchardata_base)
4921          {          {
4922          xclass = TRUE;          xclass = TRUE;
4923          *lengthptr += class_uchardata - class_uchardata_base;          *lengthptr += (int)(class_uchardata - class_uchardata_base);
4924          class_uchardata = class_uchardata_base;          class_uchardata = class_uchardata_base;
4925          }          }
4926  #endif  #endif
# Line 5290  for (;; ptr++) Line 5370  for (;; ptr++)
5370        whatever repeat count may follow. In the case of reqchar, save the        whatever repeat count may follow. In the case of reqchar, save the
5371        previous value for reinstating. */        previous value for reinstating. */
5372    
5373        if (class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)        if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
5374          {          {
5375          ptr++;          ptr++;
5376          zeroreqchar = reqchar;          zeroreqchar = reqchar;
# Line 5440  for (;; ptr++) Line 5520  for (;; ptr++)
5520        }        }
5521  #endif  #endif
5522    
5523        /* Even though any XCLASS list is now discarded, we must allow for
5524        its memory. */
5525    
5526        if (lengthptr != NULL)
5527          *lengthptr += (int)(class_uchardata - class_uchardata_base);
5528    
5529      /* If there are no characters > 255, or they are all to be included or      /* If there are no characters > 255, or they are all to be included or
5530      excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the      excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
5531      whole class was negated and whether there were negative specials such as \S      whole class was negated and whether there were negative specials such as \S
# Line 5892  for (;; ptr++) Line 5978  for (;; ptr++)
5978          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
5979            {            {
5980            *code = OP_END;            *code = OP_END;
5981            adjust_recurse(previous, 1, utf, cd, save_hwm);            adjust_recurse(previous, 1, utf, cd, save_hwm_offset);
5982            memmove(previous + 1, previous, IN_UCHARS(len));            memmove(previous + 1, previous, IN_UCHARS(len));
5983            code++;            code++;
5984            if (repeat_max == 0)            if (repeat_max == 0)
# Line 5916  for (;; ptr++) Line 6002  for (;; ptr++)
6002            {            {
6003            int offset;            int offset;
6004            *code = OP_END;            *code = OP_END;
6005            adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm);            adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm_offset);
6006            memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));            memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
6007            code += 2 + LINK_SIZE;            code += 2 + LINK_SIZE;
6008            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
# Line 5979  for (;; ptr++) Line 6065  for (;; ptr++)
6065              for (i = 1; i < repeat_min; i++)              for (i = 1; i < repeat_min; i++)
6066                {                {
6067                pcre_uchar *hc;                pcre_uchar *hc;
6068                pcre_uchar *this_hwm = cd->hwm;                size_t this_hwm_offset = cd->hwm - cd->start_workspace;
6069                memcpy(code, previous, IN_UCHARS(len));                memcpy(code, previous, IN_UCHARS(len));
6070    
6071                while (cd->hwm > cd->start_workspace + cd->workspace_size -                while (cd->hwm > cd->start_workspace + cd->workspace_size -
6072                       WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))                       WORK_SIZE_SAFETY_MARGIN -
6073                         (this_hwm_offset - save_hwm_offset))
6074                  {                  {
                 int save_offset = save_hwm - cd->start_workspace;  
                 int this_offset = this_hwm - cd->start_workspace;  
6075                  *errorcodeptr = expand_workspace(cd);                  *errorcodeptr = expand_workspace(cd);
6076                  if (*errorcodeptr != 0) goto FAILED;                  if (*errorcodeptr != 0) goto FAILED;
                 save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;  
                 this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;  
6077                  }                  }
6078    
6079                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)                for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset;
6080                       hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
6081                       hc += LINK_SIZE)
6082                  {                  {
6083                  PUT(cd->hwm, 0, GET(hc, 0) + len);                  PUT(cd->hwm, 0, GET(hc, 0) + len);
6084                  cd->hwm += LINK_SIZE;                  cd->hwm += LINK_SIZE;
6085                  }                  }
6086                save_hwm = this_hwm;                save_hwm_offset = this_hwm_offset;
6087                code += len;                code += len;
6088                }                }
6089              }              }
# Line 6043  for (;; ptr++) Line 6128  for (;; ptr++)
6128          else for (i = repeat_max - 1; i >= 0; i--)          else for (i = repeat_max - 1; i >= 0; i--)
6129            {            {
6130            pcre_uchar *hc;            pcre_uchar *hc;
6131            pcre_uchar *this_hwm = cd->hwm;            size_t this_hwm_offset = cd->hwm - cd->start_workspace;
6132    
6133            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
6134    
# Line 6065  for (;; ptr++) Line 6150  for (;; ptr++)
6150            copying them. */            copying them. */
6151    
6152            while (cd->hwm > cd->start_workspace + cd->workspace_size -            while (cd->hwm > cd->start_workspace + cd->workspace_size -
6153                   WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))                   WORK_SIZE_SAFETY_MARGIN -
6154                     (this_hwm_offset - save_hwm_offset))
6155              {              {
             int save_offset = save_hwm - cd->start_workspace;  
             int this_offset = this_hwm - cd->start_workspace;  
6156              *errorcodeptr = expand_workspace(cd);              *errorcodeptr = expand_workspace(cd);
6157              if (*errorcodeptr != 0) goto FAILED;              if (*errorcodeptr != 0) goto FAILED;
             save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;  
             this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;  
6158              }              }
6159    
6160            for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)            for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset;
6161                   hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
6162                   hc += LINK_SIZE)
6163              {              {
6164              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
6165              cd->hwm += LINK_SIZE;              cd->hwm += LINK_SIZE;
6166              }              }
6167            save_hwm = this_hwm;            save_hwm_offset = this_hwm_offset;
6168            code += len;            code += len;
6169            }            }
6170    
# Line 6176  for (;; ptr++) Line 6260  for (;; ptr++)
6260                {                {
6261                int nlen = (int)(code - bracode);                int nlen = (int)(code - bracode);
6262                *code = OP_END;                *code = OP_END;
6263                adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm);                adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
6264                memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));                memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
6265                code += 1 + LINK_SIZE;                code += 1 + LINK_SIZE;
6266                nlen += 1 + LINK_SIZE;                nlen += 1 + LINK_SIZE;
# Line 6310  for (;; ptr++) Line 6394  for (;; ptr++)
6394          else          else
6395            {            {
6396            *code = OP_END;            *code = OP_END;
6397            adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);            adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
6398            memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));            memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6399            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
6400            len += 1 + LINK_SIZE;            len += 1 + LINK_SIZE;
# Line 6359  for (;; ptr++) Line 6443  for (;; ptr++)
6443    
6444          default:          default:
6445          *code = OP_END;          *code = OP_END;
6446          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
6447          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6448          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
6449          len += 1 + LINK_SIZE;          len += 1 + LINK_SIZE;
# Line 6388  for (;; ptr++) Line 6472  for (;; ptr++)
6472      parenthesis forms.  */      parenthesis forms.  */
6473    
6474      case CHAR_LEFT_PARENTHESIS:      case CHAR_LEFT_PARENTHESIS:
6475      newoptions = options;      ptr++;
     skipbytes = 0;  
     bravalue = OP_CBRA;  
     save_hwm = cd->hwm;  
     reset_bracount = FALSE;  
6476    
6477      /* First deal with various "verbs" that can be introduced by '*'. */      /* First deal with comments. Putting this code right at the start ensures
6478        that comments have no bad side effects. */
6479    
6480        if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
6481          {
6482          ptr += 2;
6483          while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
6484          if (*ptr == CHAR_NULL)
6485            {
6486            *errorcodeptr = ERR18;
6487            goto FAILED;
6488            }
6489          continue;
6490          }
6491    
6492        /* Now deal with various "verbs" that can be introduced by '*'. */
6493    
     ptr++;  
6494      if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'      if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
6495           || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0))))           || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0))))
6496        {        {
# Line 6517  for (;; ptr++) Line 6611  for (;; ptr++)
6611        goto FAILED;        goto FAILED;
6612        }        }
6613    
6614        /* Initialize for "real" parentheses */
6615    
6616        newoptions = options;
6617        skipbytes = 0;
6618        bravalue = OP_CBRA;
6619        save_hwm_offset = cd->hwm - cd->start_workspace;
6620        reset_bracount = FALSE;
6621    
6622      /* Deal with the extended parentheses; all are introduced by '?', and the      /* Deal with the extended parentheses; all are introduced by '?', and the
6623      appearance of any of them means that this is not a capturing group. */      appearance of any of them means that this is not a capturing group. */
6624    
6625      else if (*ptr == CHAR_QUESTION_MARK)      if (*ptr == CHAR_QUESTION_MARK)
6626        {        {
6627        int i, set, unset, namelen;        int i, set, unset, namelen;
6628        int *optset;        int *optset;
# Line 6529  for (;; ptr++) Line 6631  for (;; ptr++)
6631    
6632        switch (*(++ptr))        switch (*(++ptr))
6633          {          {
         case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */  
         ptr++;  
         while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;  
         if (*ptr == CHAR_NULL)  
           {  
           *errorcodeptr = ERR18;  
           goto FAILED;  
           }  
         continue;  
   
   
6634          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
6635          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
6636          reset_bracount = TRUE;          reset_bracount = TRUE;
# Line 6589  for (;; ptr++) Line 6680  for (;; ptr++)
6680                (tempptr[2] == CHAR_EQUALS_SIGN ||                (tempptr[2] == CHAR_EQUALS_SIGN ||
6681                 tempptr[2] == CHAR_EXCLAMATION_MARK ||                 tempptr[2] == CHAR_EXCLAMATION_MARK ||
6682                 tempptr[2] == CHAR_LESS_THAN_SIGN))                 tempptr[2] == CHAR_LESS_THAN_SIGN))
6683              {
6684              cd->iscondassert = TRUE;
6685            break;            break;
6686              }
6687    
6688          /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all          /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all
6689          need to skip at least 1+IMM2_SIZE bytes at the start of the group. */          need to skip at least 1+IMM2_SIZE bytes at the start of the group. */
6690    
6691          code[1+LINK_SIZE] = OP_CREF;          code[1+LINK_SIZE] = OP_CREF;
6692          skipbytes = 1+IMM2_SIZE;          skipbytes = 1+IMM2_SIZE;
6693          refsign = -1;          refsign = -1;     /* => not a number */
6694            namelen = -1;     /* => not a name; must set to avoid warning */
6695            name = NULL;      /* Always set to avoid warning */
6696            recno = 0;        /* Always set to avoid warning */
6697    
6698          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
6699    
# Line 6633  for (;; ptr++) Line 6730  for (;; ptr++)
6730    
6731          if (refsign >= 0)          if (refsign >= 0)
6732            {            {
           recno = 0;  
6733            while (IS_DIGIT(*ptr))            while (IS_DIGIT(*ptr))
6734              {              {
6735              recno = recno * 10 + (int)(*ptr - CHAR_0);              recno = recno * 10 + (int)(*ptr - CHAR_0);
# Line 6700  for (;; ptr++) Line 6796  for (;; ptr++)
6796              goto FAILED;              goto FAILED;
6797              }              }
6798            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
6799              if (recno > cd->top_backref) cd->top_backref = recno;
6800            break;            break;
6801            }            }
6802    
# Line 6722  for (;; ptr++) Line 6819  for (;; ptr++)
6819            int offset = i++;            int offset = i++;
6820            int count = 1;            int count = 1;
6821            recno = GET2(slot, 0);   /* Number from first found */            recno = GET2(slot, 0);   /* Number from first found */
6822              if (recno > cd->top_backref) cd->top_backref = recno;
6823            for (; i < cd->names_found; i++)            for (; i < cd->names_found; i++)
6824              {              {
6825              slot += cd->name_entry_size;              slot += cd->name_entry_size;
6826              if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0) break;              if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0 ||
6827                  (slot+IMM2_SIZE)[namelen] != 0) break;
6828              count++;              count++;
6829              }              }
6830    
6831            if (count > 1)            if (count > 1)
6832              {              {
6833              PUT2(code, 2+LINK_SIZE, offset);              PUT2(code, 2+LINK_SIZE, offset);
# Line 7076  for (;; ptr++) Line 7176  for (;; ptr++)
7176            /* Count named back references. */            /* Count named back references. */
7177    
7178            if (!is_recurse) cd->namedrefcount++;            if (!is_recurse) cd->namedrefcount++;
7179    
7180              /* We have to allow for a named reference to a duplicated name (this
7181              cannot be determined until the second pass). This needs an extra
7182              16-bit data item. */
7183    
7184              *lengthptr += IMM2_SIZE;
7185            }            }
7186    
7187          /* In the real compile, search the name table. We check the name          /* In the real compile, search the name table. We check the name
# Line 7122  for (;; ptr++) Line 7228  for (;; ptr++)
7228            for (i++; i < cd->names_found; i++)            for (i++; i < cd->names_found; i++)
7229              {              {
7230              if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;              if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
7231    
7232    
7233              count++;              count++;
7234              cslot += cd->name_entry_size;              cslot += cd->name_entry_size;
7235              }              }
# Line 7430  for (;; ptr++) Line 7538  for (;; ptr++)
7538        goto FAILED;        goto FAILED;
7539        }        }
7540    
7541      /* Assertions used not to be repeatable, but this was changed for Perl      /* All assertions used not to be repeatable, but this was changed for Perl
7542      compatibility, so all kinds can now be repeated. We copy code into a      compatibility. All kinds can now be repeated except for assertions that are
7543        conditions (Perl also forbids these to be repeated). We copy code into a
7544      non-register variable (tempcode) in order to be able to pass its address      non-register variable (tempcode) in order to be able to pass its address
7545      because some compilers complain otherwise. */      because some compilers complain otherwise. At the start of a conditional
7546        group whose condition is an assertion, cd->iscondassert is set. We unset it
7547        here so as to allow assertions later in the group to be quantified. */
7548    
7549        if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT &&
7550            cd->iscondassert)
7551          {
7552          previous = NULL;
7553          cd->iscondassert = FALSE;
7554          }
7555        else previous = code;
7556    
     previous = code;                      /* For handling repetition */  
7557      *code = bravalue;      *code = bravalue;
7558      tempcode = code;      tempcode = code;
7559      tempreqvary = cd->req_varyopt;        /* Save value before bracket */      tempreqvary = cd->req_varyopt;        /* Save value before bracket */
# Line 7682  for (;; ptr++) Line 7800  for (;; ptr++)
7800          const pcre_uchar *p;          const pcre_uchar *p;
7801          pcre_uint32 cf;          pcre_uint32 cf;
7802    
7803          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */
7804          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
7805            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
7806    
# Line 8009  int length; Line 8127  int length;
8127  unsigned int orig_bracount;  unsigned int orig_bracount;
8128  unsigned int max_bracount;  unsigned int max_bracount;
8129  branch_chain bc;  branch_chain bc;
8130    size_t save_hwm_offset;
8131    
8132    /* If set, call the external function that checks for stack availability. */
8133    
8134    if (PUBL(stack_guard) != NULL && PUBL(stack_guard)())
8135      {
8136      *errorcodeptr= ERR85;
8137      return FALSE;
8138      }
8139    
8140    /* Miscellaneous initialization */
8141    
8142  bc.outer = bcptr;  bc.outer = bcptr;
8143  bc.current_branch = code;  bc.current_branch = code;
# Line 8016  bc.current_branch = code; Line 8145  bc.current_branch = code;
8145  firstchar = reqchar = 0;  firstchar = reqchar = 0;
8146  firstcharflags = reqcharflags = REQ_UNSET;  firstcharflags = reqcharflags = REQ_UNSET;
8147    
8148    save_hwm_offset = cd->hwm - cd->start_workspace;
8149    
8150  /* Accumulate the length for use in the pre-compile phase. Start with the  /* Accumulate the length for use in the pre-compile phase. Start with the
8151  length of the BRA and KET and any extra bytes that are required at the  length of the BRA and KET and any extra bytes that are required at the
8152  beginning. We accumulate in a local variable to save frequent testing of  beginning. We accumulate in a local variable to save frequent testing of
# Line 8209  for (;;) Line 8340  for (;;)
8340    
8341      /* If it was a capturing subpattern, check to see if it contained any      /* If it was a capturing subpattern, check to see if it contained any
8342      recursive back references. If so, we must wrap it in atomic brackets.      recursive back references. If so, we must wrap it in atomic brackets.
8343      In any event, remove the block from the chain. */      Because we are moving code along, we must ensure that any pending recursive
8344        references are updated. In any event, remove the block from the chain. */
8345    
8346      if (capnumber > 0)      if (capnumber > 0)
8347        {        {
8348        if (cd->open_caps->flag)        if (cd->open_caps->flag)
8349          {          {
8350            *code = OP_END;
8351            adjust_recurse(start_bracket, 1 + LINK_SIZE,
8352              (options & PCRE_UTF8) != 0, cd, save_hwm_offset);
8353          memmove(start_bracket + 1 + LINK_SIZE, start_bracket,          memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
8354            IN_UCHARS(code - start_bracket));            IN_UCHARS(code - start_bracket));
8355          *start_bracket = OP_ONCE;          *start_bracket = OP_ONCE;
# Line 8438  do { Line 8573  do {
8573         case OP_RREF:         case OP_RREF:
8574         case OP_DNRREF:         case OP_DNRREF:
8575         case OP_DEF:         case OP_DEF:
8576           case OP_FAIL:
8577         return FALSE;         return FALSE;
8578    
8579         default:     /* Assertion */         default:     /* Assertion */
# Line 9022  cd->dupnames = FALSE; Line 9158  cd->dupnames = FALSE;
9158  cd->namedrefcount = 0;  cd->namedrefcount = 0;
9159  cd->start_code = cworkspace;  cd->start_code = cworkspace;
9160  cd->hwm = cworkspace;  cd->hwm = cworkspace;
9161    cd->iscondassert = FALSE;
9162  cd->start_workspace = cworkspace;  cd->start_workspace = cworkspace;
9163  cd->workspace_size = COMPILE_WORK_SIZE;  cd->workspace_size = COMPILE_WORK_SIZE;
9164  cd->named_groups = named_groups;  cd->named_groups = named_groups;
# Line 9059  if (length > MAX_PATTERN_SIZE) Line 9196  if (length > MAX_PATTERN_SIZE)
9196    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN;
9197    }    }
9198    
 /* If there are groups with duplicate names and there are also references by  
 name, we must allow for the possibility of named references to duplicated  
 groups. These require an extra data item each. */  
   
 if (cd->dupnames && cd->namedrefcount > 0)  
   length += cd->namedrefcount * IMM2_SIZE * sizeof(pcre_uchar);  
   
9199  /* Compute the size of the data block for storing the compiled pattern. Integer  /* Compute the size of the data block for storing the compiled pattern. Integer
9200  overflow should no longer be possible because nowadays we limit the maximum  overflow should no longer be possible because nowadays we limit the maximum
9201  value of cd->names_found and cd->name_entry_size. */  value of cd->names_found and cd->name_entry_size. */
# Line 9124  cd->name_table = (pcre_uchar *)re + re-> Line 9254  cd->name_table = (pcre_uchar *)re + re->
9254  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
9255  cd->start_code = codestart;  cd->start_code = codestart;
9256  cd->hwm = (pcre_uchar *)(cd->start_workspace);  cd->hwm = (pcre_uchar *)(cd->start_workspace);
9257    cd->iscondassert = FALSE;
9258  cd->req_varyopt = 0;  cd->req_varyopt = 0;
9259  cd->had_accept = FALSE;  cd->had_accept = FALSE;
9260  cd->had_pruneorskip = FALSE;  cd->had_pruneorskip = FALSE;
# Line 9219  subpattern. */ Line 9350  subpattern. */
9350    
9351  if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;  if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
9352    
9353  /* Unless disabled, check whether single character iterators can be  /* Unless disabled, check whether any single character iterators can be
9354  auto-possessified. The function overwrites the appropriate opcode values. */  auto-possessified. The function overwrites the appropriate opcode values, so
9355    the type of the pointer must be cast. NOTE: the intermediate variable "temp" is
9356    used in this code because at least one compiler gives a warning about loss of
9357    "const" attribute if the cast (pcre_uchar *)codestart is used directly in the
9358    function call. */
9359    
9360  if ((options & PCRE_NO_AUTO_POSSESS) == 0)  if ((options & PCRE_NO_AUTO_POSSESS) == 0)
9361    auto_possessify((pcre_uchar *)codestart, utf, cd);    {
9362      pcre_uchar *temp = (pcre_uchar *)codestart;
9363      auto_possessify(temp, utf, cd);
9364      }
9365    
9366  /* If there were any lookbehind assertions that contained OP_RECURSE  /* If there were any lookbehind assertions that contained OP_RECURSE
9367  (recursions or subroutine calls), a flag is set for them to be checked here,  (recursions or subroutine calls), a flag is set for them to be checked here,

Legend:
Removed from v.1414  
changed lines
  Added in v.1538

  ViewVC Help
Powered by ViewVC 1.1.5