/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 794 by zherczeg, Thu Dec 8 07:36:41 2011 UTC revision 801 by ph10, Mon Dec 12 16:23:37 2011 UTC
# Line 88  so this number is very generous. Line 88  so this number is very generous.
88  The same workspace is used during the second, actual compile phase for  The same workspace is used during the second, actual compile phase for
89  remembering forward references to groups so that they can be filled in at the  remembering forward references to groups so that they can be filled in at the
90  end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE  end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE
91  is 4 there is plenty of room. */  is 4 there is plenty of room for most patterns. However, the memory can get
92    filled up by repetitions of forward references, for example patterns like
93    /(?1){0,1999}(b)/, and one user did hit the limit. The code has been changed so
94    that the workspace is expanded using malloc() in this situation. The value
95    below is therefore a minimum, and we put a maximum on it for safety. The
96    minimum is now also defined in terms of LINK_SIZE so that the use of malloc()
97    kicks in at the same number of forward references in all cases. */
98    
99  #define COMPILE_WORK_SIZE (4096)  #define COMPILE_WORK_SIZE (2048*LINK_SIZE)
100    #define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE)
101    
102  /* The overrun tests check for a slightly smaller size so that they detect the  /* The overrun tests check for a slightly smaller size so that they detect the
103  overrun before it actually does run off the end of the data block. */  overrun before it actually does run off the end of the data block. */
104    
105  #define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100)  #define WORK_SIZE_SAFETY_MARGIN (100)
106    
107  /* Private flags added to firstchar and reqchar. */  /* Private flags added to firstchar and reqchar. */
108    
# Line 474  static const char error_texts[] = Line 481  static const char error_texts[] =
481    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
482    /* 70 */    /* 70 */
483    "internal error: unknown opcode in find_fixedlength()\0"    "internal error: unknown opcode in find_fixedlength()\0"
484    "Not allowed UTF-8 / UTF-16 code point (>= 0xd800 && <= 0xdfff)\0"    "\\N is not supported in a class\0"
485      "too many forward references\0"
486      "disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff)\0"
487    ;    ;
488    
489  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 649  return s; Line 658  return s;
658    
659    
660  /*************************************************  /*************************************************
661    *           Expand the workspace                 *
662    *************************************************/
663    
664    /* This function is called during the second compiling phase, if the number of
665    forward references fills the existing workspace, which is originally a block on
666    the stack. A larger block is obtained from malloc() unless the ultimate limit
667    has been reached or the increase will be rather small.
668    
669    Argument: pointer to the compile data block
670    Returns:  0 if all went well, else an error number
671    */
672    
673    static int
674    expand_workspace(compile_data *cd)
675    {
676    pcre_uchar *newspace;
677    int newsize = cd->workspace_size * 2;
678    
679    if (newsize > COMPILE_WORK_SIZE_MAX) newsize = COMPILE_WORK_SIZE_MAX;
680    if (cd->workspace_size >= COMPILE_WORK_SIZE_MAX ||
681        newsize - cd->workspace_size < WORK_SIZE_SAFETY_MARGIN)
682     return ERR72;
683    
684    newspace = (pcre_malloc)(newsize);
685    if (newspace == NULL) return ERR21;
686    
687    memcpy(newspace, cd->start_workspace, cd->workspace_size);
688    cd->hwm = (pcre_uchar *)newspace + (cd->hwm - cd->start_workspace);
689    if (cd->workspace_size > COMPILE_WORK_SIZE)
690      (pcre_free)((void *)cd->start_workspace);
691    cd->start_workspace = newspace;
692    cd->workspace_size = newsize;
693    return 0;
694    }
695    
696    
697    
698    /*************************************************
699  *            Check for counted repeat            *  *            Check for counted repeat            *
700  *************************************************/  *************************************************/
701    
# Line 1013  else Line 1060  else
1060    
1061        if (*pt == CHAR_RIGHT_CURLY_BRACKET)        if (*pt == CHAR_RIGHT_CURLY_BRACKET)
1062          {          {
1063          if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR71;          if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1064          ptr = pt;          ptr = pt;
1065          break;          break;
1066          }          }
# Line 1811  for (;;) Line 1858  for (;;)
1858      cc++;      cc++;
1859      break;      break;
1860    
1861      /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;      /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
1862      otherwise \C is coded as OP_ALLANY. */      otherwise \C is coded as OP_ALLANY. */
1863    
1864      case OP_ANYBYTE:      case OP_ANYBYTE:
# Line 2357  for (code = first_significant_code(code Line 2404  for (code = first_significant_code(code
2404      actual length is stored in the compiled code, so we must update "code"      actual length is stored in the compiled code, so we must update "code"
2405      here. */      here. */
2406    
2407  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2408      case OP_XCLASS:      case OP_XCLASS:
2409      ccode = code += GET(code, 1);      ccode = code += GET(code, 1);
2410      goto CHECK_CLASS_REPEAT;      goto CHECK_CLASS_REPEAT;
# Line 2367  for (code = first_significant_code(code Line 2414  for (code = first_significant_code(code
2414      case OP_NCLASS:      case OP_NCLASS:
2415      ccode = code + PRIV(OP_lengths)[OP_CLASS];      ccode = code + PRIV(OP_lengths)[OP_CLASS];
2416    
2417  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2418      CHECK_CLASS_REPEAT:      CHECK_CLASS_REPEAT:
2419  #endif  #endif
2420    
# Line 2980  the next item is a character. */ Line 3027  the next item is a character. */
3027  if (next >= 0) switch(op_code)  if (next >= 0) switch(op_code)
3028    {    {
3029    case OP_CHAR:    case OP_CHAR:
3030  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3031    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
3032  #else  #else
3033    c = *previous;    c = *previous;
# Line 2992  if (next >= 0) switch(op_code) Line 3039  if (next >= 0) switch(op_code)
3039    high-valued characters. */    high-valued characters. */
3040    
3041    case OP_CHARI:    case OP_CHARI:
3042  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3043    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
3044  #else  #else
3045    c = *previous;    c = *previous;
3046  #endif  #endif
3047    if (c == next) return FALSE;    if (c == next) return FALSE;
3048  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3049    if (utf)    if (utf)
3050      {      {
3051      unsigned int othercase;      unsigned int othercase;
# Line 3011  if (next >= 0) switch(op_code) Line 3058  if (next >= 0) switch(op_code)
3058      return (unsigned int)c != othercase;      return (unsigned int)c != othercase;
3059      }      }
3060    else    else
3061  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3062    return (c != cd->fcc[next]);  /* Non-UTF-8 mode */    return (c != cd->fcc[next]);  /* Non-UTF-8 mode */
3063    
3064    /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These    /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
# Line 3023  if (next >= 0) switch(op_code) Line 3070  if (next >= 0) switch(op_code)
3070    
3071    case OP_NOTI:    case OP_NOTI:
3072    if ((c = *previous) == next) return TRUE;    if ((c = *previous) == next) return TRUE;
3073  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3074    if (utf)    if (utf)
3075      {      {
3076      unsigned int othercase;      unsigned int othercase;
# Line 3036  if (next >= 0) switch(op_code) Line 3083  if (next >= 0) switch(op_code)
3083      return (unsigned int)c == othercase;      return (unsigned int)c == othercase;
3084      }      }
3085    else    else
3086  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3087    return (c == cd->fcc[next]);  /* Non-UTF-8 mode */    return (c == cd->fcc[next]);  /* Non-UTF-8 mode */
3088    
3089    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
# Line 3128  switch(op_code) Line 3175  switch(op_code)
3175    {    {
3176    case OP_CHAR:    case OP_CHAR:
3177    case OP_CHARI:    case OP_CHARI:
3178  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3179    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
3180  #else  #else
3181    c = *previous;    c = *previous;
# Line 3358  pcre_uint8 classbits[32]; Line 3405  pcre_uint8 classbits[32];
3405  must not do this for other options (e.g. PCRE_EXTENDED) because they may change  must not do this for other options (e.g. PCRE_EXTENDED) because they may change
3406  dynamically as we process the pattern. */  dynamically as we process the pattern. */
3407    
3408  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3409  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3410  BOOL utf = (options & PCRE_UTF8) != 0;  BOOL utf = (options & PCRE_UTF8) != 0;
3411  pcre_uchar utf_chars[6];  pcre_uchar utf_chars[6];
# Line 3413  for (;; ptr++) Line 3460  for (;; ptr++)
3460    BOOL is_quantifier;    BOOL is_quantifier;
3461    BOOL is_recurse;    BOOL is_recurse;
3462    BOOL reset_bracount;    BOOL reset_bracount;
3463    int class_charcount;    int class_has_8bitchar;
3464      int class_single_char;
3465    int class_lastchar;    int class_lastchar;
3466    int newoptions;    int newoptions;
3467    int recno;    int recno;
# Line 3448  for (;; ptr++) Line 3496  for (;; ptr++)
3496  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
3497      if (code > cd->hwm) cd->hwm = code;                 /* High water info */      if (code > cd->hwm) cd->hwm = code;                 /* High water info */
3498  #endif  #endif
3499      if (code > cd->start_workspace + WORK_SIZE_CHECK)   /* Check for overrun */      if (code > cd->start_workspace + cd->workspace_size -
3500            WORK_SIZE_SAFETY_MARGIN)                       /* Check for overrun */
3501        {        {
3502        *errorcodeptr = ERR52;        *errorcodeptr = ERR52;
3503        goto FAILED;        goto FAILED;
# Line 3473  for (;; ptr++) Line 3522  for (;; ptr++)
3522      *lengthptr += (int)(code - last_code);      *lengthptr += (int)(code - last_code);
3523      DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr,      DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr,
3524        (int)(code - last_code), c, c));        (int)(code - last_code), c, c));
3525    
3526      /* If "previous" is set and it is not at the start of the work space, move      /* If "previous" is set and it is not at the start of the work space, move
3527      it back to there, in order to avoid filling up the work space. Otherwise,      it back to there, in order to avoid filling up the work space. Otherwise,
3528      if "previous" is NULL, reset the current code pointer to the start. */      if "previous" is NULL, reset the current code pointer to the start. */
# Line 3498  for (;; ptr++) Line 3547  for (;; ptr++)
3547    /* In the real compile phase, just check the workspace used by the forward    /* In the real compile phase, just check the workspace used by the forward
3548    reference list. */    reference list. */
3549    
3550    else if (cd->hwm > cd->start_workspace + WORK_SIZE_CHECK)    else if (cd->hwm > cd->start_workspace + cd->workspace_size -
3551               WORK_SIZE_SAFETY_MARGIN)
3552      {      {
3553      *errorcodeptr = ERR52;      *errorcodeptr = ERR52;
3554      goto FAILED;      goto FAILED;
# Line 3710  for (;; ptr++) Line 3760  for (;; ptr++)
3760    
3761      should_flip_negation = FALSE;      should_flip_negation = FALSE;
3762    
3763      /* Keep a count of chars with values < 256 so that we can optimize the case      /* For optimization purposes, we track some properties of the class.
3764      of just a single character (as long as it's < 256). However, For higher      class_has_8bitchar will be non-zero, if the class contains at least one
3765      valued UTF-8 characters, we don't yet do any optimization. */      < 256 character. class_single_char will be 1, if the class only contains
3766        a single character. */
3767    
3768      class_charcount = 0;      class_has_8bitchar = 0;
3769        class_single_char = 0;
3770      class_lastchar = -1;      class_lastchar = -1;
3771    
3772      /* Initialize the 32-char bit map to all zeros. We build the map in a      /* Initialize the 32-char bit map to all zeros. We build the map in a
# Line 3870  for (;; ptr++) Line 3922  for (;; ptr++)
3922            for (c = 0; c < 32; c++) classbits[c] |= pbits[c];            for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
3923    
3924          ptr = tempptr + 1;          ptr = tempptr + 1;
3925          class_charcount = 10;  /* Set > 1; assumes more than 1 per class */          /* Every class contains at least one < 256 character. */
3926            class_has_8bitchar = 1;
3927            /* Every class contains at least two characters. */
3928            class_single_char = 2;
3929          continue;    /* End of POSIX syntax handling */          continue;    /* End of POSIX syntax handling */
3930          }          }
3931    
3932        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
3933        of the specials, which just set a flag. The sequence \b is a special        of the specials, which just set a flag. The sequence \b is a special
3934        case. Inside a class (and only there) it is treated as backspace. We        case. Inside a class (and only there) it is treated as backspace. We
3935        assume that other escapes have more than one character in them, so set        assume that other escapes have more than one character in them, so
3936        class_charcount bigger than one. Unrecognized escapes fall through and        speculatively set both class_has_8bitchar and class_single_char bigger
3937        are either treated as literal characters (by default), or are faulted if        than one. Unrecognized escapes fall through and are either treated
3938          as literal characters (by default), or are faulted if
3939        PCRE_EXTRA is set. */        PCRE_EXTRA is set. */
3940    
3941        if (c == CHAR_BACKSLASH)        if (c == CHAR_BACKSLASH)
# Line 3888  for (;; ptr++) Line 3944  for (;; ptr++)
3944          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
3945    
3946          if (-c == ESC_b) c = CHAR_BS;    /* \b is backspace in a class */          if (-c == ESC_b) c = CHAR_BS;    /* \b is backspace in a class */
3947            else if (-c == ESC_N)            /* \N is not supported in a class */
3948              {
3949              *errorcodeptr = ERR71;
3950              goto FAILED;
3951              }
3952          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
3953            {            {
3954            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
# Line 3902  for (;; ptr++) Line 3963  for (;; ptr++)
3963          if (c < 0)          if (c < 0)
3964            {            {
3965            register const pcre_uint8 *cbits = cd->cbits;            register const pcre_uint8 *cbits = cd->cbits;
3966            class_charcount += 2;     /* Greater than 1 is what matters */            /* Every class contains at least two < 256 characters. */
3967              class_has_8bitchar++;
3968              /* Every class contains at least two characters. */
3969              class_single_char += 2;
3970    
3971            switch (-c)            switch (-c)
3972              {              {
# Line 3915  for (;; ptr++) Line 3979  for (;; ptr++)
3979              case ESC_SU:              case ESC_SU:
3980              nestptr = ptr;              nestptr = ptr;
3981              ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */              ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */
3982              class_charcount -= 2;                /* Undo! */              class_has_8bitchar--;                /* Undo! */
3983              continue;              continue;
3984  #endif  #endif
3985              case ESC_d:              case ESC_d:
# Line 4081  for (;; ptr++) Line 4145  for (;; ptr++)
4145                  XCL_PROP : XCL_NOTPROP;                  XCL_PROP : XCL_NOTPROP;
4146                *class_uchardata++ = ptype;                *class_uchardata++ = ptype;
4147                *class_uchardata++ = pdata;                *class_uchardata++ = pdata;
4148                class_charcount -= 2;   /* Not a < 256 character */                class_has_8bitchar--;                /* Undo! */
4149                continue;                continue;
4150                }                }
4151  #endif  #endif
# Line 4095  for (;; ptr++) Line 4159  for (;; ptr++)
4159                *errorcodeptr = ERR7;                *errorcodeptr = ERR7;
4160                goto FAILED;                goto FAILED;
4161                }                }
4162              class_charcount -= 2;  /* Undo the default count from above */              class_has_8bitchar--;    /* Undo the speculative increase. */
4163              c = *ptr;              /* Get the final character and fall through */              class_single_char -= 2;  /* Undo the speculative increase. */
4164                c = *ptr;                /* Get the final character and fall through */
4165              break;              break;
4166              }              }
4167            }            }
4168    
4169          /* Fall through if we have a single character (c >= 0). This may be          /* Fall through if we have a single character (c >= 0). This may be
4170          greater than 256 mode. */          greater than 256. */
4171    
4172          }   /* End of backslash handling */          }   /* End of backslash handling */
4173    
# Line 4150  for (;; ptr++) Line 4215  for (;; ptr++)
4215            goto LONE_SINGLE_CHARACTER;            goto LONE_SINGLE_CHARACTER;
4216            }            }
4217    
4218  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4219          if (utf)          if (utf)
4220            {                           /* Braces are required because the */            {                           /* Braces are required because the */
4221            GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */            GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */
# Line 4195  for (;; ptr++) Line 4260  for (;; ptr++)
4260    
4261          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
4262    
4263            /* Since we found a character range, single character optimizations
4264            cannot be done anymore. */
4265            class_single_char = 2;
4266    
4267          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
4268          matching, we have to use an XCLASS with extra data items. Caseless          matching, we have to use an XCLASS with extra data items. Caseless
4269          matching for characters > 127 is available only if UCP support is          matching for characters > 127 is available only if UCP support is
4270          available. */          available. */
4271    
4272  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
4273            if ((d > 255) || (utf && ((options & PCRE_CASELESS) != 0 && d > 127)))
4274    #elif defined  SUPPORT_UTF
4275          if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))          if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
4276  #elif !(defined COMPILE_PCRE8)  #elif !(defined COMPILE_PCRE8)
4277          if (d > 255)          if (d > 255)
# Line 4214  for (;; ptr++) Line 4285  for (;; ptr++)
4285            they fit with the basic range. */            they fit with the basic range. */
4286    
4287  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4288    #ifndef COMPILE_PCRE8
4289              if (utf && (options & PCRE_CASELESS) != 0)
4290    #else
4291            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
4292    #endif
4293              {              {
4294              unsigned int occ, ocd;              unsigned int occ, ocd;
4295              unsigned int cc = c;              unsigned int cc = c;
# Line 4257  for (;; ptr++) Line 4332  for (;; ptr++)
4332    
4333            *class_uchardata++ = XCL_RANGE;            *class_uchardata++ = XCL_RANGE;
4334  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4335    #ifndef COMPILE_PCRE8
4336              if (utf)
4337                {
4338                class_uchardata += PRIV(ord2utf)(c, class_uchardata);
4339                class_uchardata += PRIV(ord2utf)(d, class_uchardata);
4340                }
4341              else
4342                {
4343                *class_uchardata++ = c;
4344                *class_uchardata++ = d;
4345                }
4346    #else
4347            class_uchardata += PRIV(ord2utf)(c, class_uchardata);            class_uchardata += PRIV(ord2utf)(c, class_uchardata);
4348            class_uchardata += PRIV(ord2utf)(d, class_uchardata);            class_uchardata += PRIV(ord2utf)(d, class_uchardata);
4349  #else  #endif
4350    #else /* SUPPORT_UTF */
4351            *class_uchardata++ = c;            *class_uchardata++ = c;
4352            *class_uchardata++ = d;            *class_uchardata++ = d;
4353  #endif  #endif /* SUPPORT_UTF */
4354    
4355            /* With UCP support, we are done. Without UCP support, there is no            /* With UCP support, we are done. Without UCP support, there is no
4356            caseless matching for UTF characters > 127; we can use the bit map            caseless matching for UTF characters > 127; we can use the bit map
# Line 4270  for (;; ptr++) Line 4358  for (;; ptr++)
4358            can still use  */            can still use  */
4359    
4360  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4361            continue;    /* With next character in the class */  #ifndef COMPILE_PCRE8
4362  #else            if (utf)
4363  #ifdef SUPPORT_UTF  #endif
4364                continue;    /* With next character in the class */
4365    #endif  /* SUPPORT_UCP */
4366    
4367    #if defined SUPPORT_UTF && !defined(SUPPORT_UCP) && !(defined COMPILE_PCRE8)
4368              if (utf)
4369                {
4370                if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
4371                /* Adjust upper limit and fall through to set up the map */
4372                d = 127;
4373                }
4374              else
4375                {
4376                if (c > 255) continue;
4377                /* Adjust upper limit and fall through to set up the map */
4378                d = 255;
4379                }
4380    #elif defined SUPPORT_UTF && !defined(SUPPORT_UCP)
4381            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
4382            /* Adjust upper limit and fall through to set up the map */            /* Adjust upper limit and fall through to set up the map */
4383            d = 127;            d = 127;
# Line 4280  for (;; ptr++) Line 4385  for (;; ptr++)
4385            if (c > 255) continue;            if (c > 255) continue;
4386            /* Adjust upper limit and fall through to set up the map */            /* Adjust upper limit and fall through to set up the map */
4387            d = 255;            d = 255;
4388  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF && !SUPPORT_UCP && !COMPILE_PCRE8 */
 #endif  /* SUPPORT_UCP */  
4389            }            }
4390  #endif  /* SUPPORT_UTF8 || COMPILE_PCRE16 */  #endif  /* SUPPORT_UTF || !COMPILE_PCRE8 */
4391    
4392          /* We use the bit map for 8 bit mode, or when the characters fall          /* We use the bit map for 8 bit mode, or when the characters fall
4393          partially or entirely to [0-255] ([0-127] for UCP) ranges. */          partially or entirely to [0-255] ([0-127] for UCP) ranges. */
4394    
4395          class_charcount += d - c + 1;          class_has_8bitchar = 1;
         class_lastchar = d;  
4396    
4397          /* We can save a bit of time by skipping this in the pre-compile. */          /* We can save a bit of time by skipping this in the pre-compile. */
4398    
# Line 4312  for (;; ptr++) Line 4415  for (;; ptr++)
4415    
4416        LONE_SINGLE_CHARACTER:        LONE_SINGLE_CHARACTER:
4417    
4418        /* Handle a character that cannot go in the bit map */        /* Only the value of 1 matters for class_single_char. */
4419          if (class_single_char < 2) class_single_char++;
4420          class_lastchar = c;
4421    
4422  #ifdef SUPPORT_UTF        /* Handle a character that cannot go in the bit map */
4423    #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
4424          if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
4425    #elif defined SUPPORT_UTF
4426        if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))        if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
4427  #elif !(defined COMPILE_PCRE8)  #elif !(defined COMPILE_PCRE8)
4428        if (c > 255)        if (c > 255)
# Line 4324  for (;; ptr++) Line 4432  for (;; ptr++)
4432          xclass = TRUE;          xclass = TRUE;
4433          *class_uchardata++ = XCL_SINGLE;          *class_uchardata++ = XCL_SINGLE;
4434  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4435          class_uchardata += PRIV(ord2utf)(c, class_uchardata);  #ifndef COMPILE_PCRE8
4436  #else          /* In non 8 bit mode, we can get here even
4437          *class_uchardata++ = c;          if we are not in UTF mode. */
4438            if (!utf)
4439              *class_uchardata++ = c;
4440            else
4441  #endif  #endif
4442              class_uchardata += PRIV(ord2utf)(c, class_uchardata);
4443    #else /* SUPPORT_UTF */
4444            *class_uchardata++ = c;
4445    #endif /* SUPPORT_UTF */
4446    
4447  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4448    #ifdef COMPILE_PCRE8
4449          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
4450    #else
4451            /* In non 8 bit mode, we can get here even
4452            if we are not in UTF mode. */
4453            if (utf && (options & PCRE_CASELESS) != 0)
4454    #endif
4455            {            {
4456            unsigned int othercase;            unsigned int othercase;
4457            if ((othercase = UCD_OTHERCASE(c)) != c)            if ((othercase = UCD_OTHERCASE(c)) != c)
4458              {              {
4459              *class_uchardata++ = XCL_SINGLE;              *class_uchardata++ = XCL_SINGLE;
4460              class_uchardata += PRIV(ord2utf)(othercase, class_uchardata);              class_uchardata += PRIV(ord2utf)(othercase, class_uchardata);
4461    
4462                /* In the first pass, we must accumulate the space used here for
4463                the following reason: If this ends up as the only character in the
4464                class, it will later be optimized down to a single character.
4465                However, that uses less memory, and so if this happens to be at the
4466                end of the regex, there will not be enough memory in the real
4467                compile for this temporary storage. */
4468    
4469                if (lengthptr != NULL)
4470                  {
4471                  *lengthptr += class_uchardata - class_uchardata_base;
4472                  class_uchardata = class_uchardata_base;
4473                  }
4474              }              }
4475            }            }
4476  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 4346  for (;; ptr++) Line 4480  for (;; ptr++)
4480  #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */  #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */
4481        /* Handle a single-byte character */        /* Handle a single-byte character */
4482          {          {
4483            class_has_8bitchar = 1;
4484          classbits[c/8] |= (1 << (c&7));          classbits[c/8] |= (1 << (c&7));
4485          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
4486            {            {
4487            c = cd->fcc[c];   /* flip case */            c = cd->fcc[c];   /* flip case */
4488            classbits[c/8] |= (1 << (c&7));            classbits[c/8] |= (1 << (c&7));
4489            }            }
         class_charcount++;  
         class_lastchar = c;  
4490          }          }
4491    
4492        }        }
# Line 4375  for (;; ptr++) Line 4508  for (;; ptr++)
4508        goto FAILED;        goto FAILED;
4509        }        }
4510    
4511      /* If class_charcount is 1, we saw precisely one character whose value is      /* COMMENT NEEDS FIXING - no longer true.
4512        If class_charcount is 1, we saw precisely one character whose value is
4513      less than 256. As long as there were no characters >= 128 and there was no      less than 256. As long as there were no characters >= 128 and there was no
4514      use of \p or \P, in other words, no use of any XCLASS features, we can      use of \p or \P, in other words, no use of any XCLASS features, we can
4515      optimize.      optimize.
# Line 4391  for (;; ptr++) Line 4525  for (;; ptr++)
4525      case, it can cause firstchar to be set. Otherwise, there can be no first      case, it can cause firstchar to be set. Otherwise, there can be no first
4526      char if this item is first, whatever repeat count may follow. In the case      char if this item is first, whatever repeat count may follow. In the case
4527      of reqchar, save the previous value for reinstating. */      of reqchar, save the previous value for reinstating. */
4528    
4529  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4530      if (class_charcount == 1 && !xclass &&      if (class_single_char == 1 && (!utf || !negate_class
4531        (!utf || !negate_class || class_lastchar < 128))        || class_lastchar < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))
4532  #else  #else
4533      if (class_charcount == 1)      if (class_single_char == 1)
4534  #endif  #endif
4535        {        {
4536        zeroreqchar = reqchar;        zeroreqchar = reqchar;
4537    
4538        /* The OP_NOT[I] opcodes work on one-byte characters only. */        /* The OP_NOT[I] opcodes work on single characters only. */
4539    
4540        if (negate_class)        if (negate_class)
4541          {          {
# Line 4415  for (;; ptr++) Line 4549  for (;; ptr++)
4549        /* For a single, positive character, get the value into mcbuffer, and        /* For a single, positive character, get the value into mcbuffer, and
4550        then we can handle this with the normal one-character code. */        then we can handle this with the normal one-character code. */
4551    
4552  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4553        if (utf && class_lastchar > 127)        if (utf && class_lastchar > MAX_VALUE_FOR_SINGLE_CHAR)
4554          mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);          mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);
4555        else        else
4556  #endif  #endif
# Line 4460  for (;; ptr++) Line 4594  for (;; ptr++)
4594        /* If the map is required, move up the extra data to make room for it;        /* If the map is required, move up the extra data to make room for it;
4595        otherwise just move the code pointer to the end of the extra data. */        otherwise just move the code pointer to the end of the extra data. */
4596    
4597        if (class_charcount > 0)        if (class_has_8bitchar > 0)
4598          {          {
4599          *code++ |= XCL_MAP;          *code++ |= XCL_MAP;
4600          memmove(code + (32 / sizeof(pcre_uchar)), code,          memmove(code + (32 / sizeof(pcre_uchar)), code,
# Line 4472  for (;; ptr++) Line 4606  for (;; ptr++)
4606    
4607        /* Now fill in the complete length of the item */        /* Now fill in the complete length of the item */
4608    
4609        PUT(previous, 1, code - previous);        PUT(previous, 1, (int)(code - previous));
4610        break;   /* End of class handling */        break;   /* End of class handling */
4611        }        }
4612  #endif  #endif
# Line 4567  for (;; ptr++) Line 4701  for (;; ptr++)
4701      past, but it no longer happens for non-repeated recursions. In fact, the      past, but it no longer happens for non-repeated recursions. In fact, the
4702      repeated ones could be re-implemented independently so as not to need this,      repeated ones could be re-implemented independently so as not to need this,
4703      but for the moment we rely on the code for repeating groups. */      but for the moment we rely on the code for repeating groups. */
4704    
4705      if (*previous == OP_RECURSE)      if (*previous == OP_RECURSE)
4706        {        {
4707        memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE));        memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE));
# Line 4611  for (;; ptr++) Line 4745  for (;; ptr++)
4745          {          {
4746          pcre_uchar *lastchar = code - 1;          pcre_uchar *lastchar = code - 1;
4747          BACKCHAR(lastchar);          BACKCHAR(lastchar);
4748          c = code - lastchar;            /* Length of UTF-8 character */          c = (int)(code - lastchar);     /* Length of UTF-8 character */
4749          memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */          memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */
4750          c |= UTF_LENGTH;                /* Flag c as a length */          c |= UTF_LENGTH;                /* Flag c as a length */
4751          }          }
# Line 4843  for (;; ptr++) Line 4977  for (;; ptr++)
4977    
4978      else if (*previous == OP_CLASS ||      else if (*previous == OP_CLASS ||
4979               *previous == OP_NCLASS ||               *previous == OP_NCLASS ||
4980  #if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4981               *previous == OP_XCLASS ||               *previous == OP_XCLASS ||
4982  #endif  #endif
4983               *previous == OP_REF ||               *previous == OP_REF ||
# Line 5017  for (;; ptr++) Line 5151  for (;; ptr++)
5151              *lengthptr += delta;              *lengthptr += delta;
5152              }              }
5153    
5154            /* This is compiling for real */            /* This is compiling for real. If there is a set first byte for
5155              the group, and we have not yet set a "required byte", set it. Make
5156              sure there is enough workspace for copying forward references before
5157              doing the copy. */
5158    
5159            else            else
5160              {              {
5161              if (groupsetfirstchar && reqchar < 0) reqchar = firstchar;              if (groupsetfirstchar && reqchar < 0) reqchar = firstchar;
5162    
5163              for (i = 1; i < repeat_min; i++)              for (i = 1; i < repeat_min; i++)
5164                {                {
5165                pcre_uchar *hc;                pcre_uchar *hc;
5166                pcre_uchar *this_hwm = cd->hwm;                pcre_uchar *this_hwm = cd->hwm;
5167                memcpy(code, previous, IN_UCHARS(len));                memcpy(code, previous, IN_UCHARS(len));
5168    
5169                  while (cd->hwm > cd->start_workspace + cd->workspace_size -
5170                         WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
5171                    {
5172                    int save_offset = save_hwm - cd->start_workspace;
5173                    int this_offset = this_hwm - cd->start_workspace;
5174                    *errorcodeptr = expand_workspace(cd);
5175                    if (*errorcodeptr != 0) goto FAILED;
5176                    save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
5177                    this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
5178                    }
5179    
5180                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
5181                  {                  {
5182                  PUT(cd->hwm, 0, GET(hc, 0) + len);                  PUT(cd->hwm, 0, GET(hc, 0) + len);
# Line 5094  for (;; ptr++) Line 5244  for (;; ptr++)
5244              }              }
5245    
5246            memcpy(code, previous, IN_UCHARS(len));            memcpy(code, previous, IN_UCHARS(len));
5247    
5248              /* Ensure there is enough workspace for forward references before
5249              copying them. */
5250    
5251              while (cd->hwm > cd->start_workspace + cd->workspace_size -
5252                     WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
5253                {
5254                int save_offset = save_hwm - cd->start_workspace;
5255                int this_offset = this_hwm - cd->start_workspace;
5256                *errorcodeptr = expand_workspace(cd);
5257                if (*errorcodeptr != 0) goto FAILED;
5258                save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
5259                this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
5260                }
5261    
5262            for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)            for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
5263              {              {
5264              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
# Line 5124  for (;; ptr++) Line 5289  for (;; ptr++)
5289        ONCE brackets can be converted into non-capturing brackets, as the        ONCE brackets can be converted into non-capturing brackets, as the
5290        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to        behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
5291        deal with possessive ONCEs specially.        deal with possessive ONCEs specially.
5292    
5293        Otherwise, when we are doing the actual compile phase, check to see        Otherwise, when we are doing the actual compile phase, check to see
5294        whether this group is one that could match an empty string. If so,        whether this group is one that could match an empty string. If so,
5295        convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so        convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
5296        that runtime checking can be done. [This check is also applied to ONCE        that runtime checking can be done. [This check is also applied to ONCE
5297        groups at runtime, but in a different way.]        groups at runtime, but in a different way.]
5298    
5299        Then, if the quantifier was possessive and the bracket is not a        Then, if the quantifier was possessive and the bracket is not a
5300        conditional, we convert the BRA code to the POS form, and the KET code to        conditional, we convert the BRA code to the POS form, and the KET code to
5301        KETRPOS. (It turns out to be convenient at runtime to detect this kind of        KETRPOS. (It turns out to be convenient at runtime to detect this kind of
5302        subpattern at both the start and at the end.) The use of special opcodes        subpattern at both the start and at the end.) The use of special opcodes
5303        makes it possible to reduce greatly the stack usage in pcre_exec(). If        makes it possible to reduce greatly the stack usage in pcre_exec(). If
5304        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
5305    
5306        Then, if the minimum number of matches is 1 or 0, cancel the possessive        Then, if the minimum number of matches is 1 or 0, cancel the possessive
5307        flag so that the default action below, of wrapping everything inside        flag so that the default action below, of wrapping everything inside
5308        atomic brackets, does not happen. When the minimum is greater than 1,        atomic brackets, does not happen. When the minimum is greater than 1,
5309        there will be earlier copies of the group, and so we still have to wrap        there will be earlier copies of the group, and so we still have to wrap
5310        the whole thing. */        the whole thing. */
5311    
5312        else        else
# Line 5150  for (;; ptr++) Line 5315  for (;; ptr++)
5315          pcre_uchar *bracode = ketcode - GET(ketcode, 1);          pcre_uchar *bracode = ketcode - GET(ketcode, 1);
5316    
5317          /* Convert possessive ONCE brackets to non-capturing */          /* Convert possessive ONCE brackets to non-capturing */
5318    
5319          if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&          if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
5320              possessive_quantifier) *bracode = OP_BRA;              possessive_quantifier) *bracode = OP_BRA;
5321    
5322          /* For non-possessive ONCE brackets, all we need to do is to          /* For non-possessive ONCE brackets, all we need to do is to
5323          set the KET. */          set the KET. */
5324    
5325          if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)          if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
5326            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
5327    
5328          /* Handle non-ONCE brackets and possessive ONCEs (which have been          /* Handle non-ONCE brackets and possessive ONCEs (which have been
5329          converted to non-capturing above). */          converted to non-capturing above). */
5330    
5331          else          else
5332            {            {
5333            /* In the compile phase, check for empty string matching. */            /* In the compile phase, check for empty string matching. */
5334    
5335            if (lengthptr == NULL)            if (lengthptr == NULL)
5336              {              {
5337              pcre_uchar *scode = bracode;              pcre_uchar *scode = bracode;
# Line 5181  for (;; ptr++) Line 5346  for (;; ptr++)
5346                }                }
5347              while (*scode == OP_ALT);              while (*scode == OP_ALT);
5348              }              }
5349    
5350            /* Handle possessive quantifiers. */            /* Handle possessive quantifiers. */
5351    
5352            if (possessive_quantifier)            if (possessive_quantifier)
# Line 5190  for (;; ptr++) Line 5355  for (;; ptr++)
5355              repeated non-capturing bracket, because we have not invented POS              repeated non-capturing bracket, because we have not invented POS
5356              versions of the COND opcodes. Because we are moving code along, we              versions of the COND opcodes. Because we are moving code along, we
5357              must ensure that any pending recursive references are updated. */              must ensure that any pending recursive references are updated. */
5358    
5359              if (*bracode == OP_COND || *bracode == OP_SCOND)              if (*bracode == OP_COND || *bracode == OP_SCOND)
5360                {                {
5361                int nlen = (int)(code - bracode);                int nlen = (int)(code - bracode);
# Line 5203  for (;; ptr++) Line 5368  for (;; ptr++)
5368                *code++ = OP_KETRPOS;                *code++ = OP_KETRPOS;
5369                PUTINC(code, 0, nlen);                PUTINC(code, 0, nlen);
5370                PUT(bracode, 1, nlen);                PUT(bracode, 1, nlen);
5371                }                }
5372    
5373              /* For non-COND brackets, we modify the BRA code and use KETRPOS. */              /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
5374    
5375              else              else
5376                {                {
5377                *bracode += 1;              /* Switch to xxxPOS opcodes */                *bracode += 1;              /* Switch to xxxPOS opcodes */
5378                *ketcode = OP_KETRPOS;                *ketcode = OP_KETRPOS;
5379                }                }
5380    
5381              /* If the minimum is zero, mark it as possessive, then unset the              /* If the minimum is zero, mark it as possessive, then unset the
5382              possessive flag when the minimum is 0 or 1. */              possessive flag when the minimum is 0 or 1. */
5383    
5384              if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;              if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
5385              if (repeat_min < 2) possessive_quantifier = FALSE;              if (repeat_min < 2) possessive_quantifier = FALSE;
5386              }              }
5387    
5388            /* Non-possessive quantifier */            /* Non-possessive quantifier */
5389    
5390            else *ketcode = OP_KETRMAX + repeat_type;            else *ketcode = OP_KETRMAX + repeat_type;
5391            }            }
5392          }          }
# Line 6114  for (;; ptr++) Line 6279  for (;; ptr++)
6279                of the group. Then remember the forward reference. */                of the group. Then remember the forward reference. */
6280    
6281                called = cd->start_code + recno;                called = cd->start_code + recno;
6282                  if (cd->hwm >= cd->start_workspace + cd->workspace_size -
6283                      WORK_SIZE_SAFETY_MARGIN)
6284                    {
6285                    *errorcodeptr = expand_workspace(cd);
6286                    if (*errorcodeptr != 0) goto FAILED;
6287                    }
6288                PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code));                PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code));
6289                }                }
6290    
# Line 6134  for (;; ptr++) Line 6305  for (;; ptr++)
6305                }                }
6306              }              }
6307    
6308            /* Insert the recursion/subroutine item. */            /* Insert the recursion/subroutine item. It does not have a set first
6309              character (relevant if it is repeated, because it will then be
6310              wrapped with ONCE brackets). */
6311    
6312            *code = OP_RECURSE;            *code = OP_RECURSE;
6313            PUT(code, 1, (int)(called - cd->start_code));            PUT(code, 1, (int)(called - cd->start_code));
6314            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
6315              groupsetfirstchar = FALSE;
6316            }            }
6317    
6318          /* Can't determine a first byte now */          /* Can't determine a first byte now */
# Line 6622  for (;; ptr++) Line 6796  for (;; ptr++)
6796  #endif  #endif
6797          /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE          /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE
6798          so that it works in DFA mode and in lookbehinds. */          so that it works in DFA mode and in lookbehinds. */
6799    
6800            {            {
6801            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
6802            *code++ = (!utf && c == -ESC_C)? OP_ALLANY : -c;            *code++ = (!utf && c == -ESC_C)? OP_ALLANY : -c;
6803            }            }
# Line 6635  for (;; ptr++) Line 6809  for (;; ptr++)
6809      a value > 127. We set its representation in the length/buffer, and then      a value > 127. We set its representation in the length/buffer, and then
6810      handle it as a data character. */      handle it as a data character. */
6811    
6812  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6813      if (utf && c > 127)      if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
6814        mclength = PRIV(ord2utf)(c, mcbuffer);        mclength = PRIV(ord2utf)(c, mcbuffer);
6815      else      else
6816  #endif  #endif
# Line 7376  compile_data *cd = &compile_block; Line 7550  compile_data *cd = &compile_block;
7550  computing the amount of memory that is needed. Compiled items are thrown away  computing the amount of memory that is needed. Compiled items are thrown away
7551  as soon as possible, so that a fairly large buffer should be sufficient for  as soon as possible, so that a fairly large buffer should be sufficient for
7552  this purpose. The same space is used in the second phase for remembering where  this purpose. The same space is used in the second phase for remembering where
7553  to fill in forward references to subpatterns. */  to fill in forward references to subpatterns. That may overflow, in which case
7554    new memory is obtained from malloc(). */
7555    
7556  pcre_uchar cworkspace[COMPILE_WORK_SIZE];  pcre_uchar cworkspace[COMPILE_WORK_SIZE];
7557    
# Line 7471  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7646  while (ptr[skipatstart] == CHAR_LEFT_PAR
7646  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
7647  utf = (options & PCRE_UTF8) != 0;  utf = (options & PCRE_UTF8) != 0;
7648    
7649  /* Can't support UTF8 unless PCRE has been compiled to include the code. The  /* Can't support UTF unless PCRE has been compiled to include the code. The
7650  return of an error code from PRIV(valid_utf)() is a new feature, introduced in  return of an error code from PRIV(valid_utf)() is a new feature, introduced in
7651  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
7652  not used here. */  not used here. */
7653    
7654  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
7655  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7656       (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)       (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
7657    {    {
# Line 7576  cd->bracount = cd->final_bracount = 0; Line 7751  cd->bracount = cd->final_bracount = 0;
7751  cd->names_found = 0;  cd->names_found = 0;
7752  cd->name_entry_size = 0;  cd->name_entry_size = 0;
7753  cd->name_table = NULL;  cd->name_table = NULL;
 cd->start_workspace = cworkspace;  
7754  cd->start_code = cworkspace;  cd->start_code = cworkspace;
7755  cd->hwm = cworkspace;  cd->hwm = cworkspace;
7756    cd->start_workspace = cworkspace;
7757    cd->workspace_size = COMPILE_WORK_SIZE;
7758  cd->start_pattern = (const pcre_uchar *)pattern;  cd->start_pattern = (const pcre_uchar *)pattern;
7759  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
7760  cd->req_varyopt = 0;  cd->req_varyopt = 0;
# Line 7656  cd->names_found = 0; Line 7832  cd->names_found = 0;
7832  cd->name_table = (pcre_uchar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
7833  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
7834  cd->start_code = codestart;  cd->start_code = codestart;
7835  cd->hwm = cworkspace;  cd->hwm = (pcre_uchar *)(cd->start_workspace);
7836  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7837  cd->had_accept = FALSE;  cd->had_accept = FALSE;
7838  cd->check_lookbehind = FALSE;  cd->check_lookbehind = FALSE;
# Line 7673  code = (pcre_uchar *)codestart; Line 7849  code = (pcre_uchar *)codestart;
7849    &firstchar, &reqchar, NULL, cd, NULL);    &firstchar, &reqchar, NULL, cd, NULL);
7850  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
7851  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
7852  re->flags = cd->external_flags;  re->flags = cd->external_flags | PCRE_MODE;
7853    
7854  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
7855    
# Line 7690  if debugging, leave the test till after Line 7866  if debugging, leave the test till after
7866  if (code - codestart > length) errorcode = ERR23;  if (code - codestart > length) errorcode = ERR23;
7867  #endif  #endif
7868    
7869  /* Fill in any forward references that are required. */  /* Fill in any forward references that are required. There may be repeated
7870    references; optimize for them, as searching a large regex takes time. */
7871    
7872  while (errorcode == 0 && cd->hwm > cworkspace)  if (cd->hwm > cd->start_workspace)
7873    {    {
7874    int offset, recno;    int prev_recno = -1;
7875    const pcre_uchar *groupptr;    const pcre_uchar *groupptr = NULL;
7876    cd->hwm -= LINK_SIZE;    while (errorcode == 0 && cd->hwm > cd->start_workspace)
7877    offset = GET(cd->hwm, 0);      {
7878    recno = GET(codestart, offset);      int offset, recno;
7879    groupptr = PRIV(find_bracket)(codestart, utf, recno);      cd->hwm -= LINK_SIZE;
7880    if (groupptr == NULL) errorcode = ERR53;      offset = GET(cd->hwm, 0);
7881      else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));      recno = GET(codestart, offset);
7882        if (recno != prev_recno)
7883          {
7884          groupptr = PRIV(find_bracket)(codestart, utf, recno);
7885          prev_recno = recno;
7886          }
7887        if (groupptr == NULL) errorcode = ERR53;
7888          else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));
7889        }
7890    }    }
7891    
7892    /* If the workspace had to be expanded, free the new memory. */
7893    
7894    if (cd->workspace_size > COMPILE_WORK_SIZE)
7895      (pcre_free)((void *)cd->start_workspace);
7896    
7897  /* Give an error if there's a back reference to a non-existent capturing  /* Give an error if there's a back reference to a non-existent capturing
7898  subpattern. */  subpattern. */
7899    

Legend:
Removed from v.794  
changed lines
  Added in v.801

  ViewVC Help
Powered by ViewVC 1.1.5