/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 797 by zherczeg, Sat Dec 10 02:20:06 2011 UTC revision 798 by zherczeg, Sun Dec 11 18:07:25 2011 UTC
# Line 3413  for (;; ptr++) Line 3413  for (;; ptr++)
3413    BOOL is_quantifier;    BOOL is_quantifier;
3414    BOOL is_recurse;    BOOL is_recurse;
3415    BOOL reset_bracount;    BOOL reset_bracount;
3416    int class_charcount;    int class_has_8bitchar;
3417      int class_single_char;
3418    int class_lastchar;    int class_lastchar;
3419    int newoptions;    int newoptions;
3420    int recno;    int recno;
# Line 3710  for (;; ptr++) Line 3711  for (;; ptr++)
3711    
3712      should_flip_negation = FALSE;      should_flip_negation = FALSE;
3713    
3714      /* Keep a count of chars with values < 256 so that we can optimize the case      /* For optimization purposes, we track some properties of the class.
3715      of just a single character (as long as it's < 256). However, For higher      class_has_8bitchar will be non-zero, if the class contains at least one
3716      valued UTF-8 characters, we don't yet do any optimization. */      < 256 character. class_single_char will be 1, if the class only contains
3717        a single character. */
3718    
3719      class_charcount = 0;      class_has_8bitchar = 0;
3720        class_single_char = 0;
3721      class_lastchar = -1;      class_lastchar = -1;
3722    
3723      /* Initialize the 32-char bit map to all zeros. We build the map in a      /* Initialize the 32-char bit map to all zeros. We build the map in a
# Line 3870  for (;; ptr++) Line 3873  for (;; ptr++)
3873            for (c = 0; c < 32; c++) classbits[c] |= pbits[c];            for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
3874    
3875          ptr = tempptr + 1;          ptr = tempptr + 1;
3876          class_charcount = 10;  /* Set > 1; assumes more than 1 per class */          /* Every class contains at least one < 256 character. */
3877            class_has_8bitchar = 1;
3878            /* Every class contains at least two characters. */
3879            class_single_char = 2;
3880          continue;    /* End of POSIX syntax handling */          continue;    /* End of POSIX syntax handling */
3881          }          }
3882    
3883        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
3884        of the specials, which just set a flag. The sequence \b is a special        of the specials, which just set a flag. The sequence \b is a special
3885        case. Inside a class (and only there) it is treated as backspace. We        case. Inside a class (and only there) it is treated as backspace. We
3886        assume that other escapes have more than one character in them, so set        assume that other escapes have more than one character in them, so
3887        class_charcount bigger than one. Unrecognized escapes fall through and        speculatively set both class_has_8bitchar and class_single_char bigger
3888        are either treated as literal characters (by default), or are faulted if        than one. Unrecognized escapes fall through and are either treated
3889          as literal characters (by default), or are faulted if
3890        PCRE_EXTRA is set. */        PCRE_EXTRA is set. */
3891    
3892        if (c == CHAR_BACKSLASH)        if (c == CHAR_BACKSLASH)
# Line 3902  for (;; ptr++) Line 3909  for (;; ptr++)
3909          if (c < 0)          if (c < 0)
3910            {            {
3911            register const pcre_uint8 *cbits = cd->cbits;            register const pcre_uint8 *cbits = cd->cbits;
3912            class_charcount += 2;     /* Greater than 1 is what matters */            /* Every class contains at least two < 256 characters. */
3913              class_has_8bitchar++;
3914              /* Every class contains at least two characters. */
3915              class_single_char += 2;
3916    
3917            switch (-c)            switch (-c)
3918              {              {
# Line 3915  for (;; ptr++) Line 3925  for (;; ptr++)
3925              case ESC_SU:              case ESC_SU:
3926              nestptr = ptr;              nestptr = ptr;
3927              ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */              ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */
3928              class_charcount -= 2;                /* Undo! */              class_has_8bitchar--;                /* Undo! */
3929              continue;              continue;
3930  #endif  #endif
3931              case ESC_d:              case ESC_d:
# Line 4081  for (;; ptr++) Line 4091  for (;; ptr++)
4091                  XCL_PROP : XCL_NOTPROP;                  XCL_PROP : XCL_NOTPROP;
4092                *class_uchardata++ = ptype;                *class_uchardata++ = ptype;
4093                *class_uchardata++ = pdata;                *class_uchardata++ = pdata;
4094                class_charcount -= 2;   /* Not a < 256 character */                class_has_8bitchar--;                /* Undo! */
4095                continue;                continue;
4096                }                }
4097  #endif  #endif
# Line 4095  for (;; ptr++) Line 4105  for (;; ptr++)
4105                *errorcodeptr = ERR7;                *errorcodeptr = ERR7;
4106                goto FAILED;                goto FAILED;
4107                }                }
4108              class_charcount -= 2;  /* Undo the default count from above */              class_has_8bitchar--;    /* Undo the speculative increase. */
4109              c = *ptr;              /* Get the final character and fall through */              class_single_char -= 2;  /* Undo the speculative increase. */
4110                c = *ptr;                /* Get the final character and fall through */
4111              break;              break;
4112              }              }
4113            }            }
4114    
4115          /* Fall through if we have a single character (c >= 0). This may be          /* Fall through if we have a single character (c >= 0). This may be
4116          greater than 256 mode. */          greater than 256. */
4117    
4118          }   /* End of backslash handling */          }   /* End of backslash handling */
4119    
# Line 4195  for (;; ptr++) Line 4206  for (;; ptr++)
4206    
4207          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
4208    
4209            /* Since we found a character range, single character optimizations
4210            cannot be done anymore. */
4211            class_single_char = 2;
4212    
4213          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
4214          matching, we have to use an XCLASS with extra data items. Caseless          matching, we have to use an XCLASS with extra data items. Caseless
4215          matching for characters > 127 is available only if UCP support is          matching for characters > 127 is available only if UCP support is
# Line 4323  for (;; ptr++) Line 4338  for (;; ptr++)
4338          /* We use the bit map for 8 bit mode, or when the characters fall          /* We use the bit map for 8 bit mode, or when the characters fall
4339          partially or entirely to [0-255] ([0-127] for UCP) ranges. */          partially or entirely to [0-255] ([0-127] for UCP) ranges. */
4340    
4341          class_charcount += d - c + 1;          class_has_8bitchar = 1;
         class_lastchar = d;  
4342    
4343          /* We can save a bit of time by skipping this in the pre-compile. */          /* We can save a bit of time by skipping this in the pre-compile. */
4344    
# Line 4347  for (;; ptr++) Line 4361  for (;; ptr++)
4361    
4362        LONE_SINGLE_CHARACTER:        LONE_SINGLE_CHARACTER:
4363    
4364        /* Handle a character that cannot go in the bit map */        /* Only the value of 1 matters for class_single_char. */
4365          if (class_single_char < 2) class_single_char++;
4366          class_lastchar = c;
4367    
4368          /* Handle a character that cannot go in the bit map */
4369  #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)  #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
4370        if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))        if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
4371  #elif defined SUPPORT_UTF  #elif defined SUPPORT_UTF
# Line 4396  for (;; ptr++) Line 4413  for (;; ptr++)
4413  #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */  #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */
4414        /* Handle a single-byte character */        /* Handle a single-byte character */
4415          {          {
4416            class_has_8bitchar = 1;
4417          classbits[c/8] |= (1 << (c&7));          classbits[c/8] |= (1 << (c&7));
4418          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
4419            {            {
4420            c = cd->fcc[c];   /* flip case */            c = cd->fcc[c];   /* flip case */
4421            classbits[c/8] |= (1 << (c&7));            classbits[c/8] |= (1 << (c&7));
4422            }            }
         class_charcount++;  
         class_lastchar = c;  
4423          }          }
4424    
4425        }        }
# Line 4443  for (;; ptr++) Line 4459  for (;; ptr++)
4459      of reqchar, save the previous value for reinstating. */      of reqchar, save the previous value for reinstating. */
4460    
4461  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4462      if (class_charcount == 1 && !xclass &&      if (class_single_char == 1 && (!utf || !negate_class
4463        (!utf || !negate_class || class_lastchar < 128))        || class_lastchar < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))
4464  #else  #else
4465      if (class_charcount == 1)      if (class_single_char == 1)
4466  #endif  #endif
4467        {        {
4468        zeroreqchar = reqchar;        zeroreqchar = reqchar;
4469    
4470        /* The OP_NOT[I] opcodes work on one-byte characters only. */        /* The OP_NOT[I] opcodes work on single characters only. */
4471    
4472        if (negate_class)        if (negate_class)
4473          {          {
# Line 4466  for (;; ptr++) Line 4482  for (;; ptr++)
4482        then we can handle this with the normal one-character code. */        then we can handle this with the normal one-character code. */
4483    
4484  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4485        if (utf && class_lastchar > 127)        if (utf && class_lastchar > MAX_VALUE_FOR_SINGLE_CHAR)
4486          mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);          mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);
4487        else        else
4488  #endif  #endif
# Line 4510  for (;; ptr++) Line 4526  for (;; ptr++)
4526        /* If the map is required, move up the extra data to make room for it;        /* If the map is required, move up the extra data to make room for it;
4527        otherwise just move the code pointer to the end of the extra data. */        otherwise just move the code pointer to the end of the extra data. */
4528    
4529        if (class_charcount > 0)        if (class_has_8bitchar > 0)
4530          {          {
4531          *code++ |= XCL_MAP;          *code++ |= XCL_MAP;
4532          memmove(code + (32 / sizeof(pcre_uchar)), code,          memmove(code + (32 / sizeof(pcre_uchar)), code,
# Line 6686  for (;; ptr++) Line 6702  for (;; ptr++)
6702      handle it as a data character. */      handle it as a data character. */
6703    
6704  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
6705      if (utf && c > 127)      if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
6706        mclength = PRIV(ord2utf)(c, mcbuffer);        mclength = PRIV(ord2utf)(c, mcbuffer);
6707      else      else
6708  #endif  #endif

Legend:
Removed from v.797  
changed lines
  Added in v.798

  ViewVC Help
Powered by ViewVC 1.1.5