/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 801 by ph10, Mon Dec 12 16:23:37 2011 UTC | revision 802 by ph10, Tue Dec 13 09:52:20 2011 UTC
# Line 3762  for (;; ptr++) Line 3762  for (;; ptr++)
3762    
3763      /* For optimization purposes, we track some properties of the class.      /* For optimization purposes, we track some properties of the class.
3764      class_has_8bitchar will be non-zero, if the class contains at least one      class_has_8bitchar will be non-zero, if the class contains at least one
3765      < 256 character. class_single_char will be 1, if the class only contains      < 256 character. class_single_char will be 1 if the class contains only
3766      a single character. */      a single character. */
3767    
3768      class_has_8bitchar = 0;      class_has_8bitchar = 0;
# Line 3933  for (;; ptr++) Line 3933  for (;; ptr++)
3933        of the specials, which just set a flag. The sequence \b is a special        of the specials, which just set a flag. The sequence \b is a special
3934        case. Inside a class (and only there) it is treated as backspace. We        case. Inside a class (and only there) it is treated as backspace. We
3935        assume that other escapes have more than one character in them, so        assume that other escapes have more than one character in them, so
3936        speculatively set both class_has_8bitchar class_single_char bigger        speculatively set both class_has_8bitchar and class_single_char bigger
3937        than one. Unrecognized escapes fall through and are either treated        than one. Unrecognized escapes fall through and are either treated
3938        as literal characters (by default), or are faulted if        as literal characters (by default), or are faulted if
3939        PCRE_EXTRA is set. */        PCRE_EXTRA is set. */
# Line 4420  for (;; ptr++) Line 4420  for (;; ptr++)
4420        class_lastchar = c;        class_lastchar = c;
4421    
4422        /* Handle a character that cannot go in the bit map */        /* Handle a character that cannot go in the bit map */
4423    
4424  #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)  #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
4425        if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))        if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
4426  #elif defined SUPPORT_UTF  #elif defined SUPPORT_UTF
# Line 4427  for (;; ptr++) Line 4428  for (;; ptr++)
4428  #elif !(defined COMPILE_PCRE8)  #elif !(defined COMPILE_PCRE8)
4429        if (c > 255)        if (c > 255)
4430  #endif  #endif
4431    
4432  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
4433          {          {
4434          xclass = TRUE;          xclass = TRUE;
4435          *class_uchardata++ = XCL_SINGLE;          *class_uchardata++ = XCL_SINGLE;
4436  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4437  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
4438          /* In non 8 bit mode, we can get here even          /* In non 8 bit mode, we can get here even if we are not in UTF mode. */
4439          if we are not in UTF mode. */          if (!utf)
         if (!utf)  
4440            *class_uchardata++ = c;            *class_uchardata++ = c;
4441          else          else
4442  #endif  #endif
# Line 4448  for (;; ptr++) Line 4449  for (;; ptr++)
4449  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
4450          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
4451  #else  #else
4452          /* In non 8 bit mode, we can get here even          /* In non 8 bit mode, we can get here even if we are not in UTF mode. */
         if we are not in UTF mode. */  
4453          if (utf && (options & PCRE_CASELESS) != 0)          if (utf && (options & PCRE_CASELESS) != 0)
4454  #endif  #endif
4455            {            {
# Line 4465  for (;; ptr++) Line 4465  for (;; ptr++)
4465              However, that uses less memory, and so if this happens to be at the              However, that uses less memory, and so if this happens to be at the
4466              end of the regex, there will not be enough memory in the real              end of the regex, there will not be enough memory in the real
4467              compile for this temporary storage. */              compile for this temporary storage. */
4468    
4469              if (lengthptr != NULL)              if (lengthptr != NULL)
4470                {                {
4471                *lengthptr += class_uchardata - class_uchardata_base;                *lengthptr += class_uchardata - class_uchardata_base;
# Line 4478  for (;; ptr++) Line 4478  for (;; ptr++)
4478          }          }
4479        else        else
4480  #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */  #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */
4481    
4482        /* Handle a single-byte character */        /* Handle a single-byte character */
4483          {          {
4484          class_has_8bitchar = 1;          class_has_8bitchar = 1;
# Line 4488  for (;; ptr++) Line 4489  for (;; ptr++)
4489            classbits[c/8] |= (1 << (c&7));            classbits[c/8] |= (1 << (c&7));
4490            }            }
4491          }          }
   
4492        }        }
4493    
4494      /* Loop until ']' reached. This "while" is the end of the "do" far above.      /* Loop until ']' reached. This "while" is the end of the "do" far above.
# Line 4508  for (;; ptr++) Line 4508  for (;; ptr++)
4508        goto FAILED;        goto FAILED;
4509        }        }
4510    
4511      /* COMMENT NEEDS FIXING - no longer true.      /* If class_charcount is 1, we saw precisely one character. As long as
4512      If class_charcount is 1, we saw precisely one character whose value is      there were no negated characters >= 128 and there was no use of \p or \P,
4513      less than 256. As long as there were no characters >= 128 and there was no      in other words, no use of any XCLASS features, we can optimize.
     use of \p or \P, in other words, no use of any XCLASS features, we can  
     optimize.  
4514    
4515      In UTF-8 mode, we can optimize the negative case only if there were no      In UTF-8 mode, we can optimize the negative case only if there were no
4516      characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR      characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR

Legend:
Removed from v.801  
changed lines
  Added in v.802

  ViewVC Help
Powered by ViewVC 1.1.5