/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 794 by zherczeg, Thu Dec 8 07:36:41 2011 UTC | revision 795 by zherczeg, Sat Dec 10 02:20:06 2011 UTC
# Line 2357  for (code = first_significant_code(code Line 2357  for (code = first_significant_code(code
2357      actual length is stored in the compiled code, so we must update "code"      actual length is stored in the compiled code, so we must update "code"
2358      here. */      here. */
2359    
2360  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2361      case OP_XCLASS:      case OP_XCLASS:
2362      ccode = code += GET(code, 1);      ccode = code += GET(code, 1);
2363      goto CHECK_CLASS_REPEAT;      goto CHECK_CLASS_REPEAT;
# Line 2367  for (code = first_significant_code(code Line 2367  for (code = first_significant_code(code
2367      case OP_NCLASS:      case OP_NCLASS:
2368      ccode = code + PRIV(OP_lengths)[OP_CLASS];      ccode = code + PRIV(OP_lengths)[OP_CLASS];
2369    
2370  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2371      CHECK_CLASS_REPEAT:      CHECK_CLASS_REPEAT:
2372  #endif  #endif
2373    
# Line 2980  the next item is a character. */ Line 2980  the next item is a character. */
2980  if (next >= 0) switch(op_code)  if (next >= 0) switch(op_code)
2981    {    {
2982    case OP_CHAR:    case OP_CHAR:
2983  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2984    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
2985  #else  #else
2986    c = *previous;    c = *previous;
# Line 2992  if (next >= 0) switch(op_code) Line 2992  if (next >= 0) switch(op_code)
2992    high-valued characters. */    high-valued characters. */
2993    
2994    case OP_CHARI:    case OP_CHARI:
2995  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2996    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
2997  #else  #else
2998    c = *previous;    c = *previous;
2999  #endif  #endif
3000    if (c == next) return FALSE;    if (c == next) return FALSE;
3001  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3002    if (utf)    if (utf)
3003      {      {
3004      unsigned int othercase;      unsigned int othercase;
# Line 3011  if (next >= 0) switch(op_code) Line 3011  if (next >= 0) switch(op_code)
3011      return (unsigned int)c != othercase;      return (unsigned int)c != othercase;
3012      }      }
3013    else    else
3014  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3015    return (c != cd->fcc[next]);  /* Non-UTF-8 mode */    return (c != cd->fcc[next]);  /* Non-UTF-8 mode */
3016    
3017    /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These    /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
# Line 3023  if (next >= 0) switch(op_code) Line 3023  if (next >= 0) switch(op_code)
3023    
3024    case OP_NOTI:    case OP_NOTI:
3025    if ((c = *previous) == next) return TRUE;    if ((c = *previous) == next) return TRUE;
3026  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3027    if (utf)    if (utf)
3028      {      {
3029      unsigned int othercase;      unsigned int othercase;
# Line 3036  if (next >= 0) switch(op_code) Line 3036  if (next >= 0) switch(op_code)
3036      return (unsigned int)c == othercase;      return (unsigned int)c == othercase;
3037      }      }
3038    else    else
3039  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3040    return (c == cd->fcc[next]);  /* Non-UTF-8 mode */    return (c == cd->fcc[next]);  /* Non-UTF-8 mode */
3041    
3042    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
# Line 3128  switch(op_code) Line 3128  switch(op_code)
3128    {    {
3129    case OP_CHAR:    case OP_CHAR:
3130    case OP_CHARI:    case OP_CHARI:
3131  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3132    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
3133  #else  #else
3134    c = *previous;    c = *previous;
# Line 3358  pcre_uint8 classbits[32]; Line 3358  pcre_uint8 classbits[32];
3358  must not do this for other options (e.g. PCRE_EXTENDED) because they may change  must not do this for other options (e.g. PCRE_EXTENDED) because they may change
3359  dynamically as we process the pattern. */  dynamically as we process the pattern. */
3360    
3361  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3362  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3363  BOOL utf = (options & PCRE_UTF8) != 0;  BOOL utf = (options & PCRE_UTF8) != 0;
3364  pcre_uchar utf_chars[6];  pcre_uchar utf_chars[6];
# Line 4150  for (;; ptr++) Line 4150  for (;; ptr++)
4150            goto LONE_SINGLE_CHARACTER;            goto LONE_SINGLE_CHARACTER;
4151            }            }
4152    
4153  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4154          if (utf)          if (utf)
4155            {                           /* Braces are required because the */            {                           /* Braces are required because the */
4156            GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */            GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */
# Line 4200  for (;; ptr++) Line 4200  for (;; ptr++)
4200          matching for characters > 127 is available only if UCP support is          matching for characters > 127 is available only if UCP support is
4201          available. */          available. */
4202    
4203  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
4204            if ((d > 255) || (utf && ((options & PCRE_CASELESS) != 0 && d > 127)))
4205    #elif defined  SUPPORT_UTF
4206          if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))          if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
4207  #elif !(defined COMPILE_PCRE8)  #elif !(defined COMPILE_PCRE8)
4208          if (d > 255)          if (d > 255)
# Line 4214  for (;; ptr++) Line 4216  for (;; ptr++)
4216            they fit with the basic range. */            they fit with the basic range. */
4217    
4218  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4219    #ifndef COMPILE_PCRE8
4220              if (utf && (options & PCRE_CASELESS) != 0)
4221    #else
4222            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
4223    #endif
4224              {              {
4225              unsigned int occ, ocd;              unsigned int occ, ocd;
4226              unsigned int cc = c;              unsigned int cc = c;
# Line 4257  for (;; ptr++) Line 4263  for (;; ptr++)
4263    
4264            *class_uchardata++ = XCL_RANGE;            *class_uchardata++ = XCL_RANGE;
4265  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4266    #ifndef COMPILE_PCRE8
4267              if (utf)
4268                {
4269                class_uchardata += PRIV(ord2utf)(c, class_uchardata);
4270                class_uchardata += PRIV(ord2utf)(d, class_uchardata);
4271                }
4272              else
4273                {
4274                *class_uchardata++ = c;
4275                *class_uchardata++ = d;
4276                }
4277    #else
4278            class_uchardata += PRIV(ord2utf)(c, class_uchardata);            class_uchardata += PRIV(ord2utf)(c, class_uchardata);
4279            class_uchardata += PRIV(ord2utf)(d, class_uchardata);            class_uchardata += PRIV(ord2utf)(d, class_uchardata);
4280  #else  #endif
4281    #else /* SUPPORT_UTF */
4282            *class_uchardata++ = c;            *class_uchardata++ = c;
4283            *class_uchardata++ = d;            *class_uchardata++ = d;
4284  #endif  #endif /* SUPPORT_UTF */
4285    
4286            /* With UCP support, we are done. Without UCP support, there is no            /* With UCP support, we are done. Without UCP support, there is no
4287            caseless matching for UTF characters > 127; we can use the bit map            caseless matching for UTF characters > 127; we can use the bit map
# Line 4270  for (;; ptr++) Line 4289  for (;; ptr++)
4289            can still use  */            can still use  */
4290    
4291  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4292            continue;    /* With next character in the class */  #ifndef COMPILE_PCRE8
4293  #else            if (utf)
4294  #ifdef SUPPORT_UTF  #endif
4295                continue;    /* With next character in the class */
4296    #endif  /* SUPPORT_UCP */
4297    
4298    #if defined SUPPORT_UTF && !defined(SUPPORT_UCP) && !(defined COMPILE_PCRE8)
4299              if (utf)
4300                {
4301                if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
4302                /* Adjust upper limit and fall through to set up the map */
4303                d = 127;
4304                }
4305              else
4306                {
4307                if (c > 255) continue;
4308                /* Adjust upper limit and fall through to set up the map */
4309                d = 255;
4310                }
4311    #elif defined SUPPORT_UTF && !defined(SUPPORT_UCP)
4312            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
4313            /* Adjust upper limit and fall through to set up the map */            /* Adjust upper limit and fall through to set up the map */
4314            d = 127;            d = 127;
# Line 4280  for (;; ptr++) Line 4316  for (;; ptr++)
4316            if (c > 255) continue;            if (c > 255) continue;
4317            /* Adjust upper limit and fall through to set up the map */            /* Adjust upper limit and fall through to set up the map */
4318            d = 255;            d = 255;
4319  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF && !SUPPORT_UCP && !COMPILE_PCRE8 */
 #endif  /* SUPPORT_UCP */  
4320            }            }
4321  #endif  /* SUPPORT_UTF8 || COMPILE_PCRE16 */  #endif  /* SUPPORT_UTF || !COMPILE_PCRE8 */
4322    
4323          /* We use the bit map for 8 bit mode, or when the characters fall          /* We use the bit map for 8 bit mode, or when the characters fall
4324          partially or entirely to [0-255] ([0-127] for UCP) ranges. */          partially or entirely to [0-255] ([0-127] for UCP) ranges. */
# Line 4314  for (;; ptr++) Line 4349  for (;; ptr++)
4349    
4350        /* Handle a character that cannot go in the bit map */        /* Handle a character that cannot go in the bit map */
4351    
4352  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
4353          if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
4354    #elif defined SUPPORT_UTF
4355        if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))        if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
4356  #elif !(defined COMPILE_PCRE8)  #elif !(defined COMPILE_PCRE8)
4357        if (c > 255)        if (c > 255)
# Line 4324  for (;; ptr++) Line 4361  for (;; ptr++)
4361          xclass = TRUE;          xclass = TRUE;
4362          *class_uchardata++ = XCL_SINGLE;          *class_uchardata++ = XCL_SINGLE;
4363  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4364          class_uchardata += PRIV(ord2utf)(c, class_uchardata);  #ifndef COMPILE_PCRE8
4365  #else          /* In non 8 bit mode, we can get here even
4366          *class_uchardata++ = c;          if we are not in UTF mode. */
4367            if (!utf)
4368              *class_uchardata++ = c;
4369            else
4370  #endif  #endif
4371              class_uchardata += PRIV(ord2utf)(c, class_uchardata);
4372    #else /* SUPPORT_UTF */
4373            *class_uchardata++ = c;
4374    #endif /* SUPPORT_UTF */
4375    
4376  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4377    #ifdef COMPILE_PCRE8
4378          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
4379    #else
4380            /* In non 8 bit mode, we can get here even
4381            if we are not in UTF mode. */
4382            if (utf && (options & PCRE_CASELESS) != 0)
4383    #endif
4384            {            {
4385            unsigned int othercase;            unsigned int othercase;
4386            if ((othercase = UCD_OTHERCASE(c)) != c)            if ((othercase = UCD_OTHERCASE(c)) != c)
# Line 4415  for (;; ptr++) Line 4465  for (;; ptr++)
4465        /* For a single, positive character, get the value into mcbuffer, and        /* For a single, positive character, get the value into mcbuffer, and
4466        then we can handle this with the normal one-character code. */        then we can handle this with the normal one-character code. */
4467    
4468  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4469        if (utf && class_lastchar > 127)        if (utf && class_lastchar > 127)
4470          mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);          mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);
4471        else        else
# Line 4843  for (;; ptr++) Line 4893  for (;; ptr++)
4893    
4894      else if (*previous == OP_CLASS ||      else if (*previous == OP_CLASS ||
4895               *previous == OP_NCLASS ||               *previous == OP_NCLASS ||
4896  #if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4897               *previous == OP_XCLASS ||               *previous == OP_XCLASS ||
4898  #endif  #endif
4899               *previous == OP_REF ||               *previous == OP_REF ||
# Line 6635  for (;; ptr++) Line 6685  for (;; ptr++)
6685      a value > 127. We set its representation in the length/buffer, and then      a value > 127. We set its representation in the length/buffer, and then
6686      handle it as a data character. */      handle it as a data character. */
6687    
6688  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6689      if (utf && c > 127)      if (utf && c > 127)
6690        mclength = PRIV(ord2utf)(c, mcbuffer);        mclength = PRIV(ord2utf)(c, mcbuffer);
6691      else      else
# Line 7471  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7521  while (ptr[skipatstart] == CHAR_LEFT_PAR
7521  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
7522  utf = (options & PCRE_UTF8) != 0;  utf = (options & PCRE_UTF8) != 0;
7523    
7524  /* Can't support UTF8 unless PCRE has been compiled to include the code. The  /* Can't support UTF unless PCRE has been compiled to include the code. The
7525  return of an error code from PRIV(valid_utf)() is a new feature, introduced in  return of an error code from PRIV(valid_utf)() is a new feature, introduced in
7526  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
7527  not used here. */  not used here. */
7528    
7529  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
7530  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7531       (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)       (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
7532    {    {
# Line 7673  code = (pcre_uchar *)codestart; Line 7723  code = (pcre_uchar *)codestart;
7723    &firstchar, &reqchar, NULL, cd, NULL);    &firstchar, &reqchar, NULL, cd, NULL);
7724  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
7725  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
7726  re->flags = cd->external_flags;  re->flags = cd->external_flags | PCRE_MODE;
7727    
7728  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
7729    

Legend:
Removed from v.794  
changed lines
  Added in v.795

  ViewVC Help
Powered by ViewVC 1.1.5