/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 782 by zherczeg, Sat Dec 3 23:58:37 2011 UTC revision 795 by zherczeg, Sat Dec 10 02:20:06 2011 UTC
# Line 102  overrun before it actually does run off Line 102  overrun before it actually does run off
102  #define REQ_CASELESS   0x10000000l      /* Indicates caselessness */  #define REQ_CASELESS   0x10000000l      /* Indicates caselessness */
103  #define REQ_VARY       0x20000000l      /* Reqchar followed non-literal item */  #define REQ_VARY       0x20000000l      /* Reqchar followed non-literal item */
104    
105    /* Repeated character flags. */
106    
107    #define UTF_LENGTH     0x10000000l      /* The char contains its length. */
108    
109  /* Table for handling escaped characters in the range '0'-'z'. Positive returns  /* Table for handling escaped characters in the range '0'-'z'. Positive returns
110  are simple data values; negative values are for special things like \d and so  are simple data values; negative values are for special things like \d and so
111  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
# Line 2353  for (code = first_significant_code(code Line 2357  for (code = first_significant_code(code
2357      actual length is stored in the compiled code, so we must update "code"      actual length is stored in the compiled code, so we must update "code"
2358      here. */      here. */
2359    
2360  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2361      case OP_XCLASS:      case OP_XCLASS:
2362      ccode = code += GET(code, 1);      ccode = code += GET(code, 1);
2363      goto CHECK_CLASS_REPEAT;      goto CHECK_CLASS_REPEAT;
# Line 2363  for (code = first_significant_code(code Line 2367  for (code = first_significant_code(code
2367      case OP_NCLASS:      case OP_NCLASS:
2368      ccode = code + PRIV(OP_lengths)[OP_CLASS];      ccode = code + PRIV(OP_lengths)[OP_CLASS];
2369    
2370  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2371      CHECK_CLASS_REPEAT:      CHECK_CLASS_REPEAT:
2372  #endif  #endif
2373    
# Line 2896  static BOOL Line 2900  static BOOL
2900  check_auto_possessive(const pcre_uchar *previous, BOOL utf,  check_auto_possessive(const pcre_uchar *previous, BOOL utf,
2901    const pcre_uchar *ptr, int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
2902  {  {
2903  int c, next;  pcre_int32 c, next;
2904  int op_code = *previous++;  int op_code = *previous++;
2905    
2906  /* Skip whitespace and comments in extended mode */  /* Skip whitespace and comments in extended mode */
# Line 2932  if (*ptr == CHAR_BACKSLASH) Line 2936  if (*ptr == CHAR_BACKSLASH)
2936    if (temperrorcode != 0) return FALSE;    if (temperrorcode != 0) return FALSE;
2937    ptr++;    /* Point after the escape sequence */    ptr++;    /* Point after the escape sequence */
2938    }    }
2939    else if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_meta) == 0)
 else if ((cd->ctypes[*ptr] & ctype_meta) == 0)  
2940    {    {
2941  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2942    if (utf) { GETCHARINC(next, ptr); } else    if (utf) { GETCHARINC(next, ptr); } else
2943  #endif  #endif
2944    next = *ptr++;    next = *ptr++;
2945    }    }
   
2946  else return FALSE;  else return FALSE;
2947    
2948  /* Skip whitespace and comments in extended mode */  /* Skip whitespace and comments in extended mode */
# Line 2978  the next item is a character. */ Line 2980  the next item is a character. */
2980  if (next >= 0) switch(op_code)  if (next >= 0) switch(op_code)
2981    {    {
2982    case OP_CHAR:    case OP_CHAR:
2983  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2984    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
2985  #else  #else
2986    c = *previous;    c = *previous;
# Line 2990  if (next >= 0) switch(op_code) Line 2992  if (next >= 0) switch(op_code)
2992    high-valued characters. */    high-valued characters. */
2993    
2994    case OP_CHARI:    case OP_CHARI:
2995  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2996    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
2997  #else  #else
2998    c = *previous;    c = *previous;
2999  #endif  #endif
3000    if (c == next) return FALSE;    if (c == next) return FALSE;
3001  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3002    if (utf)    if (utf)
3003      {      {
3004      unsigned int othercase;      unsigned int othercase;
# Line 3009  if (next >= 0) switch(op_code) Line 3011  if (next >= 0) switch(op_code)
3011      return (unsigned int)c != othercase;      return (unsigned int)c != othercase;
3012      }      }
3013    else    else
3014  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3015    return (c != cd->fcc[next]);  /* Non-UTF-8 mode */    return (c != cd->fcc[next]);  /* Non-UTF-8 mode */
3016    
3017    /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These    /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
# Line 3021  if (next >= 0) switch(op_code) Line 3023  if (next >= 0) switch(op_code)
3023    
3024    case OP_NOTI:    case OP_NOTI:
3025    if ((c = *previous) == next) return TRUE;    if ((c = *previous) == next) return TRUE;
3026  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3027    if (utf)    if (utf)
3028      {      {
3029      unsigned int othercase;      unsigned int othercase;
# Line 3034  if (next >= 0) switch(op_code) Line 3036  if (next >= 0) switch(op_code)
3036      return (unsigned int)c == othercase;      return (unsigned int)c == othercase;
3037      }      }
3038    else    else
3039  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3040    return (c == cd->fcc[next]);  /* Non-UTF-8 mode */    return (c == cd->fcc[next]);  /* Non-UTF-8 mode */
3041    
3042    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
# Line 3126  switch(op_code) Line 3128  switch(op_code)
3128    {    {
3129    case OP_CHAR:    case OP_CHAR:
3130    case OP_CHARI:    case OP_CHARI:
3131  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3132    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
3133  #else  #else
3134    c = *previous;    c = *previous;
# Line 3356  pcre_uint8 classbits[32]; Line 3358  pcre_uint8 classbits[32];
3358  must not do this for other options (e.g. PCRE_EXTENDED) because they may change  must not do this for other options (e.g. PCRE_EXTENDED) because they may change
3359  dynamically as we process the pattern. */  dynamically as we process the pattern. */
3360    
3361  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3362  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3363  BOOL utf = (options & PCRE_UTF8) != 0;  BOOL utf = (options & PCRE_UTF8) != 0;
3364  pcre_uchar utf_chars[6];  pcre_uchar utf_chars[6];
# Line 3736  for (;; ptr++) Line 3738  for (;; ptr++)
3738        {        {
3739        const pcre_uchar *oldptr;        const pcre_uchar *oldptr;
3740    
3741  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3742        if (utf && c > 127)        if (utf && HAS_EXTRALEN(c))
3743          {                           /* Braces are required because the */          {                           /* Braces are required because the */
3744          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
3745          }          }
# Line 4148  for (;; ptr++) Line 4150  for (;; ptr++)
4150            goto LONE_SINGLE_CHARACTER;            goto LONE_SINGLE_CHARACTER;
4151            }            }
4152    
4153  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4154          if (utf)          if (utf)
4155            {                           /* Braces are required because the */            {                           /* Braces are required because the */
4156            GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */            GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */
# Line 4198  for (;; ptr++) Line 4200  for (;; ptr++)
4200          matching for characters > 127 is available only if UCP support is          matching for characters > 127 is available only if UCP support is
4201          available. */          available. */
4202    
4203  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
4204            if ((d > 255) || (utf && ((options & PCRE_CASELESS) != 0 && d > 127)))
4205    #elif defined  SUPPORT_UTF
4206          if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))          if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
4207  #endif  #elif !(defined COMPILE_PCRE8)
 #ifndef COMPILE_PCRE8  
4208          if (d > 255)          if (d > 255)
4209  #endif  #endif
4210  #if defined SUPPORT_UTF || defined COMPILE_PCRE16  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
4211            {            {
4212            xclass = TRUE;            xclass = TRUE;
4213    
# Line 4213  for (;; ptr++) Line 4216  for (;; ptr++)
4216            they fit with the basic range. */            they fit with the basic range. */
4217    
4218  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4219    #ifndef COMPILE_PCRE8
4220              if (utf && (options & PCRE_CASELESS) != 0)
4221    #else
4222            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
4223    #endif
4224              {              {
4225              unsigned int occ, ocd;              unsigned int occ, ocd;
4226              unsigned int cc = c;              unsigned int cc = c;
# Line 4256  for (;; ptr++) Line 4263  for (;; ptr++)
4263    
4264            *class_uchardata++ = XCL_RANGE;            *class_uchardata++ = XCL_RANGE;
4265  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4266    #ifndef COMPILE_PCRE8
4267              if (utf)
4268                {
4269                class_uchardata += PRIV(ord2utf)(c, class_uchardata);
4270                class_uchardata += PRIV(ord2utf)(d, class_uchardata);
4271                }
4272              else
4273                {
4274                *class_uchardata++ = c;
4275                *class_uchardata++ = d;
4276                }
4277    #else
4278            class_uchardata += PRIV(ord2utf)(c, class_uchardata);            class_uchardata += PRIV(ord2utf)(c, class_uchardata);
4279            class_uchardata += PRIV(ord2utf)(d, class_uchardata);            class_uchardata += PRIV(ord2utf)(d, class_uchardata);
4280  #else  #endif
4281    #else /* SUPPORT_UTF */
4282            *class_uchardata++ = c;            *class_uchardata++ = c;
4283            *class_uchardata++ = d;            *class_uchardata++ = d;
4284  #endif  #endif /* SUPPORT_UTF */
4285    
4286            /* With UCP support, we are done. Without UCP support, there is no            /* With UCP support, we are done. Without UCP support, there is no
4287            caseless matching for UTF characters > 127; we can use the bit map            caseless matching for UTF characters > 127; we can use the bit map
# Line 4269  for (;; ptr++) Line 4289  for (;; ptr++)
4289            can still use  */            can still use  */
4290    
4291  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4292            continue;    /* With next character in the class */  #ifndef COMPILE_PCRE8
4293  #else            if (utf)
4294  #ifdef SUPPORT_UTF  #endif
4295                continue;    /* With next character in the class */
4296    #endif  /* SUPPORT_UCP */
4297    
4298    #if defined SUPPORT_UTF && !defined(SUPPORT_UCP) && !(defined COMPILE_PCRE8)
4299              if (utf)
4300                {
4301                if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
4302                /* Adjust upper limit and fall through to set up the map */
4303                d = 127;
4304                }
4305              else
4306                {
4307                if (c > 255) continue;
4308                /* Adjust upper limit and fall through to set up the map */
4309                d = 255;
4310                }
4311    #elif defined SUPPORT_UTF && !defined(SUPPORT_UCP)
4312            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
4313            /* Adjust upper limit and fall through to set up the map */            /* Adjust upper limit and fall through to set up the map */
4314            d = 127;            d = 127;
# Line 4279  for (;; ptr++) Line 4316  for (;; ptr++)
4316            if (c > 255) continue;            if (c > 255) continue;
4317            /* Adjust upper limit and fall through to set up the map */            /* Adjust upper limit and fall through to set up the map */
4318            d = 255;            d = 255;
4319  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF && !SUPPORT_UCP && !COMPILE_PCRE8 */
 #endif  /* SUPPORT_UCP */  
4320            }            }
4321  #endif  /* SUPPORT_UTF8 || COMPILE_PCRE16 */  #endif  /* SUPPORT_UTF || !COMPILE_PCRE8 */
4322    
4323          /* We use the bit map for 8 bit mode, or when the characters fall          /* We use the bit map for 8 bit mode, or when the characters fall
4324          partially or entirely to [0-255] ([0-127] for UCP) ranges. */          partially or entirely to [0-255] ([0-127] for UCP) ranges. */
# Line 4313  for (;; ptr++) Line 4349  for (;; ptr++)
4349    
4350        /* Handle a character that cannot go in the bit map */        /* Handle a character that cannot go in the bit map */
4351    
4352  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
4353          if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
4354    #elif defined SUPPORT_UTF
4355        if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))        if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
4356  #endif  #elif !(defined COMPILE_PCRE8)
 #ifndef COMPILE_PCRE8  
4357        if (c > 255)        if (c > 255)
4358  #endif  #endif
4359  #if defined SUPPORT_UTF || defined COMPILE_PCRE16  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
4360          {          {
4361          xclass = TRUE;          xclass = TRUE;
4362          *class_uchardata++ = XCL_SINGLE;          *class_uchardata++ = XCL_SINGLE;
4363  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4364          class_uchardata += PRIV(ord2utf)(c, class_uchardata);  #ifndef COMPILE_PCRE8
4365  #else          /* In non 8 bit mode, we can get here even
4366          *class_uchardata++ = c;          if we are not in UTF mode. */
4367            if (!utf)
4368              *class_uchardata++ = c;
4369            else
4370  #endif  #endif
4371              class_uchardata += PRIV(ord2utf)(c, class_uchardata);
4372    #else /* SUPPORT_UTF */
4373            *class_uchardata++ = c;
4374    #endif /* SUPPORT_UTF */
4375    
4376  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4377    #ifdef COMPILE_PCRE8
4378          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
4379    #else
4380            /* In non 8 bit mode, we can get here even
4381            if we are not in UTF mode. */
4382            if (utf && (options & PCRE_CASELESS) != 0)
4383    #endif
4384            {            {
4385            unsigned int othercase;            unsigned int othercase;
4386            if ((othercase = UCD_OTHERCASE(c)) != c)            if ((othercase = UCD_OTHERCASE(c)) != c)
# Line 4343  for (;; ptr++) Line 4393  for (;; ptr++)
4393    
4394          }          }
4395        else        else
4396  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */
   
4397        /* Handle a single-byte character */        /* Handle a single-byte character */
4398          {          {
4399          classbits[c/8] |= (1 << (c&7));          classbits[c/8] |= (1 << (c&7));
# Line 4356  for (;; ptr++) Line 4405  for (;; ptr++)
4405          class_charcount++;          class_charcount++;
4406          class_lastchar = c;          class_lastchar = c;
4407          }          }
4408    
4409        }        }
4410    
4411      /* Loop until ']' reached. This "while" is the end of the "do" far above.      /* Loop until ']' reached. This "while" is the end of the "do" far above.
# Line 4415  for (;; ptr++) Line 4465  for (;; ptr++)
4465        /* For a single, positive character, get the value into mcbuffer, and        /* For a single, positive character, get the value into mcbuffer, and
4466        then we can handle this with the normal one-character code. */        then we can handle this with the normal one-character code. */
4467    
4468  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4469        if (utf && class_lastchar > 127)        if (utf && class_lastchar > 127)
4470          mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);          mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);
4471        else        else
# Line 4603  for (;; ptr++) Line 4653  for (;; ptr++)
4653    
4654        /* Deal with UTF characters that take up more than one character. It's        /* Deal with UTF characters that take up more than one character. It's
4655        easier to write this out separately than try to macrify it. Use c to        easier to write this out separately than try to macrify it. Use c to
4656        hold the length of the character in bytes, plus 0x80 to flag that it's a        hold the length of the character in bytes, plus UTF_LENGTH to flag that
4657        length rather than a small character. */        it's a length rather than a small character. */
4658    
4659  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4660        if (utf && (code[-1] & 0x80) != 0)        if (utf && NOT_FIRSTCHAR(code[-1]))
4661          {          {
4662          pcre_uchar *lastchar = code - 1;          pcre_uchar *lastchar = code - 1;
4663          BACKCHAR(lastchar);          BACKCHAR(lastchar);
4664          c = code - lastchar;            /* Length of UTF-8 character */          c = code - lastchar;            /* Length of UTF-8 character */
4665          memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */          memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */
4666          c |= 0x80;                      /* Flag c as a length */          c |= UTF_LENGTH;                /* Flag c as a length */
4667          }          }
4668        else        else
4669  #endif  #endif /* SUPPORT_UTF */
4670    
4671        /* Handle the case of a single character - either with no UTF support, or        /* Handle the case of a single character - either with no UTF support, or
4672        with UTF disabled, or for a single character UTF character. */        with UTF disabled, or for a single character UTF character. */
   
4673          {          {
4674          c = code[-1];          c = code[-1];
4675          if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;          if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
# Line 4758  for (;; ptr++) Line 4807  for (;; ptr++)
4807          we have to insert the character for the previous code. For a repeated          we have to insert the character for the previous code. For a repeated
4808          Unicode property match, there are two extra bytes that define the          Unicode property match, there are two extra bytes that define the
4809          required property. In UTF-8 mode, long characters have their length in          required property. In UTF-8 mode, long characters have their length in
4810          c, with the 0x80 bit as a flag. */          c, with the UTF_LENGTH bit as a flag. */
4811    
4812          if (repeat_max < 0)          if (repeat_max < 0)
4813            {            {
4814  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4815            if (utf && c >= 128)            if (utf && (c & UTF_LENGTH) != 0)
4816              {              {
4817              memcpy(code, utf_chars, c & 7);              memcpy(code, utf_chars, IN_UCHARS(c & 7));
4818              code += c & 7;              code += c & 7;
4819              }              }
4820            else            else
# Line 4787  for (;; ptr++) Line 4836  for (;; ptr++)
4836    
4837          else if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
4838            {            {
4839  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4840            if (utf && c >= 128)            if (utf && (c & UTF_LENGTH) != 0)
4841              {              {
4842              memcpy(code, utf_chars, c & 7);              memcpy(code, utf_chars, IN_UCHARS(c & 7));
4843              code += c & 7;              code += c & 7;
4844              }              }
4845            else            else
# Line 4817  for (;; ptr++) Line 4866  for (;; ptr++)
4866    
4867        /* The character or character type itself comes last in all cases. */        /* The character or character type itself comes last in all cases. */
4868    
4869  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4870        if (utf && c >= 128)        if (utf && (c & UTF_LENGTH) != 0)
4871          {          {
4872          memcpy(code, utf_chars, c & 7);          memcpy(code, utf_chars, IN_UCHARS(c & 7));
4873          code += c & 7;          code += c & 7;
4874          }          }
4875        else        else
# Line 4844  for (;; ptr++) Line 4893  for (;; ptr++)
4893    
4894      else if (*previous == OP_CLASS ||      else if (*previous == OP_CLASS ||
4895               *previous == OP_NCLASS ||               *previous == OP_NCLASS ||
4896  #if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4897               *previous == OP_XCLASS ||               *previous == OP_XCLASS ||
4898  #endif  #endif
4899               *previous == OP_REF ||               *previous == OP_REF ||
# Line 5811  for (;; ptr++) Line 5860  for (;; ptr++)
5860                *errorcodeptr = ERR49;                *errorcodeptr = ERR49;
5861                goto FAILED;                goto FAILED;
5862                }                }
5863              if (namelen + 3 > cd->name_entry_size)              if (namelen + IMM2_SIZE + 1 > cd->name_entry_size)
5864                {                {
5865                cd->name_entry_size = namelen + 3;                cd->name_entry_size = namelen + IMM2_SIZE + 1;
5866                if (namelen > MAX_NAME_SIZE)                if (namelen > MAX_NAME_SIZE)
5867                  {                  {
5868                  *errorcodeptr = ERR48;                  *errorcodeptr = ERR48;
# Line 5842  for (;; ptr++) Line 5891  for (;; ptr++)
5891    
5892              for (i = 0; i < cd->names_found; i++)              for (i = 0; i < cd->names_found; i++)
5893                {                {
5894                int crc = memcmp(name, slot+2, namelen);                int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(namelen));
5895                if (crc == 0)                if (crc == 0)
5896                  {                  {
5897                  if (slot[2+namelen] == 0)                  if (slot[IMM2_SIZE+namelen] == 0)
5898                    {                    {
5899                    if (GET2(slot, 0) != cd->bracount + 1 &&                    if (GET2(slot, 0) != cd->bracount + 1 &&
5900                        (options & PCRE_DUPNAMES) == 0)                        (options & PCRE_DUPNAMES) == 0)
# Line 5897  for (;; ptr++) Line 5946  for (;; ptr++)
5946                }                }
5947    
5948              PUT2(slot, 0, cd->bracount + 1);              PUT2(slot, 0, cd->bracount + 1);
5949              memcpy(slot + 2, name, IN_UCHARS(namelen));              memcpy(slot + IMM2_SIZE, name, IN_UCHARS(namelen));
5950              slot[2 + namelen] = 0;              slot[IMM2_SIZE + namelen] = 0;
5951              }              }
5952            }            }
5953    
# Line 5982  for (;; ptr++) Line 6031  for (;; ptr++)
6031            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
6032              {              {
6033              if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&              if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&
6034                  slot[2+namelen] == 0)                  slot[IMM2_SIZE+namelen] == 0)
6035                break;                break;
6036              slot += cd->name_entry_size;              slot += cd->name_entry_size;
6037              }              }
# Line 6636  for (;; ptr++) Line 6685  for (;; ptr++)
6685      a value > 127. We set its representation in the length/buffer, and then      a value > 127. We set its representation in the length/buffer, and then
6686      handle it as a data character. */      handle it as a data character. */
6687    
6688  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6689      if (utf && c > 127)      if (utf && c > 127)
6690        mclength = PRIV(ord2utf)(c, mcbuffer);        mclength = PRIV(ord2utf)(c, mcbuffer);
6691      else      else
# Line 6661  for (;; ptr++) Line 6710  for (;; ptr++)
6710    
6711  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
6712      if (utf && HAS_EXTRALEN(c))      if (utf && HAS_EXTRALEN(c))
6713        {        ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
       INTERNALCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));  
       }  
6714  #endif  #endif
6715    
6716      /* At this point we have the character's bytes in mcbuffer, and the length      /* At this point we have the character's bytes in mcbuffer, and the length
# Line 7435  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7482  while (ptr[skipatstart] == CHAR_LEFT_PAR
7482    int newnl = 0;    int newnl = 0;
7483    int newbsr = 0;    int newbsr = 0;
7484    
7485    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)  #ifdef COMPILE_PCRE8
7486      if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0)
7487      { skipatstart += 7; options |= PCRE_UTF8; continue; }      { skipatstart += 7; options |= PCRE_UTF8; continue; }
7488    #endif
7489    #ifdef COMPILE_PCRE16
7490      if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)
7491        { skipatstart += 8; options |= PCRE_UTF16; continue; }
7492    #endif
7493    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
7494      { skipatstart += 6; options |= PCRE_UCP; continue; }      { skipatstart += 6; options |= PCRE_UCP; continue; }
7495    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
# Line 7468  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 7521  while (ptr[skipatstart] == CHAR_LEFT_PAR
7521  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
7522  utf = (options & PCRE_UTF8) != 0;  utf = (options & PCRE_UTF8) != 0;
7523    
7524  /* Can't support UTF8 unless PCRE has been compiled to include the code. The  /* Can't support UTF unless PCRE has been compiled to include the code. The
7525  return of an error code from PRIV(valid_utf)() is a new feature, introduced in  return of an error code from PRIV(valid_utf)() is a new feature, introduced in
7526  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
7527  not used here. */  not used here. */
7528    
7529  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
7530  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7531       (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)       (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
7532    {    {
# Line 7610  externally provided function. Integer ov Line 7663  externally provided function. Integer ov
7663  because nowadays we limit the maximum value of cd->names_found and  because nowadays we limit the maximum value of cd->names_found and
7664  cd->name_entry_size. */  cd->name_entry_size. */
7665    
7666  size = sizeof(real_pcre) + (length + cd->names_found * (cd->name_entry_size + 3)) * sizeof(pcre_uchar);  size = sizeof(real_pcre) + (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);
7667  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
7668    
7669  if (re == NULL)  if (re == NULL)
# Line 7670  code = (pcre_uchar *)codestart; Line 7723  code = (pcre_uchar *)codestart;
7723    &firstchar, &reqchar, NULL, cd, NULL);    &firstchar, &reqchar, NULL, cd, NULL);
7724  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
7725  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
7726  re->flags = cd->external_flags;  re->flags = cd->external_flags | PCRE_MODE;
7727    
7728  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
7729    
# Line 7789  if ((re->options & PCRE_ANCHORED) == 0) Line 7842  if ((re->options & PCRE_ANCHORED) == 0)
7842        re->first_char = firstchar & 0xffff;        re->first_char = firstchar & 0xffff;
7843  #endif  #endif
7844  #endif  #endif
7845        if ((firstchar & REQ_CASELESS) != 0 && MAX_255(re->first_char)        if ((firstchar & REQ_CASELESS) != 0)
7846          && cd->fcc[re->first_char] != re->first_char)          {
7847          re->flags |= PCRE_FCH_CASELESS;  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
7848            /* We ignore non-ASCII first chars in 8 bit mode. */
7849            if (utf)
7850              {
7851              if (re->first_char < 128)
7852                {
7853                if (cd->fcc[re->first_char] != re->first_char)
7854                  re->flags |= PCRE_FCH_CASELESS;
7855                }
7856              else if (UCD_OTHERCASE(re->first_char) != re->first_char)
7857                re->flags |= PCRE_FCH_CASELESS;
7858              }
7859            else
7860    #endif
7861            if (MAX_255(re->first_char)
7862                && cd->fcc[re->first_char] != re->first_char)
7863              re->flags |= PCRE_FCH_CASELESS;
7864            }
7865    
7866        re->flags |= PCRE_FIRSTSET;        re->flags |= PCRE_FIRSTSET;
7867        }        }
# Line 7814  if (reqchar >= 0 && Line 7884  if (reqchar >= 0 &&
7884    re->req_char = reqchar & 0xffff;    re->req_char = reqchar & 0xffff;
7885  #endif  #endif
7886  #endif  #endif
7887    if ((reqchar & REQ_CASELESS) != 0 && MAX_255(re->req_char)    if ((reqchar & REQ_CASELESS) != 0)
7888      && cd->fcc[re->req_char] != re->req_char)      {
7889      re->flags |= PCRE_RCH_CASELESS;  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
7890        /* We ignore non-ASCII required chars in 8 bit mode. */
7891        if (utf)
7892          {
7893          if (re->req_char < 128)
7894            {
7895            if (cd->fcc[re->req_char] != re->req_char)
7896              re->flags |= PCRE_RCH_CASELESS;
7897            }
7898          else if (UCD_OTHERCASE(re->req_char) != re->req_char)
7899            re->flags |= PCRE_RCH_CASELESS;
7900          }
7901        else
7902    #endif
7903        if (MAX_255(re->req_char) && cd->fcc[re->req_char] != re->req_char)
7904          re->flags |= PCRE_RCH_CASELESS;
7905        }
7906    
7907    re->flags |= PCRE_REQCHSET;    re->flags |= PCRE_REQCHSET;
7908    }    }

Legend:
Removed from v.782  
changed lines
  Added in v.795

  ViewVC Help
Powered by ViewVC 1.1.5