# Diff of /code/trunk/pcre.c

revision 66 by nigel, Sat Feb 24 21:40:08 2007 UTC | revision 67 by nigel, Sat Feb 24 21:40:13 2007 UTC
# Line 221  we know we are in UTF-8 mode. */ Line 221  we know we are in UTF-8 mode. */
221    c = *eptr; \    c = *eptr; \
222    if ((c & 0xc0) == 0xc0) \    if ((c & 0xc0) == 0xc0) \
223      { \      { \
224      int i; \      int gcii; \
225      int a = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \      int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
226      int s = 6*a; \      int gcss = 6*gcaa; \
227      c = (c & utf8_table3[a]) << s; \      c = (c & utf8_table3[gcaa]) << gcss; \
228      for (i = 1; i <= a; i++) \      for (gcii = 1; gcii <= gcaa; gcii++) \
229        { \        { \
230        s -= 6; \        gcss -= 6; \
231        c |= (eptr[i] & 0x3f) << s; \        c |= (eptr[gcii] & 0x3f) << gcss; \
232        } \        } \
233      }      }
234
# Line 239  know we are in UTF-8 mode. */ Line 239  know we are in UTF-8 mode. */
239    c = *eptr++; \    c = *eptr++; \
240    if ((c & 0xc0) == 0xc0) \    if ((c & 0xc0) == 0xc0) \
241      { \      { \
242      int a = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \      int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
243      int s = 6*a; \      int gcss = 6*gcaa; \
244      c = (c & utf8_table3[a]) << s; \      c = (c & utf8_table3[gcaa]) << gcss; \
245      while (a-- > 0) \      while (gcaa-- > 0) \
246        { \        { \
247        s -= 6; \        gcss -= 6; \
248        c |= (*eptr++ & 0x3f) << s; \        c |= (*eptr++ & 0x3f) << gcss; \
249        } \        } \
250      }      }
251
# Line 255  know we are in UTF-8 mode. */ Line 255  know we are in UTF-8 mode. */
255    c = *eptr++; \    c = *eptr++; \
256    if (md->utf8 && (c & 0xc0) == 0xc0) \    if (md->utf8 && (c & 0xc0) == 0xc0) \
257      { \      { \
258      int a = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \      int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
259      int s = 6*a; \      int gcss = 6*gcaa; \
260      c = (c & utf8_table3[a]) << s; \      c = (c & utf8_table3[gcaa]) << gcss; \
261      while (a-- > 0) \      while (gcaa-- > 0) \
262        { \        { \
263        s -= 6; \        gcss -= 6; \
264        c |= (*eptr++ & 0x3f) << s; \        c |= (*eptr++ & 0x3f) << gcss; \
265        } \        } \
266      }      }
267
# Line 272  if there are extra bytes. This is called Line 272  if there are extra bytes. This is called
272    c = *eptr; \    c = *eptr; \
273    if ((c & 0xc0) == 0xc0) \    if ((c & 0xc0) == 0xc0) \
274      { \      { \
275      int i; \      int gcii; \
276      int a = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \      int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
277      int s = 6*a; \      int gcss = 6*gcaa; \
278      c = (c & utf8_table3[a]) << s; \      c = (c & utf8_table3[gcaa]) << gcss; \
279      for (i = 1; i <= a; i++) \      for (gcii = 1; gcii <= gcaa; gcii++) \
280        { \        { \
281        s -= 6; \        gcss -= 6; \
282        c |= (eptr[i] & 0x3f) << s; \        c |= (eptr[gcii] & 0x3f) << gcss; \
283        } \        } \
284      len += a; \      len += gcaa; \
285      }      }
286
287  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
# Line 1643  for (;; ptr++) Line 1643  for (;; ptr++)
1643      do      do
1644        {        {
1645  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1646        if (utf8 && c > 127) GETCHARLEN(c, ptr, ptr);        if (utf8 && c > 127)
1647            {                           /* Braces are required because the */
1648            GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
1649            }
1650  #endif  #endif
1651
1652        /* Inside \Q...\E everything is literal except \E */        /* Inside \Q...\E everything is literal except \E */
# Line 1936  for (;; ptr++) Line 1939  for (;; ptr++)
1939      reqbyte, save the previous value for reinstating. */      reqbyte, save the previous value for reinstating. */
1940
1941  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1942      if (!class_utf8 && class_charcount == 1 && class_lastchar < 128)      if (class_charcount == 1 &&
1943              (!utf8 ||
1944              (!class_utf8 && class_lastchar < 128)))
1945  #else  #else
1946      if (class_charcount == 1)      if (class_charcount == 1)
1947  #endif  #endif
# Line 2645  for (;; ptr++) Line 2650  for (;; ptr++)
2650
2651            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
2652              {              {
2653              int c = memcmp(name, slot+2, namelen);              int crc = memcmp(name, slot+2, namelen);
2654              if (c == 0)              if (crc == 0)
2655                {                {
2656                if (slot[2+namelen] == 0)                if (slot[2+namelen] == 0)
2657                  {                  {
2658                  *errorptr = ERR43;                  *errorptr = ERR43;
2659                  goto FAILED;                  goto FAILED;
2660                  }                  }
2661                c = -1;             /* Current name is substring */                crc = -1;             /* Current name is substring */
2662                }                }
2663              if (c < 0)              if (crc < 0)
2664                {                {
2665                memmove(slot + cd->name_entry_size, slot,                memmove(slot + cd->name_entry_size, slot,
2666                  (cd->names_found - i) * cd->name_entry_size);                  (cd->names_found - i) * cd->name_entry_size);
# Line 3153  for (;; ptr++) Line 3158  for (;; ptr++)
3158
3159        else        else
3160          {          {
3161          uschar *t = code - 1;               /* After this code, t is at the */          t = code - 1;                       /* After this code, t is at the */
3162          while ((*t & 0xc0) == 0x80) t--;    /* start of the last character */          while ((*t & 0xc0) == 0x80) t--;    /* start of the last character */
3163
3164          /* If no previous first byte, set it from the first character, and          /* If no previous first byte, set it from the first character, and
# Line 4048  while ((c = *(++ptr)) != 0) Line 4053  while ((c = *(++ptr)) != 0)
4053  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4054          if (utf8)          if (utf8)
4055            {            {
4056            int c;            int ch;
4057            int extra = 0;            int extra = 0;
4058            GETCHARLEN(c, ptr, extra);            GETCHARLEN(ch, ptr, extra);
4059            if (c > 127) class_optcount = 10;   /* No optimization possible */            if (ch > 127) class_optcount = 10;   /* No optimization possible */
4060            if (c > 255)            if (ch > 255)
4061              {              {
4062              if (!class_utf8)              if (!class_utf8)
4063                {                {
# Line 5599  for (;;) Line 5604  for (;;)
5604        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
5605        be "non-word" characters. */        be "non-word" characters. */
5606
5607  #if SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5608        if (md->utf8)        if (md->utf8)
5609          {          {
5610          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else

Legend:
 Removed from v.66 | changed lines | Added in v.67