/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 781 by zherczeg, Sat Dec 3 07:58:30 2011 UTC | revision 782 by zherczeg, Sat Dec 3 23:58:37 2011 UTC
# Line 1466  for (; ptr < cd->end_pattern; ptr++) Line 1466  for (; ptr < cd->end_pattern; ptr++)
1466        {        {
1467        if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }        if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
1468        ptr++;        ptr++;
1469  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1470        if (utf) while ((*ptr & 0xc0) == 0x80) ptr++;        if (utf) FORWARDCHAR(ptr);
1471  #endif  #endif
1472        }        }
1473      if (*ptr == 0) goto FAIL_EXIT;      if (*ptr == 0) goto FAIL_EXIT;
# Line 1759  for (;;) Line 1759  for (;;)
1759      case OP_NOTI:      case OP_NOTI:
1760      branchlength++;      branchlength++;
1761      cc += 2;      cc += 2;
1762  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1763      if (utf && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1764  #endif  #endif
1765      break;      break;
1766    
# Line 1773  for (;;) Line 1773  for (;;)
1773      case OP_NOTEXACTI:      case OP_NOTEXACTI:
1774      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1775      cc += 2 + IMM2_SIZE;      cc += 2 + IMM2_SIZE;
1776  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1777      if (utf && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f];      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1778  #endif  #endif
1779      break;      break;
1780    
# Line 2041  for (;;) Line 2041  for (;;)
2041    a multi-byte character. The length in the table is a minimum, so we have to    a multi-byte character. The length in the table is a minimum, so we have to
2042    arrange to skip the extra bytes. */    arrange to skip the extra bytes. */
2043    
2044  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2045      if (utf) switch(c)      if (utf) switch(c)
2046        {        {
2047        case OP_CHAR:        case OP_CHAR:
# Line 2072  for (;;) Line 2072  for (;;)
2072        case OP_MINQUERYI:        case OP_MINQUERYI:
2073        case OP_POSQUERY:        case OP_POSQUERY:
2074        case OP_POSQUERYI:        case OP_POSQUERYI:
2075        if (code[-1] >= 0xc0) code += PRIV(utf8_table4)[code[-1] & 0x3f];        if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
2076        break;        break;
2077        }        }
2078  #else  #else
# Line 2161  for (;;) Line 2161  for (;;)
2161      by a multi-byte character. The length in the table is a minimum, so we have      by a multi-byte character. The length in the table is a minimum, so we have
2162      to arrange to skip the extra bytes. */      to arrange to skip the extra bytes. */
2163    
2164  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2165      if (utf) switch(c)      if (utf) switch(c)
2166        {        {
2167        case OP_CHAR:        case OP_CHAR:
# Line 2192  for (;;) Line 2192  for (;;)
2192        case OP_MINQUERYI:        case OP_MINQUERYI:
2193        case OP_POSQUERY:        case OP_POSQUERY:
2194        case OP_POSQUERYI:        case OP_POSQUERYI:
2195        if (code[-1] >= 0xc0) code += PRIV(utf8_table4)[code[-1] & 0x3f];        if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
2196        break;        break;
2197        }        }
2198  #else  #else
# Line 2452  for (code = first_significant_code(code Line 2452  for (code = first_significant_code(code
2452      /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,      /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
2453      MINUPTO, and POSUPTO may be followed by a multibyte character */      MINUPTO, and POSUPTO may be followed by a multibyte character */
2454    
2455  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2456      case OP_STAR:      case OP_STAR:
2457      case OP_STARI:      case OP_STARI:
2458      case OP_MINSTAR:      case OP_MINSTAR:
# Line 2465  for (code = first_significant_code(code Line 2465  for (code = first_significant_code(code
2465      case OP_MINQUERYI:      case OP_MINQUERYI:
2466      case OP_POSQUERY:      case OP_POSQUERY:
2467      case OP_POSQUERYI:      case OP_POSQUERYI:
2468      if (utf && code[1] >= 0xc0) code += PRIV(utf8_table4)[code[1] & 0x3f];      if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
2469      break;      break;
2470    
2471      case OP_UPTO:      case OP_UPTO:
# Line 2474  for (code = first_significant_code(code Line 2474  for (code = first_significant_code(code
2474      case OP_MINUPTOI:      case OP_MINUPTOI:
2475      case OP_POSUPTO:      case OP_POSUPTO:
2476      case OP_POSUPTOI:      case OP_POSUPTOI:
2477      if (utf && code[1 + IMM2_SIZE] >= 0xc0) code += PRIV(utf8_table4)[code[1 + IMM2_SIZE] & 0x3f];      if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
2478      break;      break;
2479  #endif  #endif
2480    
# Line 2913  if ((options & PCRE_EXTENDED) != 0) Line 2913  if ((options & PCRE_EXTENDED) != 0)
2913          {          {
2914          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
2915          ptr++;          ptr++;
2916  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2917          if (utf) while ((*ptr & 0xc0) == 0x80) ptr++;          if (utf) FORWARDCHAR(ptr);
2918  #endif  #endif
2919          }          }
2920        }        }
# Line 2957  if ((options & PCRE_EXTENDED) != 0) Line 2957  if ((options & PCRE_EXTENDED) != 0)
2957          {          {
2958          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
2959          ptr++;          ptr++;
2960  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2961          if (utf) while ((*ptr & 0xc0) == 0x80) ptr++;          if (utf) FORWARDCHAR(ptr);
2962  #endif  #endif
2963          }          }
2964        }        }
# Line 3424  for (;; ptr++) Line 3424  for (;; ptr++)
3424    int tempbracount;    int tempbracount;
3425    pcre_uchar mcbuffer[8];    pcre_uchar mcbuffer[8];
3426    
3427    /* Get next byte in the pattern */    /* Get next character in the pattern */
3428    
3429    c = *ptr;    c = *ptr;
3430    
# Line 3556  for (;; ptr++) Line 3556  for (;; ptr++)
3556          {          {
3557          if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
3558          ptr++;          ptr++;
3559  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3560          if (utf) while ((*ptr & 0xc0) == 0x80) ptr++;          if (utf) FORWARDCHAR(ptr);
3561  #endif  #endif
3562          }          }
3563        if (*ptr != 0) continue;        if (*ptr != 0) continue;
# Line 4601  for (;; ptr++) Line 4601  for (;; ptr++)
4601        {        {
4602        op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR;        op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR;
4603    
4604        /* Deal with UTF-8 characters that take up more than one byte. It's        /* Deal with UTF characters that take up more than one character. It's
4605        easier to write this out separately than try to macrify it. Use c to        easier to write this out separately than try to macrify it. Use c to
4606        hold the length of the character in bytes, plus 0x80 to flag that it's a        hold the length of the character in bytes, plus 0x80 to flag that it's a
4607        length rather than a small character. */        length rather than a small character. */
# Line 4610  for (;; ptr++) Line 4610  for (;; ptr++)
4610        if (utf && (code[-1] & 0x80) != 0)        if (utf && (code[-1] & 0x80) != 0)
4611          {          {
4612          pcre_uchar *lastchar = code - 1;          pcre_uchar *lastchar = code - 1;
4613          while((*lastchar & 0xc0) == 0x80) lastchar--;          BACKCHAR(lastchar);
4614          c = code - lastchar;            /* Length of UTF-8 character */          c = code - lastchar;            /* Length of UTF-8 character */
4615          memcpy(utf_chars, lastchar, c); /* Save the char */          memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */
4616          c |= 0x80;                      /* Flag c as a length */          c |= 0x80;                      /* Flag c as a length */
4617          }          }
4618        else        else
4619  #endif  #endif
4620    
4621        /* Handle the case of a single byte - either with no UTF8 support, or        /* Handle the case of a single charater - either with no UTF support, or
4622        with UTF-8 disabled, or for a UTF-8 character < 128. */        with UTF disabled, or for a single character UTF character. */
4623    
4624          {          {
4625          c = code[-1];          c = code[-1];
# Line 5273  for (;; ptr++) Line 5273  for (;; ptr++)
5273        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
5274          {          {
5275          tempcode += PRIV(OP_lengths)[*tempcode];          tempcode += PRIV(OP_lengths)[*tempcode];
5276  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
5277          if (utf && tempcode[-1] >= 0xc0)          if (utf && HAS_EXTRALEN(tempcode[-1]))
5278            tempcode += PRIV(utf8_table4)[tempcode[-1] & 0x3f];            tempcode += GET_EXTRALEN(tempcode[-1]);
5279  #endif  #endif
5280          }          }
5281    
# Line 6659  for (;; ptr++) Line 6659  for (;; ptr++)
6659      mclength = 1;      mclength = 1;
6660      mcbuffer[0] = c;      mcbuffer[0] = c;
6661    
6662  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6663      if (utf && c >= 0xc0)      if (utf && HAS_EXTRALEN(c))
6664        {        {
6665        while ((ptr[1] & 0xc0) == 0x80)        INTERNALCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
         mcbuffer[mclength++] = *(++ptr);  
6666        }        }
6667  #endif  #endif
6668    

Legend:
Removed from v.781  
changed lines
  Added in v.782

  ViewVC Help
Powered by ViewVC 1.1.5