/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1381 by zherczeg, Wed Oct 16 06:23:00 2013 UTC revision 1382 by zherczeg, Fri Oct 18 07:55:07 2013 UTC
# Line 2975  switch(c) Line 2975  switch(c)
2975    case OP_XCLASS:    case OP_XCLASS:
2976    
2977    if (c == OP_XCLASS)    if (c == OP_XCLASS)
2978      end = code + GET(code, 0);      end = code + GET(code, 0) - 1;
2979    else    else
2980  #endif  #endif
2981      end = code + 32 / sizeof(pcre_uchar);      end = code + 32 / sizeof(pcre_uchar);
# Line 3031  Returns:      TRUE if the auto-possessif Line 3031  Returns:      TRUE if the auto-possessif
3031    
3032  static BOOL  static BOOL
3033  compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,  compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,
3034    const pcre_uint32* base_list, const pcre_uchar *base_end)    const pcre_uint32 *base_list, const pcre_uchar *base_end)
3035  {  {
3036  pcre_uchar c;  pcre_uchar c;
3037  pcre_uint32 list[8];  pcre_uint32 list[8];
3038  const pcre_uint32* chr_ptr;  const pcre_uint32 *chr_ptr;
3039  const pcre_uint32* ochr_ptr;  const pcre_uint32 *ochr_ptr;
3040  const pcre_uint32* list_ptr;  const pcre_uint32 *list_ptr;
3041  const pcre_uchar *next_code;  const pcre_uchar *next_code;
3042  const pcre_uint8 *class_bits;  const pcre_uint8 *class_bitset;
3043    const pcre_uint32 *set1, *set2, *set_end;
3044  pcre_uint32 chr;  pcre_uint32 chr;
3045  BOOL accepted;  BOOL accepted, invert_bits;
3046    
3047  /* Note: the base_list[1] contains whether the current opcode has greedy  /* Note: the base_list[1] contains whether the current opcode has greedy
3048  (represented by a non-zero value) quantifier. This is different from  (represented by a non-zero value) quantifier. This is different from
# Line 3163  for(;;) Line 3164  for(;;)
3164      list_ptr = base_list;      list_ptr = base_list;
3165      }      }
3166    
3167      /* Character bitsets can also be compared to certain opcodes. */
3168    
3169      else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
3170    #ifdef COMPILE_PCRE8
3171          /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
3172          || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
3173    #endif
3174          )
3175        {
3176    #ifdef COMPILE_PCRE8
3177        if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
3178    #else
3179        if (base_list[0] == OP_CLASS)
3180    #endif
3181          {
3182          set1 = (pcre_uint32 *)(base_end - base_list[2]);
3183          list_ptr = list;
3184          }
3185        else
3186          {
3187          set1 = (pcre_uint32 *)(code - list[2]);
3188          list_ptr = base_list;
3189          }
3190    
3191        invert_bits = FALSE;
3192        switch(list_ptr[0])
3193          {
3194          case OP_CLASS:
3195          case OP_NCLASS:
3196          set2 = (pcre_uint32 *)
3197            ((list_ptr == list ? code : base_end) - list_ptr[2]);
3198          break;
3199    
3200          /* OP_XCLASS cannot be supported here, because its bitset
3201          is not necessarily complete. E.g. [a-\x{200}] is stored
3202          as a character range, and the appropriate bits are not set. */
3203    
3204          case OP_NOT_DIGIT:
3205            invert_bits = TRUE;
3206            /* Fall through */
3207          case OP_DIGIT:
3208            set2 = (pcre_uint32 *)(cd->cbits + cbit_digit);
3209            break;
3210    
3211          case OP_NOT_WHITESPACE:
3212            invert_bits = TRUE;
3213            /* Fall through */
3214          case OP_WHITESPACE:
3215            set2 = (pcre_uint32 *)(cd->cbits + cbit_space);
3216            break;
3217    
3218          case OP_NOT_WORDCHAR:
3219            invert_bits = TRUE;
3220            /* Fall through */
3221          case OP_WORDCHAR:
3222            set2 = (pcre_uint32 *)(cd->cbits + cbit_word);
3223            break;
3224    
3225          default:
3226          return FALSE;
3227          }
3228    
3229        /* Compare 4 bytes at a time to improve speed. */
3230        set_end = set1 + (32 / 4);
3231        if (invert_bits)
3232          {
3233          do
3234            {
3235            if ((*set1++ & ~(*set2++)) != 0) return FALSE;
3236            }
3237          while (set1 < set_end);
3238          }
3239        else
3240          {
3241          do
3242            {
3243            if ((*set1++ & *set2++) != 0) return FALSE;
3244            }
3245          while (set1 < set_end);
3246          }
3247    
3248        if (list[1] == 0) return TRUE;
3249        /* Might be an empty repeat. */
3250        continue;
3251        }
3252    
3253    /* Some property combinations are also acceptable. Unicode property opcodes are    /* Some property combinations are also acceptable. Unicode property opcodes are
3254    processed specially; the rest can be handled with a lookup table. */    processed specially; the rest can be handled with a lookup table. */
3255    
# Line 3414  for(;;) Line 3501  for(;;)
3501    
3502        case OP_CLASS:        case OP_CLASS:
3503        if (chr > 255) break;        if (chr > 255) break;
3504        class_bits = (pcre_uint8 *)((list_ptr == list ? code : base_end) - list_ptr[2]);        class_bitset = (pcre_uint8 *)
3505        if ((class_bits[chr >> 3] & (1 << (chr & 7))) != 0)          ((list_ptr == list ? code : base_end) - list_ptr[2]);
3506          return FALSE;        if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
3507        break;        break;
3508    
3509  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3510        case OP_XCLASS:        case OP_XCLASS:
3511        if (list_ptr != list) return FALSE;   /* Class is first opcode */        if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
3512        if (PRIV(xclass)(chr, code - list_ptr[2] + LINK_SIZE, utf))            list_ptr[2] + LINK_SIZE, utf)) return FALSE;
         return FALSE;  
3513        break;        break;
3514  #endif  #endif
3515    
# Line 3465  auto_possessify(pcre_uchar *code, BOOL u Line 3551  auto_possessify(pcre_uchar *code, BOOL u
3551  {  {
3552  register pcre_uchar c;  register pcre_uchar c;
3553  const pcre_uchar *end;  const pcre_uchar *end;
3554  pcre_uchar *repeat_code;  pcre_uchar *repeat_opcode;
3555  pcre_uint32 list[8];  pcre_uint32 list[8];
3556    
3557  for (;;)  for (;;)
# Line 3522  for (;;) Line 3608  for (;;)
3608      {      {
3609  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3610      if (c == OP_XCLASS)      if (c == OP_XCLASS)
3611        repeat_code = code + 1 + GET(code, 1);        repeat_opcode = code + GET(code, 1);
3612      else      else
3613  #endif  #endif
3614        repeat_code = code + 1 + (32 / sizeof(pcre_uchar));        repeat_opcode = code + 1 + (32 / sizeof(pcre_uchar));
3615    
3616      c = *repeat_code;      c = *repeat_opcode;
3617      if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)      if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
3618        {        {
3619        /* end must not be NULL. */        /* end must not be NULL. */
# Line 3540  for (;;) Line 3626  for (;;)
3626          switch (c)          switch (c)
3627            {            {
3628            case OP_CRSTAR:            case OP_CRSTAR:
3629            *repeat_code = OP_CRPOSSTAR;            case OP_CRMINSTAR:
3630              *repeat_opcode = OP_CRPOSSTAR;
3631            break;            break;
3632    
3633            case OP_CRPLUS:            case OP_CRPLUS:
3634            *repeat_code = OP_CRPOSPLUS;            case OP_CRMINPLUS:
3635              *repeat_opcode = OP_CRPOSPLUS;
3636            break;            break;
3637    
3638            case OP_CRQUERY:            case OP_CRQUERY:
3639            *repeat_code = OP_CRPOSQUERY;            case OP_CRMINQUERY:
3640              *repeat_opcode = OP_CRPOSQUERY;
3641            break;            break;
3642    
3643            case OP_CRRANGE:            case OP_CRRANGE:
3644            *repeat_code = OP_CRPOSRANGE;            case OP_CRMINRANGE:
3645              *repeat_opcode = OP_CRPOSRANGE;
3646            break;            break;
3647            }            }
3648          }          }

Legend:
Removed from v.1381  
changed lines
  Added in v.1382

  ViewVC Help
Powered by ViewVC 1.1.5