/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory | Revision Log | View Patch

revision 1379 by ph10, Mon Oct 14 13:54:07 2013 UTC | revision 1419 by zherczeg, Sun Dec 29 04:42:14 2013 UTC
# Line 306  typedef struct then_trap_backtrack { Line 306  typedef struct then_trap_backtrack {
306    int framesize;    int framesize;
307  } then_trap_backtrack;  } then_trap_backtrack;
308    
309  #define MAX_RANGE_SIZE 6  #define MAX_RANGE_SIZE 4
310    
311  typedef struct compiler_common {  typedef struct compiler_common {
312    /* The sljit generic compiler. */    /* The sljit generic compiler. */
# Line 533  cc += 1 + LINK_SIZE; Line 533  cc += 1 + LINK_SIZE;
533  return cc;  return cc;
534  }  }
535    
536    static int ones_in_half_byte[16] = {
537      /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
538      /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
539    };
540    
541  /* Functions which might need modification for all newly supported opcodes:  /* Functions which might need modification for all newly supported opcodes:
542   next_opcode   next_opcode
543   check_opcode_types   check_opcode_types
# Line 2894  if (newlinecheck) Line 2899  if (newlinecheck)
2899  return mainloop;  return mainloop;
2900  }  }
2901    
2902  #define MAX_N_CHARS 3  static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
   
 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)  
2903  {  {
2904  DEFINE_COMPILER;  /* Recursive function, which scans prefix literals. */
2905  struct sljit_label *start;  int len, repeat, len_save, consumed = 0;
2906  struct sljit_jump *quit;  pcre_int32 caseless, chr, mask;
2907  pcre_uint32 chars[MAX_N_CHARS * 2];  pcre_uchar *alternative, *cc_save;
2908  pcre_uchar *cc = common->start + 1 + LINK_SIZE;  BOOL last, any;
 int location = 0;  
 pcre_int32 len, c, bit, caseless;  
 int must_stop;  
   
 /* We do not support alternatives now. */  
 if (*(common->start + GET(common->start, 1)) == OP_ALT)  
   return FALSE;  
2909    
2910    repeat = 1;
2911  while (TRUE)  while (TRUE)
2912    {    {
2913      last = TRUE;
2914      any = FALSE;
2915    caseless = 0;    caseless = 0;
2916    must_stop = 1;    switch (*cc)
   switch(*cc)  
2917      {      {
     case OP_CHAR:  
     must_stop = 0;  
     cc++;  
     break;  
   
2918      case OP_CHARI:      case OP_CHARI:
2919      caseless = 1;      caseless = 1;
2920      must_stop = 0;      case OP_CHAR:
2921        last = FALSE;
2922      cc++;      cc++;
2923      break;      break;
2924    
# Line 2949  while (TRUE) Line 2943  while (TRUE)
2943      cc++;      cc++;
2944      break;      break;
2945    
2946        case OP_EXACTI:
2947        caseless = 1;
2948      case OP_EXACT:      case OP_EXACT:
2949        repeat = GET2(cc, 1);
2950        last = FALSE;
2951      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
2952      break;      break;
2953    
# Line 2960  while (TRUE) Line 2958  while (TRUE)
2958      cc++;      cc++;
2959      break;      break;
2960    
2961      case OP_EXACTI:      case OP_KET:
2962      caseless = 1;      cc += 1 + LINK_SIZE;
2963      cc += 1 + IMM2_SIZE;      continue;
2964    
2965        case OP_ALT:
2966        cc += GET(cc, 1);
2967        continue;
2968    
2969        case OP_ONCE:
2970        case OP_ONCE_NC:
2971        case OP_BRA:
2972        case OP_BRAPOS:
2973        case OP_CBRA:
2974        case OP_CBRAPOS:
2975        alternative = cc + GET(cc, 1);
2976        while (*alternative == OP_ALT)
2977          {
2978          max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
2979          if (max_chars == 0)
2980            return consumed;
2981          alternative += GET(alternative, 1);
2982          }
2983    
2984        if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
2985          cc += IMM2_SIZE;
2986        cc += 1 + LINK_SIZE;
2987        continue;
2988    
2989        case OP_CLASS:
2990        case OP_NCLASS:
2991        any = TRUE;
2992        cc += 1 + 32 / sizeof(pcre_uchar);
2993      break;      break;
2994    
2995      default:  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2996      must_stop = 2;      case OP_XCLASS:
2997        any = TRUE;
2998        cc += GET(cc, 1);
2999      break;      break;
3000    #endif
3001    
3002        case OP_NOT_DIGIT:
3003        case OP_DIGIT:
3004        case OP_NOT_WHITESPACE:
3005        case OP_WHITESPACE:
3006        case OP_NOT_WORDCHAR:
3007        case OP_WORDCHAR:
3008        case OP_ANY:
3009        case OP_ALLANY:
3010        any = TRUE;
3011        cc++;
3012        break;
3013    
3014    #ifdef SUPPORT_UCP
3015        case OP_NOTPROP:
3016        case OP_PROP:
3017        any = TRUE;
3018        cc += 1 + 2;
3019        break;
3020    #endif
3021    
3022        case OP_TYPEEXACT:
3023        repeat = GET2(cc, 1);
3024        cc += 1 + IMM2_SIZE;
3025        continue;
3026    
3027        default:
3028        return consumed;
3029      }      }
3030    
3031    if (must_stop == 2)    if (any)
3032        break;      {
3033    #ifdef SUPPORT_UTF
3034        if (common->utf) return consumed;
3035    #endif
3036    #if defined COMPILE_PCRE8
3037        mask = 0xff;
3038    #elif defined COMPILE_PCRE16
3039        mask = 0xffff;
3040    #elif defined COMPILE_PCRE32
3041        mask = 0xffffffff;
3042    #else
3043        SLJIT_ASSERT_STOP();
3044    #endif
3045    
3046        do
3047          {
3048          chars[0] = mask;
3049          chars[1] = mask;
3050    
3051          if (--max_chars == 0)
3052            return consumed;
3053          consumed++;
3054          chars += 2;
3055          }
3056        while (--repeat > 0);
3057    
3058        repeat = 1;
3059        continue;
3060        }
3061    
3062    len = 1;    len = 1;
3063  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3064    if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3065  #endif  #endif
3066    
3067    if (caseless && char_has_othercase(common, cc))    if (caseless != 0 && char_has_othercase(common, cc))
3068      {      {
3069      caseless = char_get_othercase_bit(common, cc);      caseless = char_get_othercase_bit(common, cc);
3070      if (caseless == 0)      if (caseless == 0)
3071        return FALSE;        return consumed;
3072  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
3073      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3074  #else  #else
# Line 2995  while (TRUE) Line 3081  while (TRUE)
3081    else    else
3082      caseless = 0;      caseless = 0;
3083    
3084    while (len > 0 && location < MAX_N_CHARS * 2)    len_save = len;
3085      {    cc_save = cc;
3086      c = *cc;    while (TRUE)
3087      bit = 0;      {
3088      if (len == (caseless & 0xff))      do
3089        {        {
3090        bit = caseless >> 8;        chr = *cc;
3091        c |= bit;  #ifdef COMPILE_PCRE32
3092          if (SLJIT_UNLIKELY(chr == NOTACHAR))
3093            return consumed;
3094    #endif
3095          mask = 0;
3096          if (len == (caseless & 0xff))
3097            {
3098            mask = caseless >> 8;
3099            chr |= mask;
3100            }
3101    
3102          if (chars[0] == NOTACHAR)
3103            {
3104            chars[0] = chr;
3105            chars[1] = mask;
3106            }
3107          else
3108            {
3109            mask |= chars[0] ^ chr;
3110            chr |= mask;
3111            chars[0] = chr;
3112            chars[1] |= mask;
3113            }
3114    
3115          len--;
3116          if (--max_chars == 0)
3117            return consumed;
3118          consumed++;
3119          chars += 2;
3120          cc++;
3121        }        }
3122        while (len > 0);
3123    
3124        if (--repeat == 0)
3125          break;
3126    
3127      chars[location] = c;      len = len_save;
3128      chars[location + 1] = bit;      cc = cc_save;
3129        }
3130    
3131      len--;    repeat = 1;
3132      location += 2;    if (last)
3133      cc++;      return consumed;
3134      }
3135    }
3136    
3137    #define MAX_N_CHARS 16
3138    
3139    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3140    {
3141    DEFINE_COMPILER;
3142    struct sljit_label *start;
3143    struct sljit_jump *quit;
3144    pcre_uint32 chars[MAX_N_CHARS * 2];
3145    pcre_uint8 ones[MAX_N_CHARS];
3146    pcre_uint32 mask;
3147    int i, max;
3148    int offsets[3];
3149    
3150    for (i = 0; i < MAX_N_CHARS; i++)
3151      {
3152      chars[i << 1] = NOTACHAR;
3153      chars[(i << 1) + 1] = 0;
3154      }
3155    
3156    max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3157    
3158    if (max <= 1)
3159      return FALSE;
3160    
3161    for (i = 0; i < max; i++)
3162      {
3163      mask = chars[(i << 1) + 1];
3164      ones[i] = ones_in_half_byte[mask & 0xf];
3165      mask >>= 4;
3166      while (mask != 0)
3167        {
3168        ones[i] += ones_in_half_byte[mask & 0xf];
3169        mask >>= 4;
3170      }      }
3171      }
3172    
3173    if (location >= MAX_N_CHARS * 2 || must_stop != 0)  offsets[0] = -1;
3174    /* Scan forward. */
3175    for (i = 0; i < max; i++)
3176      if (ones[i] <= 2) {
3177        offsets[0] = i;
3178      break;      break;
3179    }    }
3180    
3181  /* At least two characters are required. */  if (offsets[0] == -1)
3182  if (location < 2 * 2)    return FALSE;
3183      return FALSE;  
3184    /* Scan backward. */
3185    offsets[1] = -1;
3186    for (i = max - 1; i > offsets[0]; i--)
3187      if (ones[i] <= 2) {
3188        offsets[1] = i;
3189        break;
3190      }
3191    
3192    offsets[2] = -1;
3193    if (offsets[1] >= 0)
3194      {
3195      /* Scan from middle. */
3196      for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3197        if (ones[i] <= 2)
3198          {
3199          offsets[2] = i;
3200          break;
3201          }
3202    
3203      if (offsets[2] == -1)
3204        {
3205        for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3206          if (ones[i] <= 2)
3207            {
3208            offsets[2] = i;
3209            break;
3210            }
3211        }
3212      }
3213    
3214    SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3215    SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3216    
3217    chars[0] = chars[offsets[0] << 1];
3218    chars[1] = chars[(offsets[0] << 1) + 1];
3219    if (offsets[2] >= 0)
3220      {
3221      chars[2] = chars[offsets[2] << 1];
3222      chars[3] = chars[(offsets[2] << 1) + 1];
3223      }
3224    if (offsets[1] >= 0)
3225      {
3226      chars[4] = chars[offsets[1] << 1];
3227      chars[5] = chars[(offsets[1] << 1) + 1];
3228      }
3229    
3230    max -= 1;
3231  if (firstline)  if (firstline)
3232    {    {
3233    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
3234    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3235    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3236    }    }
3237  else  else
3238    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3239    
3240  start = LABEL();  start = LABEL();
3241  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3242    
3243  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3244  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  if (offsets[1] >= 0)
3245      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3246  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3247    
3248  if (chars[1] != 0)  if (chars[1] != 0)
3249    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3250  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3251  if (location > 2 * 2)  if (offsets[2] >= 0)
3252    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3253  if (chars[3] != 0)  
3254    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);  if (offsets[1] >= 0)
 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);  
 if (location > 2 * 2)  
3255    {    {
3256    if (chars[5] != 0)    if (chars[5] != 0)
3257      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3258    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3259      }
3260    
3261    if (offsets[2] >= 0)
3262      {
3263      if (chars[3] != 0)
3264        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3265      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3266    }    }
3267  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3268    
# Line 3057  JUMPHERE(quit); Line 3271  JUMPHERE(quit);
3271  if (firstline)  if (firstline)
3272    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3273  else  else
3274    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3275  return TRUE;  return TRUE;
3276  }  }
3277    
# Line 3206  if (firstline) Line 3420  if (firstline)
3420    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3421  }  }
3422    
3423  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3424    
3425  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3426  {  {
3427  DEFINE_COMPILER;  DEFINE_COMPILER;
3428  struct sljit_label *start;  struct sljit_label *start;
3429  struct sljit_jump *quit;  struct sljit_jump *quit;
3430  struct sljit_jump *found = NULL;  struct sljit_jump *found = NULL;
3431  jump_list *matches = NULL;  jump_list *matches = NULL;
 pcre_uint8 inverted_start_bits[32];  
 int i;  
3432  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3433  struct sljit_jump *jump;  struct sljit_jump *jump;
3434  #endif  #endif
3435    
 for (i = 0; i < 32; ++i)  
   inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);  
   
3436  if (firstline)  if (firstline)
3437    {    {
3438    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
# Line 3239  if (common->utf) Line 3448  if (common->utf)
3448    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3449  #endif  #endif
3450    
3451  if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))  if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3452    {    {
3453  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3454    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
# Line 3248  if (!check_class_ranges(common, inverted Line 3457  if (!check_class_ranges(common, inverted
3457  #endif  #endif
3458    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3459    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3460    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3461    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3462    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3463    found = JUMP(SLJIT_C_NOT_ZERO);    found = JUMP(SLJIT_C_NOT_ZERO);
# Line 3508  sljit_emit_fast_return(compiler, SLJIT_M Line 3717  sljit_emit_fast_return(compiler, SLJIT_M
3717  static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)  static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3718  {  {
3719  DEFINE_COMPILER;  DEFINE_COMPILER;
 struct sljit_jump *jump;  
3720    
3721  if (ranges[0] < 0)  if (ranges[0] < 0 || ranges[0] > 4)
3722    return FALSE;    return FALSE;
3723    
3724    /* No character is accepted. */
3725    if (ranges[0] == 0 && ranges[1] == 0)
3726      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3727    
3728    if (readch)
3729      read_char(common);
3730    
3731  switch(ranges[0])  switch(ranges[0])
3732    {    {
3733      case 0:
3734      /* When ranges[1] != 0, all characters are accepted. */
3735      return TRUE;
3736    
3737    case 1:    case 1:
   if (readch)  
     read_char(common);  
3738    add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));    add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3739    return TRUE;    return TRUE;
3740    
3741    case 2:    case 2:
3742    if (readch)    if (ranges[2] + 1 != ranges[3])
3743      read_char(common);      {
3744    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3745    add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));      add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3746        }
3747      else
3748        add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3749    return TRUE;    return TRUE;
3750    
3751    case 4:    case 3:
3752    if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])    if (ranges[1] != 0)
3753      {      {
3754      if (readch)      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3755        read_char(common);      if (ranges[2] + 1 != ranges[3])
     if (ranges[1] != 0)  
3756        {        {
3757        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3758        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3759        }        }
3760      else      else
3761          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3762        return TRUE;
3763        }
3764    
3765      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2]));
3766      if (ranges[3] + 1 != ranges[4])
3767        {
3768        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3]);
3769        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3]));
3770        }
3771      else
3772        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3]));
3773      return TRUE;
3774    
3775      case 4:
3776      if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4])
3777          && (ranges[2] | (ranges[4] - ranges[2])) == ranges[4]
3778          && is_powerof2(ranges[4] - ranges[2]))
3779        {
3780        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3781        if (ranges[4] + 1 != ranges[5])
3782        {        {
3783        jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3784        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));        add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
       JUMPHERE(jump);  
3785        }        }
3786        else
3787          add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3788      return TRUE;      return TRUE;
3789      }      }
3790    if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))  
3791      if (ranges[1] != 0)
3792      {      {
3793      if (readch)      if (ranges[2] + 1 != ranges[3])
3794        read_char(common);        {
3795      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3796      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3797      add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));        ranges[4] -= ranges[2];
3798          ranges[5] -= ranges[2];
3799          }
3800        else
3801          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3802    
3803        if (ranges[4] + 1 != ranges[5])
3804          {
3805          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3806          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3807          }
3808        else
3809          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3810      return TRUE;      return TRUE;
3811      }      }
3812    return FALSE;  
3813      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3814      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[2]));
3815      if (ranges[3] + 1 != ranges[4])
3816        {
3817        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]);
3818        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3]));
3819        }
3820      else
3821        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3822      return TRUE;
3823    
3824    default:    default:
3825      SLJIT_ASSERT_STOP();
3826    return FALSE;    return FALSE;
3827    }    }
3828  }  }
# Line 3592  if (bit != 0) Line 3857  if (bit != 0)
3857  ranges[0] = length;  ranges[0] = length;
3858  }  }
3859    
3860  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3861  {  {
3862  int ranges[2 + MAX_RANGE_SIZE];  int ranges[2 + MAX_RANGE_SIZE];
3863  pcre_uint8 bit, cbit, all;  pcre_uint8 bit, cbit, all;
3864  int i, byte, length = 0;  int i, byte, length = 0;
3865    
3866  bit = bits[0] & 0x1;  bit = bits[0] & 0x1;
3867  ranges[1] = bit;  ranges[1] = !invert ? bit : (bit ^ 0x1);
3868  /* Can be 0 or 255. */  /* All bits will be zero or one (since bit is zero or one). */
3869  all = -bit;  all = -bit;
3870    
3871  for (i = 0; i < 256; )  for (i = 0; i < 256; )
# Line 4021  static void compile_xclass_matchingpath( Line 4286  static void compile_xclass_matchingpath(
4286  {  {
4287  DEFINE_COMPILER;  DEFINE_COMPILER;
4288  jump_list *found = NULL;  jump_list *found = NULL;
4289  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;  jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4290  pcre_int32 c, charoffset;  pcre_int32 c, charoffset;
 const pcre_uint32 *other_cases;  
4291  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
4292  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
4293  int compares, invertcmp, numberofcmps;  int compares, invertcmp, numberofcmps;
4294    
4295  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4296  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4297  BOOL charsaved = FALSE;  BOOL charsaved = FALSE;
4298  int typereg = TMP1, scriptreg = TMP1;  int typereg = TMP1, scriptreg = TMP1;
4299    const pcre_uint32 *other_cases;
4300  pcre_int32 typeoffset;  pcre_int32 typeoffset;
4301  #endif  #endif
4302    
# Line 4039  pcre_int32 typeoffset; Line 4305  pcre_int32 typeoffset;
4305  detect_partial_match(common, backtracks);  detect_partial_match(common, backtracks);
4306  read_char(common);  read_char(common);
4307    
4308  if ((*cc++ & XCL_MAP) != 0)  cc++;
4309    if ((cc[-1] & XCL_HASPROP) == 0)
4310    {    {
4311    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    if ((cc[-1] & XCL_MAP) != 0)
4312  #ifndef COMPILE_PCRE8      {
4313    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4314  #elif defined SUPPORT_UTF  #ifdef SUPPORT_UCP
4315    if (common->utf)      charsaved = TRUE;
     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
4316  #endif  #endif
4317        if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks))
4318          {
4319          jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4320    
4321    if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))        OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4322          OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4323          OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4324          OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4325          OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4326          add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4327          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4328    
4329          JUMPHERE(jump);
4330          }
4331        else
4332          add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff));
4333    
4334        OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4335        cc += 32 / sizeof(pcre_uchar);
4336        }
4337      else
4338        add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4339      }
4340    else if ((cc[-1] & XCL_MAP) != 0)
4341      {
4342      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4343    #ifdef SUPPORT_UCP
4344      charsaved = TRUE;
4345    #endif
4346      if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4347      {      {
4348    #ifdef COMPILE_PCRE8
4349        SLJIT_ASSERT(common->utf);
4350    #endif
4351        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4352    
4353      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4354      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4355      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4356      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4357      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4358      add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));      add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
     }  
4359    
 #ifndef COMPILE_PCRE8  
   JUMPHERE(jump);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
4360      JUMPHERE(jump);      JUMPHERE(jump);
4361  #endif      }
4362    
4363    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
 #ifdef SUPPORT_UCP  
   charsaved = TRUE;  
 #endif  
4364    cc += 32 / sizeof(pcre_uchar);    cc += 32 / sizeof(pcre_uchar);
4365    }    }
4366    
# Line 4126  while (*cc != XCL_END) Line 4418  while (*cc != XCL_END)
4418        case PT_SPACE:        case PT_SPACE:
4419        case PT_PXSPACE:        case PT_PXSPACE:
4420        case PT_WORD:        case PT_WORD:
4421          case PT_PXGRAPH:
4422          case PT_PXPRINT:
4423          case PT_PXPUNCT:
4424        needstype = TRUE;        needstype = TRUE;
4425        needschar = TRUE;        needschar = TRUE;
4426        break;        break;
# Line 4413  while (*cc != XCL_END) Line 4708  while (*cc != XCL_END)
4708        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4709        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4710        break;        break;
4711    
4712          case PT_PXGRAPH:
4713          /* C and Z groups are the farthest two groups. */
4714          SET_TYPE_OFFSET(ucp_Ll);
4715          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4716          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4717    
4718          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4719    
4720          /* In case of ucp_Cf, we overwrite the result. */
4721          SET_CHAR_OFFSET(0x2066);
4722          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4723          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4724    
4725          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4726          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4727    
4728          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4729          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4730    
4731          JUMPHERE(jump);
4732          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4733          break;
4734    
4735          case PT_PXPRINT:
4736          /* C and Z groups are the farthest two groups. */
4737          SET_TYPE_OFFSET(ucp_Ll);
4738          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4739          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4740    
4741          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4742          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4743    
4744          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4745    
4746          /* In case of ucp_Cf, we overwrite the result. */
4747          SET_CHAR_OFFSET(0x2066);
4748          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4749          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4750    
4751          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4752          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4753    
4754          JUMPHERE(jump);
4755          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4756          break;
4757    
4758          case PT_PXPUNCT:
4759          SET_TYPE_OFFSET(ucp_Sc);
4760          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4761          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4762    
4763          SET_CHAR_OFFSET(0);
4764          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4765          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4766    
4767          SET_TYPE_OFFSET(ucp_Pc);
4768          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4769          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4770          jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4771          break;
4772        }        }
4773      cc += 2;      cc += 2;
4774      }      }
# Line 4559  switch(type) Line 4915  switch(type)
4915  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4916    case OP_NOTPROP:    case OP_NOTPROP:
4917    case OP_PROP:    case OP_PROP:
4918    propdata[0] = 0;    propdata[0] = XCL_HASPROP;
4919    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4920    propdata[2] = cc[0];    propdata[2] = cc[0];
4921    propdata[3] = cc[1];    propdata[3] = cc[1];
# Line 4918  switch(type) Line 5274  switch(type)
5274    case OP_NCLASS:    case OP_NCLASS:
5275    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5276    read_char(common);    read_char(common);
5277    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5278      return cc + 32 / sizeof(pcre_uchar);      return cc + 32 / sizeof(pcre_uchar);
5279    
5280  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
# Line 9153  if ((re->options & PCRE_ANCHORED) == 0) Line 9509  if ((re->options & PCRE_ANCHORED) == 0)
9509      else if ((re->flags & PCRE_STARTLINE) != 0)      else if ((re->flags & PCRE_STARTLINE) != 0)
9510        fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);        fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
9511      else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)      else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
9512        fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);        fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
9513      }      }
9514    }    }
9515  else  else

Legend:
Removed from v.1379  
changed lines
  Added in v.1419

  ViewVC Help
Powered by ViewVC 1.1.5