/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory | Revision Log | View Patch

revision 1415 by zherczeg, Sun Dec 22 20:47:08 2013 UTC | revision 1419 by zherczeg, Sun Dec 29 04:42:14 2013 UTC
# Line 533  cc += 1 + LINK_SIZE; Line 533  cc += 1 + LINK_SIZE;
533  return cc;  return cc;
534  }  }
535    
536    static int ones_in_half_byte[16] = {
537      /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
538      /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
539    };
540    
541  /* Functions whose might need modification for all new supported opcodes:  /* Functions whose might need modification for all new supported opcodes:
542   next_opcode   next_opcode
543   check_opcode_types   check_opcode_types
# Line 2894  if (newlinecheck) Line 2899  if (newlinecheck)
2899  return mainloop;  return mainloop;
2900  }  }
2901    
2902  #define MAX_N_CHARS 3  static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
   
 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)  
2903  {  {
2904  DEFINE_COMPILER;  /* Recursive function, which scans prefix literals. */
2905  struct sljit_label *start;  int len, repeat, len_save, consumed = 0;
2906  struct sljit_jump *quit;  pcre_int32 caseless, chr, mask;
2907  pcre_uint32 chars[MAX_N_CHARS * 2];  pcre_uchar *alternative, *cc_save;
2908  pcre_uchar *cc = common->start + 1 + LINK_SIZE;  BOOL last, any;
 int location = 0;  
 pcre_int32 len, c, bit, caseless;  
 int must_stop;  
   
 /* We do not support alternatives now. */  
 if (*(common->start + GET(common->start, 1)) == OP_ALT)  
   return FALSE;  
2909    
2910    repeat = 1;
2911  while (TRUE)  while (TRUE)
2912    {    {
2913      last = TRUE;
2914      any = FALSE;
2915    caseless = 0;    caseless = 0;
2916    must_stop = 1;    switch (*cc)
   switch(*cc)  
2917      {      {
     case OP_CHAR:  
     must_stop = 0;  
     cc++;  
     break;  
   
2918      case OP_CHARI:      case OP_CHARI:
2919      caseless = 1;      caseless = 1;
2920      must_stop = 0;      case OP_CHAR:
2921        last = FALSE;
2922      cc++;      cc++;
2923      break;      break;
2924    
# Line 2949  while (TRUE) Line 2943  while (TRUE)
2943      cc++;      cc++;
2944      break;      break;
2945    
2946        case OP_EXACTI:
2947        caseless = 1;
2948      case OP_EXACT:      case OP_EXACT:
2949        repeat = GET2(cc, 1);
2950        last = FALSE;
2951      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
2952      break;      break;
2953    
# Line 2960  while (TRUE) Line 2958  while (TRUE)
2958      cc++;      cc++;
2959      break;      break;
2960    
2961      case OP_EXACTI:      case OP_KET:
2962      caseless = 1;      cc += 1 + LINK_SIZE;
2963      cc += 1 + IMM2_SIZE;      continue;
2964    
2965        case OP_ALT:
2966        cc += GET(cc, 1);
2967        continue;
2968    
2969        case OP_ONCE:
2970        case OP_ONCE_NC:
2971        case OP_BRA:
2972        case OP_BRAPOS:
2973        case OP_CBRA:
2974        case OP_CBRAPOS:
2975        alternative = cc + GET(cc, 1);
2976        while (*alternative == OP_ALT)
2977          {
2978          max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
2979          if (max_chars == 0)
2980            return consumed;
2981          alternative += GET(alternative, 1);
2982          }
2983    
2984        if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
2985          cc += IMM2_SIZE;
2986        cc += 1 + LINK_SIZE;
2987        continue;
2988    
2989        case OP_CLASS:
2990        case OP_NCLASS:
2991        any = TRUE;
2992        cc += 1 + 32 / sizeof(pcre_uchar);
2993      break;      break;
2994    
2995      default:  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2996      must_stop = 2;      case OP_XCLASS:
2997        any = TRUE;
2998        cc += GET(cc, 1);
2999        break;
3000    #endif
3001    
3002        case OP_NOT_DIGIT:
3003        case OP_DIGIT:
3004        case OP_NOT_WHITESPACE:
3005        case OP_WHITESPACE:
3006        case OP_NOT_WORDCHAR:
3007        case OP_WORDCHAR:
3008        case OP_ANY:
3009        case OP_ALLANY:
3010        any = TRUE;
3011        cc++;
3012        break;
3013    
3014    #ifdef SUPPORT_UCP
3015        case OP_NOTPROP:
3016        case OP_PROP:
3017        any = TRUE;
3018        cc += 1 + 2;
3019      break;      break;
3020    #endif
3021    
3022        case OP_TYPEEXACT:
3023        repeat = GET2(cc, 1);
3024        cc += 1 + IMM2_SIZE;
3025        continue;
3026    
3027        default:
3028        return consumed;
3029      }      }
3030    
3031    if (must_stop == 2)    if (any)
3032        break;      {
3033    #ifdef SUPPORT_UTF
3034        if (common->utf) return consumed;
3035    #endif
3036    #if defined COMPILE_PCRE8
3037        mask = 0xff;
3038    #elif defined COMPILE_PCRE16
3039        mask = 0xffff;
3040    #elif defined COMPILE_PCRE32
3041        mask = 0xffffffff;
3042    #else
3043        SLJIT_ASSERT_STOP();
3044    #endif
3045    
3046        do
3047          {
3048          chars[0] = mask;
3049          chars[1] = mask;
3050    
3051          if (--max_chars == 0)
3052            return consumed;
3053          consumed++;
3054          chars += 2;
3055          }
3056        while (--repeat > 0);
3057    
3058        repeat = 1;
3059        continue;
3060        }
3061    
3062    len = 1;    len = 1;
3063  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3064    if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3065  #endif  #endif
3066    
3067    if (caseless && char_has_othercase(common, cc))    if (caseless != 0 && char_has_othercase(common, cc))
3068      {      {
3069      caseless = char_get_othercase_bit(common, cc);      caseless = char_get_othercase_bit(common, cc);
3070      if (caseless == 0)      if (caseless == 0)
3071        return FALSE;        return consumed;
3072  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
3073      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3074  #else  #else
# Line 2995  while (TRUE) Line 3081  while (TRUE)
3081    else    else
3082      caseless = 0;      caseless = 0;
3083    
3084    while (len > 0 && location < MAX_N_CHARS * 2)    len_save = len;
3085      {    cc_save = cc;
3086      c = *cc;    while (TRUE)
3087      bit = 0;      {
3088      if (len == (caseless & 0xff))      do
3089        {        {
3090        bit = caseless >> 8;        chr = *cc;
3091        c |= bit;  #ifdef COMPILE_PCRE32
3092          if (SLJIT_UNLIKELY(chr == NOTACHAR))
3093            return consumed;
3094    #endif
3095          mask = 0;
3096          if (len == (caseless & 0xff))
3097            {
3098            mask = caseless >> 8;
3099            chr |= mask;
3100            }
3101    
3102          if (chars[0] == NOTACHAR)
3103            {
3104            chars[0] = chr;
3105            chars[1] = mask;
3106            }
3107          else
3108            {
3109            mask |= chars[0] ^ chr;
3110            chr |= mask;
3111            chars[0] = chr;
3112            chars[1] |= mask;
3113            }
3114    
3115          len--;
3116          if (--max_chars == 0)
3117            return consumed;
3118          consumed++;
3119          chars += 2;
3120          cc++;
3121        }        }
3122        while (len > 0);
3123    
3124      chars[location] = c;      if (--repeat == 0)
3125      chars[location + 1] = bit;        break;
3126    
3127      len--;      len = len_save;
3128      location += 2;      cc = cc_save;
     cc++;  
3129      }      }
3130    
3131    if (location >= MAX_N_CHARS * 2 || must_stop != 0)    repeat = 1;
3132      if (last)
3133        return consumed;
3134      }
3135    }
3136    
3137    #define MAX_N_CHARS 16
3138    
3139    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3140    {
3141    DEFINE_COMPILER;
3142    struct sljit_label *start;
3143    struct sljit_jump *quit;
3144    pcre_uint32 chars[MAX_N_CHARS * 2];
3145    pcre_uint8 ones[MAX_N_CHARS];
3146    pcre_uint32 mask;
3147    int i, max;
3148    int offsets[3];
3149    
3150    for (i = 0; i < MAX_N_CHARS; i++)
3151      {
3152      chars[i << 1] = NOTACHAR;
3153      chars[(i << 1) + 1] = 0;
3154      }
3155    
3156    max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3157    
3158    if (max <= 1)
3159      return FALSE;
3160    
3161    for (i = 0; i < max; i++)
3162      {
3163      mask = chars[(i << 1) + 1];
3164      ones[i] = ones_in_half_byte[mask & 0xf];
3165      mask >>= 4;
3166      while (mask != 0)
3167        {
3168        ones[i] += ones_in_half_byte[mask & 0xf];
3169        mask >>= 4;
3170        }
3171      }
3172    
3173    offsets[0] = -1;
3174    /* Scan forward. */
3175    for (i = 0; i < max; i++)
3176      if (ones[i] <= 2) {
3177        offsets[0] = i;
3178      break;      break;
3179    }    }
3180    
3181  /* At least two characters are required. */  if (offsets[0] == -1)
3182  if (location < 2 * 2)    return FALSE;
     return FALSE;  
3183    
3184    /* Scan backward. */
3185    offsets[1] = -1;
3186    for (i = max - 1; i > offsets[0]; i--)
3187      if (ones[i] <= 2) {
3188        offsets[1] = i;
3189        break;
3190      }
3191    
3192    offsets[2] = -1;
3193    if (offsets[1] >= 0)
3194      {
3195      /* Scan from middle. */
3196      for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3197        if (ones[i] <= 2)
3198          {
3199          offsets[2] = i;
3200          break;
3201          }
3202    
3203      if (offsets[2] == -1)
3204        {
3205        for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3206          if (ones[i] <= 2)
3207            {
3208            offsets[2] = i;
3209            break;
3210            }
3211        }
3212      }
3213    
3214    SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3215    SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3216    
3217    chars[0] = chars[offsets[0] << 1];
3218    chars[1] = chars[(offsets[0] << 1) + 1];
3219    if (offsets[2] >= 0)
3220      {
3221      chars[2] = chars[offsets[2] << 1];
3222      chars[3] = chars[(offsets[2] << 1) + 1];
3223      }
3224    if (offsets[1] >= 0)
3225      {
3226      chars[4] = chars[offsets[1] << 1];
3227      chars[5] = chars[(offsets[1] << 1) + 1];
3228      }
3229    
3230    max -= 1;
3231  if (firstline)  if (firstline)
3232    {    {
3233    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
3234    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3235    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3236    }    }
3237  else  else
3238    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3239    
3240  start = LABEL();  start = LABEL();
3241  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3242    
3243  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3244  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  if (offsets[1] >= 0)
3245      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3246  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3247    
3248  if (chars[1] != 0)  if (chars[1] != 0)
3249    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3250  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3251  if (location > 2 * 2)  if (offsets[2] >= 0)
3252    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3253  if (chars[3] != 0)  
3254    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);  if (offsets[1] >= 0)
 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);  
 if (location > 2 * 2)  
3255    {    {
3256    if (chars[5] != 0)    if (chars[5] != 0)
3257      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3258    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3259      }
3260    
3261    if (offsets[2] >= 0)
3262      {
3263      if (chars[3] != 0)
3264        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3265      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3266    }    }
3267  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3268    
# Line 3057  JUMPHERE(quit); Line 3271  JUMPHERE(quit);
3271  if (firstline)  if (firstline)
3272    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3273  else  else
3274    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3275  return TRUE;  return TRUE;
3276  }  }
3277    

Legend:
Removed from v.1415  
changed lines
  Added in v.1419

  ViewVC Help
Powered by ViewVC 1.1.5