/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory | Revision Log | View Patch

revision 1446 by zherczeg, Sun Jan 12 17:17:29 2014 UTC | revision 1447 by zherczeg, Mon Jan 13 20:18:33 2014 UTC
# Line 3149  if (newlinecheck) Line 3149  if (newlinecheck)
3149  return mainloop;  return mainloop;
3150  }  }
3151    
3152  static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)  #define MAX_N_CHARS 16
3153    #define MAX_N_BYTES 8
3154    
3155    static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3156    {
3157    pcre_uint8 len = bytes[0];
3158    int i;
3159    
3160    if (len == 255)
3161      return;
3162    
3163    if (len == 0)
3164      {
3165      bytes[0] = 1;
3166      bytes[1] = byte;
3167      return;
3168      }
3169    
3170    for (i = len; i > 0; i--)
3171      if (bytes[i] == byte)
3172        return;
3173    
3174    if (len >= MAX_N_BYTES - 1)
3175      {
3176      bytes[0] = 255;
3177      return;
3178      }
3179    
3180    len++;
3181    bytes[len] = byte;
3182    bytes[0] = len;
3183    }
3184    
3185    static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
3186  {  {
3187  /* Recursive function, which scans prefix literals. */  /* Recursive function, which scans prefix literals. */
3188    BOOL last, any, caseless;
3189  int len, repeat, len_save, consumed = 0;  int len, repeat, len_save, consumed = 0;
3190  pcre_uint32 chr, mask;  pcre_uint32 chr, mask;
3191  pcre_uchar *alternative, *cc_save, *oc;  pcre_uchar *alternative, *cc_save, *oc;
 BOOL last, any, caseless;  
3192  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3193  pcre_uchar othercase[8];  pcre_uchar othercase[8];
3194  #elif defined SUPPORT_UTF && defined COMPILE_PCRE16  #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
# Line 3239  while (TRUE) Line 3272  while (TRUE)
3272      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
3273      while (*alternative == OP_ALT)      while (*alternative == OP_ALT)
3274        {        {
3275        max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);        max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
3276        if (max_chars == 0)        if (max_chars == 0)
3277          return consumed;          return consumed;
3278        alternative += GET(alternative, 1);        alternative += GET(alternative, 1);
# Line 3351  while (TRUE) Line 3384  while (TRUE)
3384        {        {
3385        chars[0] = mask;        chars[0] = mask;
3386        chars[1] = mask;        chars[1] = mask;
3387          bytes[0] = 255;
3388    
3389        consumed++;        consumed++;
3390        if (--max_chars == 0)        if (--max_chars == 0)
3391          return consumed;          return consumed;
3392        chars += 2;        chars += 2;
3393          bytes += MAX_N_BYTES;
3394        }        }
3395      while (--repeat > 0);      while (--repeat > 0);
3396    
# Line 3374  while (TRUE) Line 3409  while (TRUE)
3409      if (common->utf)      if (common->utf)
3410        {        {
3411        GETCHAR(chr, cc);        GETCHAR(chr, cc);
3412        if (PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)        if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3413          return consumed;          return consumed;
3414        }        }
3415      else      else
# Line 3399  while (TRUE) Line 3434  while (TRUE)
3434        if (SLJIT_UNLIKELY(chr == NOTACHAR))        if (SLJIT_UNLIKELY(chr == NOTACHAR))
3435          return consumed;          return consumed;
3436  #endif  #endif
3437          add_prefix_byte((pcre_uint8)chr, bytes);
3438    
3439        mask = 0;        mask = 0;
3440        if (caseless)        if (caseless)
3441          {          {
3442            add_prefix_byte((pcre_uint8)*oc, bytes);
3443          mask = *cc ^ *oc;          mask = *cc ^ *oc;
3444          chr |= mask;          chr |= mask;
3445          }          }
# Line 3428  while (TRUE) Line 3466  while (TRUE)
3466        if (--max_chars == 0)        if (--max_chars == 0)
3467          return consumed;          return consumed;
3468        chars += 2;        chars += 2;
3469          bytes += MAX_N_BYTES;
3470        cc++;        cc++;
3471        oc++;        oc++;
3472        }        }
# Line 3446  while (TRUE) Line 3485  while (TRUE)
3485    }    }
3486  }  }
3487    
 #define MAX_N_CHARS 16  
   
3488  static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)  static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3489  {  {
3490  DEFINE_COMPILER;  DEFINE_COMPILER;
3491  struct sljit_label *start;  struct sljit_label *start;
3492  struct sljit_jump *quit;  struct sljit_jump *quit;
3493  pcre_uint32 chars[MAX_N_CHARS * 2];  pcre_uint32 chars[MAX_N_CHARS * 2];
3494    pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3495  pcre_uint8 ones[MAX_N_CHARS];  pcre_uint8 ones[MAX_N_CHARS];
3496  int offsets[3];  int offsets[3];
3497  pcre_uint32 mask, byte;  pcre_uint32 mask;
3498    pcre_uint8 *byte_set, *byte_set_end;
3499  int i, max, from;  int i, max, from;
3500  int range_right = -1, range_len = 4 - 1;  int range_right = -1, range_len = 4 - 1;
3501  sljit_ub *update_table = NULL;  sljit_ub *update_table = NULL;
# Line 3469  for (i = 0; i < MAX_N_CHARS; i++) Line 3508  for (i = 0; i < MAX_N_CHARS; i++)
3508    {    {
3509    chars[i << 1] = NOTACHAR;    chars[i << 1] = NOTACHAR;
3510    chars[(i << 1) + 1] = 0;    chars[(i << 1) + 1] = 0;
3511      bytes[i * MAX_N_BYTES] = 0;
3512    }    }
3513    
3514  max = scan_prefix(common, common->start, chars, MAX_N_CHARS);  max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3515    
3516  if (max <= 1)  if (max <= 1)
3517    return FALSE;    return FALSE;
# Line 3491  for (i = 0; i < max; i++) Line 3531  for (i = 0; i < max; i++)
3531  in_range = FALSE;  in_range = FALSE;
3532  for (i = 0; i <= max; i++)  for (i = 0; i <= max; i++)
3533    {    {
3534    if (i < max && ones[i] <= 1)    if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3535        {
3536        range_len = i - from;
3537        range_right = i - 1;
3538        }
3539    
3540      if (i < max && bytes[i * MAX_N_BYTES] < 255)
3541      {      {
3542      if (!in_range)      if (!in_range)
3543        {        {
# Line 3500  for (i = 0; i <= max; i++) Line 3546  for (i = 0; i <= max; i++)
3546        }        }
3547      }      }
3548    else if (in_range)    else if (in_range)
     {  
     if ((i - from) > range_len)  
       {  
       range_len = i - from;  
       range_right = i - 1;  
       }  
3549      in_range = FALSE;      in_range = FALSE;
     }  
3550    }    }
3551    
3552  if (range_right >= 0)  if (range_right >= 0)
# Line 3528  if (range_right >= 0) Line 3567  if (range_right >= 0)
3567    
3568    for (i = 0; i < range_len; i++)    for (i = 0; i < range_len; i++)
3569      {      {
3570      byte = chars[(range_right - i) << 1] & 0xff;      byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3571      if (update_table[byte] > IN_UCHARS(i))      SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3572        update_table[byte] = IN_UCHARS(i);      byte_set_end = byte_set + byte_set[0];
3573      mask = chars[((range_right - i) << 1) + 1] & 0xff;      byte_set++;
3574      if (mask != 0)      while (byte_set <= byte_set_end)
3575        {        {
3576        byte ^= mask;        if (update_table[*byte_set] > IN_UCHARS(i))
3577        if (update_table[byte] > IN_UCHARS(i))          update_table[*byte_set] = IN_UCHARS(i);
3578          update_table[byte] = IN_UCHARS(i);        byte_set++;
3579        }        }
3580      }      }
3581    }    }
# Line 3549  for (i = 0; i < max; i++) Line 3588  for (i = 0; i < max; i++)
3588      break;      break;
3589    }    }
3590    
3591  if (offsets[0] == -1)  if (offsets[0] < 0 && range_right < 0)
3592    return FALSE;    return FALSE;
3593    
3594  /* Scan backward. */  if (offsets[0] >= 0)
 offsets[1] = -1;  
 for (i = max - 1; i > offsets[0]; i--)  
   if (ones[i] <= 2 && i != range_right)  
     {  
     offsets[1] = i;  
     break;  
     }  
   
 /* This case is handled better by fast_forward_first_char. */  
 if (offsets[1] == -1 && offsets[0] == 0)  
   return FALSE;  
   
 offsets[2] = -1;  
 if (offsets[1] >= 0 && range_right == -1)  
3595    {    {
3596    /* Scan from middle. */    /* Scan backward. */
3597    for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)    offsets[1] = -1;
3598      if (ones[i] <= 2)    for (i = max - 1; i > offsets[0]; i--)
3599        if (ones[i] <= 2 && i != range_right)
3600        {        {
3601        offsets[2] = i;        offsets[1] = i;
3602        break;        break;
3603        }        }
3604    
3605    if (offsets[2] == -1)    /* This case is handled better by fast_forward_first_char. */
3606      if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3607        return FALSE;
3608    
3609      offsets[2] = -1;
3610      /* We only search for a middle character if there is no range check. */
3611      if (offsets[1] >= 0 && range_right == -1)
3612      {      {
3613      for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)      /* Scan from middle. */
3614        for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3615        if (ones[i] <= 2)        if (ones[i] <= 2)
3616          {          {
3617          offsets[2] = i;          offsets[2] = i;
3618          break;          break;
3619          }          }
3620    
3621        if (offsets[2] == -1)
3622          {
3623          for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3624            if (ones[i] <= 2)
3625              {
3626              offsets[2] = i;
3627              break;
3628              }
3629          }
3630      }      }
   }  
3631    
3632  SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));    SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3633  SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));    SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3634    
3635  chars[0] = chars[offsets[0] << 1];    chars[0] = chars[offsets[0] << 1];
3636  chars[1] = chars[(offsets[0] << 1) + 1];    chars[1] = chars[(offsets[0] << 1) + 1];
3637  if (offsets[2] >= 0)    if (offsets[2] >= 0)
3638    {      {
3639    chars[2] = chars[offsets[2] << 1];      chars[2] = chars[offsets[2] << 1];
3640    chars[3] = chars[(offsets[2] << 1) + 1];      chars[3] = chars[(offsets[2] << 1) + 1];
3641    }      }
3642  if (offsets[1] >= 0)    if (offsets[1] >= 0)
3643    {      {
3644    chars[4] = chars[offsets[1] << 1];      chars[4] = chars[offsets[1] << 1];
3645    chars[5] = chars[(offsets[1] << 1) + 1];      chars[5] = chars[(offsets[1] << 1) + 1];
3646        }
3647    }    }
3648    
3649  max -= 1;  max -= 1;
# Line 3625  if (range_right >= 0) Line 3668  if (range_right >= 0)
3668  start = LABEL();  start = LABEL();
3669  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3670    
3671    SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3672    
3673  if (range_right >= 0)  if (range_right >= 0)
3674    {    {
3675  #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)  #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
# Line 3642  if (range_right >= 0) Line 3687  if (range_right >= 0)
3687    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3688    }    }
3689    
3690  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));  if (offsets[0] >= 0)
3691  if (offsets[1] >= 0)    {
3692    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3693  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));    if (offsets[1] >= 0)
3694        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3695      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3696    
3697  if (chars[1] != 0)    if (chars[1] != 0)
3698    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3699  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3700  if (offsets[2] >= 0)    if (offsets[2] >= 0)
3701    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3702    
3703  if (offsets[1] >= 0)    if (offsets[1] >= 0)
3704    {      {
3705    if (chars[5] != 0)      if (chars[5] != 0)
3706      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);        OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3707    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3708    }      }
3709    
3710  if (offsets[2] >= 0)    if (offsets[2] >= 0)
3711    {      {
3712    if (chars[3] != 0)      if (chars[3] != 0)
3713      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3714    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3715        }
3716      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3717    }    }
 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
3718    
3719  JUMPHERE(quit);  JUMPHERE(quit);
3720    
# Line 3688  return TRUE; Line 3736  return TRUE;
3736  }  }
3737    
3738  #undef MAX_N_CHARS  #undef MAX_N_CHARS
3739    #undef MAX_N_BYTES
3740    
3741  static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)  static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3742  {  {

Legend:
Removed from v.1446  
changed lines
  Added in v.1447

  ViewVC Help
Powered by ViewVC 1.1.5