/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory | Revision Log | View Patch

revision 1632 by zherczeg, Fri Feb 12 14:43:22 2016 UTC revision 1633 by zherczeg, Wed Feb 17 10:06:38 2016 UTC
# Line 582  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_ Line 582  SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_
582  return count;  return count;
583  }  }
584    
 static int ones_in_half_byte[16] = {  
   /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,  
   /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4  
 };  
   
585  /* Functions which might need modification for all new supported opcodes:  /* Functions which might need modification for all new supported opcodes:
586   next_opcode   next_opcode
587   check_opcode_types   check_opcode_types
# Line 986  switch(*cc) Line 981  switch(*cc)
981  return FALSE;  return FALSE;
982  }  }
983    
/* Detects an accelerated repeat at the start of the pattern. Beginning at
   common->start, zero-width assertions and the headers of plain OP_BRA /
   OP_CBRA brackets are stepped over. If is_accelerated_repeat() accepts the
   opcode reached that way, the opcode is recorded in
   common->fast_forward_bc_ptr, the private data slot at *private_data_start
   is assigned to it, *private_data_start is advanced by sizeof(sljit_sw)
   and TRUE is returned. In every other case FALSE is returned and common is
   left unchanged. */
984    static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
985    {
986    pcre_uchar *cc = common->start;
987    pcre_uchar *end;
988    
989    /* Skip non-repeated brackets and leading zero-width assertions. */
990    while (TRUE)
991      {
992      switch(*cc)
993        {
994        case OP_SOD:
995        case OP_SOM:
996        case OP_SET_SOM:
997        case OP_NOT_WORD_BOUNDARY:
998        case OP_WORD_BOUNDARY:
999        case OP_EODN:
1000        case OP_EOD:
1001        case OP_CIRC:
1002        case OP_CIRCM:
1003        case OP_DOLL:
1004        case OP_DOLLM:
1005        /* Zero width assertions. */
1006        cc++;
        /* This continue belongs to the enclosing while loop, not the switch:
           the next opcode is examined from the top. */
1007        continue;
1008        }
1009    
        /* Anything other than a plain or capturing bracket ends the skipping. */
1010      if (*cc != OP_BRA && *cc != OP_CBRA)
1011        break;
1012    
        /* Follow the link stored after the opcode. Give up unless it leads
           directly to an OP_KET that has no private data assigned
           (presumably: a single-alternative bracket that is not repeated -
           TODO confirm against the opcode layout). */
1013      end = cc + GET(cc, 1);
1014      if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1015        return FALSE;
1016      if (*cc == OP_CBRA)
1017        {
          /* The value read at offset 1 + LINK_SIZE indexes optimized_cbracket;
             a zero entry there rejects the bracket. */
1018        if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1019          return FALSE;
          /* Step over that extra IMM2_SIZE operand of OP_CBRA. */
1020        cc += IMM2_SIZE;
1021        }
        /* Step over the opcode and its link, so cc now points inside the
           bracket. The closing OP_KET is never stepped over here. */
1022      cc += 1 + LINK_SIZE;
1023      }
1024    
1025    if (is_accelerated_repeat(cc))
1026      {
1027      common->fast_forward_bc_ptr = cc;
        /* The slot is registered under cc + 1; the code that reloads STR_PTR
           looks it up with PRIVATE_DATA(common->fast_forward_bc_ptr + 1). */
1028      common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1029      *private_data_start += sizeof(sljit_sw);
1030      return TRUE;
1031      }
1032    return FALSE;
1033    }
1034    
1035  static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_si depth)  static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_si depth)
1036  {  {
1037    pcre_uchar *next_alt;    pcre_uchar *next_alt;
# Line 3258  sljit_emit_fast_return(compiler, RETURN_ Line 3304  sljit_emit_fast_return(compiler, RETURN_
3304  }  }
3305  #endif  #endif
3306    
3307  static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)  static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
3308  {  {
3309  DEFINE_COMPILER;  DEFINE_COMPILER;
3310  struct sljit_label *mainloop;  struct sljit_label *mainloop;
3311  struct sljit_label *newlinelabel = NULL;  struct sljit_label *newlinelabel = NULL;
3312  struct sljit_jump *start;  struct sljit_jump *start;
3313  struct sljit_jump *end = NULL;  struct sljit_jump *end = NULL;
3314  struct sljit_jump *nl = NULL;  struct sljit_jump *end2 = NULL;
3315  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3316  struct sljit_jump *singlechar;  struct sljit_jump *singlechar;
3317  #endif  #endif
# Line 3273  jump_list *newline = NULL; Line 3319  jump_list *newline = NULL;
3319  BOOL newlinecheck = FALSE;  BOOL newlinecheck = FALSE;
3320  BOOL readuchar = FALSE;  BOOL readuchar = FALSE;
3321    
3322  if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||  if (!(hascrorlf || (common->first_line_end != 0)) &&
3323      common->nltype == NLTYPE_ANYCRLF || common->newline > 255))      (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3324    newlinecheck = TRUE;    newlinecheck = TRUE;
3325    
3326  if (firstline)  if (common->first_line_end != 0)
3327    {    {
3328    /* Search for the end of the first line. */    /* Search for the end of the first line. */
   SLJIT_ASSERT(common->first_line_end != 0);  
3329    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3330    
3331    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
# Line 3326  if (newlinecheck) Line 3371  if (newlinecheck)
3371    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3372  #endif  #endif
3373    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3374    nl = JUMP(SLJIT_JUMP);    end2 = JUMP(SLJIT_JUMP);
3375    }    }
3376    
3377  mainloop = LABEL();  mainloop = LABEL();
# Line 3371  JUMPHERE(start); Line 3416  JUMPHERE(start);
3416  if (newlinecheck)  if (newlinecheck)
3417    {    {
3418    JUMPHERE(end);    JUMPHERE(end);
3419    JUMPHERE(nl);    JUMPHERE(end2);
3420    }    }
3421    
3422  return mainloop;  return mainloop;
3423  }  }
3424    
3425  #define MAX_N_CHARS 16  #define MAX_N_CHARS 16
3426  #define MAX_N_BYTES 8  #define MAX_DIFF_CHARS 6
3427    
/* Adds one value to the set of values collected for a single prefix
   position. Element [0] of the set is its length and the values themselves
   are kept in elements [1..length]. A length of 255 means "any character":
   such a set is never modified again. Values already present are ignored,
   and a set that has reached its capacity limit is switched to 255 instead
   of growing. */
3428  static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)  static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3429  {  {
3430  pcre_uint8 len = bytes[0];  pcre_uchar i, len;
 int i;  
3431    
3432    len = chars[0];
      /* Already "any character": nothing more to record. */
3433  if (len == 255)  if (len == 255)
3434    return;    return;
3435    
      /* Empty set: store the first value directly. */
3436  if (len == 0)  if (len == 0)
3437    {    {
3438    bytes[0] = 1;    chars[0] = 1;
3439    bytes[1] = byte;    chars[1] = chr;
3440    return;    return;
3441    }    }
3442    
      /* Ignore values that are already stored (elements len down to 1). */
3443  for (i = len; i > 0; i--)  for (i = len; i > 0; i--)
3444    if (bytes[i] == byte)    if (chars[i] == chr)
3445      return;      return;
3446    
      /* No room for another distinct value: degrade to "any character". */
3447  if (len >= MAX_N_BYTES - 1)  if (len >= MAX_DIFF_CHARS - 1)
3448    {    {
3449    bytes[0] = 255;    chars[0] = 255;
3450    return;    return;
3451    }    }
3452    
      /* Append the new value and publish the new length. */
3453  len++;  len++;
3454  bytes[len] = byte;  chars[len] = chr;
3455  bytes[0] = len;  chars[0] = len;
3456  }  }
3457    
3458  static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)  static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, pcre_uint32 *rec_count)
3459  {  {
3460  /* Recursive function, which scans prefix literals. */  /* Recursive function, which scans prefix literals. */
3461  BOOL last, any, caseless;  BOOL last, any, class, caseless;
3462  int len, repeat, len_save, consumed = 0;  int len, repeat, len_save, consumed = 0;
3463  pcre_uint32 chr, mask;  sljit_ui chr;
3464    sljit_ub *bytes, *bytes_end, byte;
3465  pcre_uchar *alternative, *cc_save, *oc;  pcre_uchar *alternative, *cc_save, *oc;
3466  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3467  pcre_uchar othercase[8];  pcre_uchar othercase[8];
# Line 3434  while (TRUE) Line 3480  while (TRUE)
3480    
3481    last = TRUE;    last = TRUE;
3482    any = FALSE;    any = FALSE;
3483      class = FALSE;
3484    caseless = FALSE;    caseless = FALSE;
3485    
3486    switch (*cc)    switch (*cc)
# Line 3497  while (TRUE) Line 3544  while (TRUE)
3544  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3545      if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);      if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3546  #endif  #endif
3547      max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);      max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
3548      if (max_chars == 0)      if (max_chars == 0)
3549        return consumed;        return consumed;
3550      last = FALSE;      last = FALSE;
# Line 3520  while (TRUE) Line 3567  while (TRUE)
3567      alternative = cc + GET(cc, 1);      alternative = cc + GET(cc, 1);
3568      while (*alternative == OP_ALT)      while (*alternative == OP_ALT)
3569        {        {
3570        max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);        max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
3571        if (max_chars == 0)        if (max_chars == 0)
3572          return consumed;          return consumed;
3573        alternative += GET(alternative, 1);        alternative += GET(alternative, 1);
# Line 3533  while (TRUE) Line 3580  while (TRUE)
3580    
3581      case OP_CLASS:      case OP_CLASS:
3582  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3583      if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;      if (common->utf && !is_char7_bitset((const sljit_ub *)(cc + 1), FALSE))
3584          return consumed;
3585  #endif  #endif
3586      any = TRUE;      class = TRUE;
     cc += 1 + 32 / sizeof(pcre_uchar);  
3587      break;      break;
3588    
3589      case OP_NCLASS:      case OP_NCLASS:
3590  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3591      if (common->utf) return consumed;      if (common->utf) return consumed;
3592  #endif  #endif
3593      any = TRUE;      class = TRUE;
     cc += 1 + 32 / sizeof(pcre_uchar);  
3594      break;      break;
3595    
3596  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
# Line 3559  while (TRUE) Line 3605  while (TRUE)
3605    
3606      case OP_DIGIT:      case OP_DIGIT:
3607  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3608      if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))      if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_digit, FALSE))
3609        return consumed;        return consumed;
3610  #endif  #endif
3611      any = TRUE;      any = TRUE;
# Line 3568  while (TRUE) Line 3614  while (TRUE)
3614    
3615      case OP_WHITESPACE:      case OP_WHITESPACE:
3616  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3617      if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))      if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_space, FALSE))
3618        return consumed;        return consumed;
3619  #endif  #endif
3620      any = TRUE;      any = TRUE;
# Line 3577  while (TRUE) Line 3623  while (TRUE)
3623    
3624      case OP_WORDCHAR:      case OP_WORDCHAR:
3625  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3626      if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))      if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_word, FALSE))
3627        return consumed;        return consumed;
3628  #endif  #endif
3629      any = TRUE;      any = TRUE;
# Line 3600  while (TRUE) Line 3646  while (TRUE)
3646      cc++;      cc++;
3647      break;      break;
3648    
3649  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UTF
3650      case OP_NOTPROP:      case OP_NOTPROP:
3651      case OP_PROP:      case OP_PROP:
3652  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifndef COMPILE_PCRE32
3653      if (common->utf) return consumed;      if (common->utf) return consumed;
3654  #endif  #endif
3655      any = TRUE;      any = TRUE;
# Line 3632  while (TRUE) Line 3678  while (TRUE)
3678    
3679    if (any)    if (any)
3680      {      {
3681  #if defined COMPILE_PCRE8      do
3682      mask = 0xff;        {
3683  #elif defined COMPILE_PCRE16        chars[0] = 255;
3684      mask = 0xffff;  
3685  #elif defined COMPILE_PCRE32        consumed++;
3686      mask = 0xffffffff;        if (--max_chars == 0)
3687  #else          return consumed;
3688      SLJIT_ASSERT_STOP();        chars += MAX_DIFF_CHARS;
3689  #endif        }
3690        while (--repeat > 0);
3691    
3692        repeat = 1;
3693        continue;
3694        }
3695    
3696      if (class)
3697        {
3698        bytes = (sljit_ub*) (cc + 1);
3699        cc += 1 + 32 / sizeof(pcre_uchar);
3700    
3701        switch (*cc)
3702          {
3703          case OP_CRSTAR:
3704          case OP_CRMINSTAR:
3705          case OP_CRPOSSTAR:
3706          case OP_CRQUERY:
3707          case OP_CRMINQUERY:
3708          case OP_CRPOSQUERY:
3709          max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
3710          if (max_chars == 0)
3711            return consumed;
3712          break;
3713    
3714          default:
3715          case OP_CRPLUS:
3716          case OP_CRMINPLUS:
3717          case OP_CRPOSPLUS:
3718          break;
3719    
3720          case OP_CRRANGE:
3721          case OP_CRMINRANGE:
3722          case OP_CRPOSRANGE:
3723          repeat = GET2(cc, 1);
3724          if (repeat <= 0)
3725            return consumed;
3726          break;
3727          }
3728    
3729      do      do
3730        {        {
3731        chars[0] = mask;        if (bytes[31] & 0x80)
3732        chars[1] = mask;          chars[0] = 255;
3733        bytes[0] = 255;        else if (chars[0] != 255)
3734            {
3735            bytes_end = bytes + 32;
3736            chr = 0;
3737            do
3738              {
3739              byte = *bytes++;
3740              SLJIT_ASSERT((chr & 0x7) == 0);
3741              if (byte == 0)
3742                chr += 8;
3743              else
3744                {
3745                do
3746                  {
3747                  if ((byte & 0x1) != 0)
3748                    add_prefix_char(chr, chars);
3749                  byte >>= 1;
3750                  chr++;
3751                  }
3752                while (byte != 0);
3753                chr = (chr + 7) & ~7;
3754                }
3755              }
3756            while (chars[0] != 255 && bytes < bytes_end);
3757            bytes = bytes_end - 32;
3758            }
3759    
3760        consumed++;        consumed++;
3761        if (--max_chars == 0)        if (--max_chars == 0)
3762          return consumed;          return consumed;
3763        chars += 2;        chars += MAX_DIFF_CHARS;
       bytes += MAX_N_BYTES;  
3764        }        }
3765      while (--repeat > 0);      while (--repeat > 0);
3766    
3767        switch (*cc)
3768          {
3769          case OP_CRSTAR:
3770          case OP_CRMINSTAR:
3771          case OP_CRPOSSTAR:
3772          return consumed;
3773    
3774          case OP_CRQUERY:
3775          case OP_CRMINQUERY:
3776          case OP_CRPOSQUERY:
3777          cc++;
3778          break;
3779    
3780          case OP_CRRANGE:
3781          case OP_CRMINRANGE:
3782          case OP_CRPOSRANGE:
3783          if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
3784            return consumed;
3785          cc += 1 + 2 * IMM2_SIZE;
3786          break;
3787          }
3788    
3789      repeat = 1;      repeat = 1;
3790      continue;      continue;
3791      }      }
# Line 3682  while (TRUE) Line 3812  while (TRUE)
3812        }        }
3813      }      }
3814    else    else
3815        {
3816      caseless = FALSE;      caseless = FALSE;
3817        othercase[0] = 0; /* Stops compiler warning - PH */
3818        }
3819    
3820    len_save = len;    len_save = len;
3821    cc_save = cc;    cc_save = cc;
# Line 3692  while (TRUE) Line 3825  while (TRUE)
3825      do      do
3826        {        {
3827        chr = *cc;        chr = *cc;
3828  #ifdef COMPILE_PCRE32        add_prefix_char(*cc, chars);
       if (SLJIT_UNLIKELY(chr == NOTACHAR))  
         return consumed;  
 #endif  
       add_prefix_byte((pcre_uint8)chr, bytes);  
3829    
       mask = 0;  
3830        if (caseless)        if (caseless)
3831          {          add_prefix_char(*oc, chars);
         add_prefix_byte((pcre_uint8)*oc, bytes);  
         mask = *cc ^ *oc;  
         chr |= mask;  
         }  
   
 #ifdef COMPILE_PCRE32  
       if (chars[0] == NOTACHAR && chars[1] == 0)  
 #else  
       if (chars[0] == NOTACHAR)  
 #endif  
         {  
         chars[0] = chr;  
         chars[1] = mask;  
         }  
       else  
         {  
         mask |= chars[0] ^ chr;  
         chr |= mask;  
         chars[0] = chr;  
         chars[1] |= mask;  
         }  
3832    
3833        len--;        len--;
3834        consumed++;        consumed++;
3835        if (--max_chars == 0)        if (--max_chars == 0)
3836          return consumed;          return consumed;
3837        chars += 2;        chars += MAX_DIFF_CHARS;
       bytes += MAX_N_BYTES;  
3838        cc++;        cc++;
3839        oc++;        oc++;
3840        }        }
# Line 4196  if (offset > 0) Line 4302  if (offset > 0)
4302    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4303  }  }
4304    
4305  static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)  static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4306  {  {
4307  DEFINE_COMPILER;  DEFINE_COMPILER;
4308  struct sljit_label *start;  struct sljit_label *start;
4309  struct sljit_jump *quit;  struct sljit_jump *quit;
4310  pcre_uint32 chars[MAX_N_CHARS * 2];  struct sljit_jump *match;
4311  pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];  /* The first element of each MAX_DIFF_CHARS sized group is the number of
4312  pcre_uint8 ones[MAX_N_CHARS];  characters stored in it (0 to MAX_DIFF_CHARS - 1); 255 represents any character. */
4313  int offsets[3];  pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4314  pcre_uint32 mask;  sljit_si offset;
4315  pcre_uint8 *byte_set, *byte_set_end;  pcre_uchar mask;
4316    pcre_uchar *char_set, *char_set_end;
4317  int i, max, from;  int i, max, from;
4318  int range_right = -1, range_len = 3 - 1;  int range_right = -1, range_len;
4319  sljit_ub *update_table = NULL;  sljit_ub *update_table = NULL;
4320  BOOL in_range;  BOOL in_range;
4321  pcre_uint32 rec_count;  uint32_t rec_count;
4322    
4323  for (i = 0; i < MAX_N_CHARS; i++)  for (i = 0; i < MAX_N_CHARS; i++)
4324    {    chars[i * MAX_DIFF_CHARS] = 0;
   chars[i << 1] = NOTACHAR;  
   chars[(i << 1) + 1] = 0;  
   bytes[i * MAX_N_BYTES] = 0;  
   }  
4325    
4326  rec_count = 10000;  rec_count = 10000;
4327  max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);  max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4328    
4329  if (max <= 1)  if (max < 1)
4330    return FALSE;    return FALSE;
4331    
 for (i = 0; i < max; i++)  
   {  
   mask = chars[(i << 1) + 1];  
   ones[i] = ones_in_half_byte[mask & 0xf];  
   mask >>= 4;  
   while (mask != 0)  
     {  
     ones[i] += ones_in_half_byte[mask & 0xf];  
     mask >>= 4;  
     }  
   }  
   
4332  in_range = FALSE;  in_range = FALSE;
4333  from = 0;   /* Prevent compiler "uninitialized" warning */  /* Prevent compiler "uninitialized" warning */
4334    from = 0;
4335    range_len = 4 /* minimum length */ - 1;
4336  for (i = 0; i <= max; i++)  for (i = 0; i <= max; i++)
4337    {    {
4338    if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))    if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4339      {      {
4340      range_len = i - from;      range_len = i - from;
4341      range_right = i - 1;      range_right = i - 1;
4342      }      }
4343    
4344    if (i < max && bytes[i * MAX_N_BYTES] < 255)    if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4345      {      {
4346        SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4347      if (!in_range)      if (!in_range)
4348        {        {
4349        in_range = TRUE;        in_range = TRUE;
4350        from = i;        from = i;
4351        }        }
4352      }      }
4353    else if (in_range)    else
4354      in_range = FALSE;      in_range = FALSE;
4355    }    }
4356    
# Line 4269  if (range_right >= 0) Line 4363  if (range_right >= 0)
4363    
4364    for (i = 0; i < range_len; i++)    for (i = 0; i < range_len; i++)
4365      {      {
4366      byte_set = bytes + ((range_right - i) * MAX_N_BYTES);      char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4367      SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);      SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4368      byte_set_end = byte_set + byte_set[0];      char_set_end = char_set + char_set[0];
4369      byte_set++;      char_set++;
4370      while (byte_set <= byte_set_end)      while (char_set <= char_set_end)
4371        {        {
4372        if (update_table[*byte_set] > IN_UCHARS(i))        if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4373          update_table[*byte_set] = IN_UCHARS(i);          update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4374        byte_set++;        char_set++;
4375        }        }
4376      }      }
4377    }    }
4378    
4379  offsets[0] = -1;  offset = -1;
4380  /* Scan forward. */  /* Scan forward. */
4381  for (i = 0; i < max; i++)  for (i = 0; i < max; i++)
   if (ones[i] <= 2) {  
     offsets[0] = i;  
     break;  
   }  
   
 if (offsets[0] < 0 && range_right < 0)  
   return FALSE;  
   
 if (offsets[0] >= 0)  
4382    {    {
4383    /* Scan backward. */    if (offset == -1)
   offsets[1] = -1;  
   for (i = max - 1; i > offsets[0]; i--)  
     if (ones[i] <= 2 && i != range_right)  
       {  
       offsets[1] = i;  
       break;  
       }  
   
   /* This case is handled better by fast_forward_first_char. */  
   if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)  
     return FALSE;  
   
   offsets[2] = -1;  
   /* We only search for a middle character if there is no range check. */  
   if (offsets[1] >= 0 && range_right == -1)  
4384      {      {
4385      /* Scan from middle. */      if (chars[i * MAX_DIFF_CHARS] <= 2)
4386      for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)        offset = i;
4387        if (ones[i] <= 2)      }
4388      else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4389        {
4390        if (chars[i * MAX_DIFF_CHARS] == 1)
4391          offset = i;
4392        else
4393          {
4394          mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4395          if (!is_powerof2(mask))
4396          {          {
4397          offsets[2] = i;          mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4398          break;          if (is_powerof2(mask))
4399              offset = i;
4400          }          }
   
     if (offsets[2] == -1)  
       {  
       for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)  
         if (ones[i] <= 2)  
           {  
           offsets[2] = i;  
           break;  
           }  
4401        }        }
4402      }      }
4403      }
4404    
4405    SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));  if (range_right < 0)
4406    SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));    {
4407      if (offset < 0)
4408    chars[0] = chars[offsets[0] << 1];      return FALSE;
4409    chars[1] = chars[(offsets[0] << 1) + 1];    SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4410    if (offsets[2] >= 0)    /* Works whether the count is 1 or 2: this picks the last stored character. */
4411      {    mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4412      chars[2] = chars[offsets[2] << 1];    fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4413      chars[3] = chars[(offsets[2] << 1) + 1];    return TRUE;
     }  
   if (offsets[1] >= 0)  
     {  
     chars[4] = chars[offsets[1] << 1];  
     chars[5] = chars[(offsets[1] << 1) + 1];  
     }  
4414    }    }
4415    
4416    if (range_right == offset)
4417      offset = -1;
4418    
4419    SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
4420    
4421  max -= 1;  max -= 1;
4422  if (firstline)  SLJIT_ASSERT(max > 0);
4423    if (common->first_line_end != 0)
4424    {    {
   SLJIT_ASSERT(common->first_line_end != 0);  
4425    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4426    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4427    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
# Line 4362  if (firstline) Line 4432  if (firstline)
4432  else  else
4433    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4434    
4435    SLJIT_ASSERT(range_right >= 0);
4436    
4437  #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)  #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4438  if (range_right >= 0)  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
   OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);  
4439  #endif  #endif
4440    
4441  start = LABEL();  start = LABEL();
4442  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4443    
 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);  
   
 if (range_right >= 0)  
   {  
4444  #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)  #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4445    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4446  #else  #else
4447    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4448  #endif  #endif
4449    
4450  #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)  #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4451    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4452  #else  #else
4453    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4454  #endif  #endif
4455    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4456    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
   }  
4457    
4458  if (offsets[0] >= 0)  if (offset >= 0)
4459    {    {
4460    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
   if (offsets[1] >= 0)  
     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));  
4461    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4462    
4463    if (chars[1] != 0)    if (chars[offset * MAX_DIFF_CHARS] == 1)
4464      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4465    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);    else
   if (offsets[2] >= 0)  
     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));  
   
   if (offsets[1] >= 0)  
4466      {      {
4467      if (chars[5] != 0)      mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4468        OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);      if (is_powerof2(mask))
4469      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);        {
4470          OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4471          CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4472          }
4473        else
4474          {
4475          match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4476          CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4477          JUMPHERE(match);
4478          }
4479      }      }
4480      }
4481    
4482    if (offsets[2] >= 0)  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4483    if (common->utf && offset != 0)
4484      {
4485      if (offset < 0)
4486      {      {
4487      if (chars[3] != 0)      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4488        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);  
4489      }      }
4490    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));    else
4491        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4492    #if defined COMPILE_PCRE8
4493      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4494      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4495    #elif defined COMPILE_PCRE16
4496      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4497      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4498    #else
4499    #error "Unknown code width"
4500    #endif
4501      if (offset < 0)
4502        OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4503    }    }
4504    #endif
4505    
4506    if (offset >= 0)
4507      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4508    
4509  JUMPHERE(quit);  JUMPHERE(quit);
4510    
4511  if (firstline)  if (common->first_line_end != 0)
4512    {    {
4513    if (range_right >= 0)    if (range_right >= 0)
4514      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
# Line 4438  return TRUE; Line 4526  return TRUE;
4526  }  }
4527    
4528  #undef MAX_N_CHARS  #undef MAX_N_CHARS
4529  #undef MAX_N_BYTES  #undef MAX_DIFF_CHARS
4530    
4531  static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)  static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4532  {  {
# Line 10950  if (common->control_head_ptr != 0) Line 11038  if (common->control_head_ptr != 0)
11038  /* Main part of the matching */  /* Main part of the matching */
11039  if ((re->options & PCRE_ANCHORED) == 0)  if ((re->options & PCRE_ANCHORED) == 0)
11040    {    {
11041    mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);    mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0);
11042    continue_match_label = LABEL();    continue_match_label = LABEL();
11043    /* Forward search if possible. */    /* Forward search if possible. */
11044    if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)    if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
11045      {      {
11046      if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))      if (mode == JIT_COMPILE && fast_forward_first_n_chars(common))
11047        ;        ;
11048      else if ((re->flags & PCRE_FIRSTSET) != 0)      else if ((re->flags & PCRE_FIRSTSET) != 0)
11049        fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);        fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);
# Line 11068  if ((re->options & PCRE_ANCHORED) == 0 & Line 11156  if ((re->options & PCRE_ANCHORED) == 0 &
11156    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
11157    }    }
11158    
11159  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
11160        (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
11161    
11162  if ((re->options & PCRE_ANCHORED) == 0)  if ((re->options & PCRE_ANCHORED) == 0)
11163    {    {
# Line 11079  if ((re->options & PCRE_ANCHORED) == 0) Line 11168  if ((re->options & PCRE_ANCHORED) == 0)
11168      /* There cannot be more newlines here. */      /* There cannot be more newlines here. */
11169      }      }
11170    else    else
11171      {      CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label);
     if ((re->options & PCRE_FIRSTLINE) == 0)  
       CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);  
     else  
       CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);  
     }  
11172    }    }
11173    
11174  /* No more remaining characters. */  /* No more remaining characters. */

Legend:
Removed from v.1632  
changed lines
  Added in v.1633

  ViewVC Help
Powered by ViewVC 1.1.5