/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1419 by zherczeg, Sun Dec 29 04:42:14 2013 UTC revision 1420 by zherczeg, Sun Dec 29 11:43:10 2013 UTC
# Line 369  typedef struct compiler_common { Line 369  typedef struct compiler_common {
369    int endonly;    int endonly;
370    /* Tables. */    /* Tables. */
371    sljit_sw ctypes;    sljit_sw ctypes;
   int digits[2 + MAX_RANGE_SIZE];  
372    /* Named capturing brackets. */    /* Named capturing brackets. */
373    pcre_uchar *name_table;    pcre_uchar *name_table;
374    sljit_sw name_count;    sljit_sw name_count;
# Line 408  typedef struct compiler_common { Line 407  typedef struct compiler_common {
407    jump_list *utfreadchar;    jump_list *utfreadchar;
408  #endif  #endif
409  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
410      jump_list *utfreadchar8;
411    jump_list *utfreadtype8;    jump_list *utfreadtype8;
412  #endif  #endif
413  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
# Line 2512  if (common->utf) Line 2512  if (common->utf)
2512  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2513  }  }
2514    
2515    static void read_char8(compiler_common *common)
2516    {
2517    /* Reads the precise value of a character into TMP1, if the character is
2518    less than 256. Otherwise it returns with a value greater than or equal to 256. */
2519    DEFINE_COMPILER;
2520    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2521    struct sljit_jump *jump;
2522    #endif
2523    
2524    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2525    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2526    
2527    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2528    if (common->utf)
2529      {
2530    #if defined COMPILE_PCRE8
2531      jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2532      add_jump(compiler, &common->utfreadchar8, JUMP(SLJIT_FAST_CALL));
2533      JUMPHERE(jump);
2534    #elif defined COMPILE_PCRE16
2535      /* Skip low surrogate if necessary. */
2536      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2537      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2538      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2539      JUMPHERE(jump);
2540    #endif /* COMPILE_PCRE[8|16] */
2541      }
2542    #endif
2543    }
2544    
2545  static void read_char8_type(compiler_common *common)  static void read_char8_type(compiler_common *common)
2546  {  {
2547  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
# Line 2538  if (common->utf) Line 2568  if (common->utf)
2568    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2569    JUMPHERE(jump);    JUMPHERE(jump);
2570    /* Skip low surrogate if necessary. */    /* Skip low surrogate if necessary. */
2571    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2572    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2573    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2574    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);    JUMPHERE(jump);
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);  
2575  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
2576    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2577    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
# Line 2686  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UC Line 2715  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UC
2715  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2716  }  }
2717    
2718    static void do_utfreadchar8(compiler_common *common)
2719    {
2720    /* Fast decoding of a UTF-8 character. TMP1 contains the first byte
2721    of the character (>= 0xc0). Return value in TMP1: the precise value for a two byte sequence, otherwise 0x800 after skipping the remaining bytes. */
2722    DEFINE_COMPILER;
2723    struct sljit_jump *jump;
2724    
2725    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2726    
2727    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2728    jump = JUMP(SLJIT_C_NOT_ZERO);
2729    /* Two byte sequence. */
2730    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2731    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2732    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2733    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2734    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2735    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2736    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2737    
2738    JUMPHERE(jump);
2739    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2740    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x800);
2741    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2742    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2743    }
2744    
2745  static void do_utfreadtype8(compiler_common *common)  static void do_utfreadtype8(compiler_common *common)
2746  {  {
2747  /* Fast decoding a UTF-8 character type. TMP2 contains the first byte  /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
# Line 2702  jump = JUMP(SLJIT_C_NOT_ZERO); Line 2758  jump = JUMP(SLJIT_C_NOT_ZERO);
2758  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2759  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2760  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2761    /* The upper 5 bits are known at this point. */
2762    compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2763  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2764  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2765  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
2766  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2767  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2768    
2769  JUMPHERE(compare);  JUMPHERE(compare);
2770  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2771  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2772    
2773  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
2774  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);  JUMPHERE(jump);
2775  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2776  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2777    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2778  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2779  }  }
2780    
# Line 3717  sljit_emit_fast_return(compiler, SLJIT_M Line 3774  sljit_emit_fast_return(compiler, SLJIT_M
3774  static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)  static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3775  {  {
3776  DEFINE_COMPILER;  DEFINE_COMPILER;
3777    int offset;
3778    
3779  if (ranges[0] < 0 || ranges[0] > 4)  if (ranges[0] < 0 || ranges[0] > 4)
3780    return FALSE;    return FALSE;
# Line 3726  if (ranges[0] == 0 && ranges[1] == 0) Line 3784  if (ranges[0] == 0 && ranges[1] == 0)
3784    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3785    
3786  if (readch)  if (readch)
3787    read_char(common);    read_char8(common);
3788    
3789  switch(ranges[0])  switch(ranges[0])
3790    {    {
# Line 3790  switch(ranges[0]) Line 3848  switch(ranges[0])
3848    
3849    if (ranges[1] != 0)    if (ranges[1] != 0)
3850      {      {
3851        offset = 0;
3852      if (ranges[2] + 1 != ranges[3])      if (ranges[2] + 1 != ranges[3])
3853        {        {
3854        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3855        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3856        ranges[4] -= ranges[2];        offset = ranges[2];
       ranges[5] -= ranges[2];  
3857        }        }
3858      else      else
3859        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3860    
3861      if (ranges[4] + 1 != ranges[5])      if (ranges[4] + 1 != ranges[5])
3862        {        {
3863        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - offset);
3864        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3865        }        }
3866      else      else
3867        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4] - offset));
3868      return TRUE;      return TRUE;
3869      }      }
3870    
# Line 3827  switch(ranges[0]) Line 3885  switch(ranges[0])
3885    }    }
3886  }  }
3887    
 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)  
 {  
 int i, bit, length;  
 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;  
   
 bit = ctypes[0] & flag;  
 ranges[0] = -1;  
 ranges[1] = bit != 0 ? 1 : 0;  
 length = 0;  
   
 for (i = 1; i < 256; i++)  
   if ((ctypes[i] & flag) != bit)  
     {  
     if (length >= MAX_RANGE_SIZE)  
       return;  
     ranges[2 + length] = i;  
     length++;  
     bit ^= flag;  
     }  
   
 if (bit != 0)  
   {  
   if (length >= MAX_RANGE_SIZE)  
     return;  
   ranges[2 + length] = 256;  
   length++;  
   }  
 ranges[0] = length;  
 }  
   
3888  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3889  {  {
3890  int ranges[2 + MAX_RANGE_SIZE];  int ranges[2 + MAX_RANGE_SIZE];
# Line 4825  switch(type) Line 4853  switch(type)
4853    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
4854    case OP_DIGIT:    case OP_DIGIT:
4855    /* Digits are usually 0-9, so it is worth to optimize them. */    /* Digits are usually 0-9, so it is worth to optimize them. */
   if (common->digits[0] == -2)  
     get_ctype_ranges(common, ctype_digit, common->digits);  
4856    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4857    /* Flip the starting bit in the negative case. */    /* Flip the starting bit in the negative case. */
4858    if (type == OP_NOT_DIGIT)    read_char8_type(common);
4859      common->digits[1] ^= 1;    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4860    if (!check_ranges(common, common->digits, backtracks, TRUE))    add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
     {  
     read_char8_type(common);  
     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);  
     add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));  
     }  
   if (type == OP_NOT_DIGIT)  
     common->digits[1] ^= 1;  
4861    return cc;    return cc;
4862    
4863    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
# Line 5273  switch(type) Line 5292  switch(type)
5292    case OP_CLASS:    case OP_CLASS:
5293    case OP_NCLASS:    case OP_NCLASS:
5294    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5295    read_char(common);    read_char8(common);
5296    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5297      return cc + 32 / sizeof(pcre_uchar);      return cc + 32 / sizeof(pcre_uchar);
5298    
# Line 9335  else Line 9354  else
9354    }    }
9355  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9356  common->ctypes = (sljit_sw)(tables + ctypes_offset);  common->ctypes = (sljit_sw)(tables + ctypes_offset);
 common->digits[0] = -2;  
9357  common->name_table = ((pcre_uchar *)re) + re->name_table_offset;  common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
9358  common->name_count = re->name_count;  common->name_count = re->name_count;
9359  common->name_entry_size = re->name_entry_size;  common->name_entry_size = re->name_entry_size;
# Line 9755  if (common->utfreadchar != NULL) Line 9773  if (common->utfreadchar != NULL)
9773    }    }
9774  #endif /* !COMPILE_PCRE32 */  #endif /* !COMPILE_PCRE32 */
9775  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
9776    if (common->utfreadchar8 != NULL)
9777      {
9778      set_jumps(common->utfreadchar8, LABEL());
9779      do_utfreadchar8(common);
9780      }
9781  if (common->utfreadtype8 != NULL)  if (common->utfreadtype8 != NULL)
9782    {    {
9783    set_jumps(common->utfreadtype8, LABEL());    set_jumps(common->utfreadtype8, LABEL());

Legend:
Removed from v.1419  
changed lines
  Added in v.1420

  ViewVC Help
Powered by ViewVC 1.1.5