/[pcre]/code/tags/pcre-8.37/pcre_jit_compile.c
ViewVC logotype

Diff of /code/tags/pcre-8.37/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1419 by zherczeg, Sun Dec 29 04:42:14 2013 UTC revision 1424 by zherczeg, Tue Dec 31 11:22:31 2013 UTC
# Line 363  typedef struct compiler_common { Line 363  typedef struct compiler_common {
363    BOOL positive_assert;    BOOL positive_assert;
364    /* Newline control. */    /* Newline control. */
365    int nltype;    int nltype;
366      pcre_uint32 nlmax;
367    int newline;    int newline;
368    int bsr_nltype;    int bsr_nltype;
369      pcre_uint32 bsr_nlmax;
370    /* Dollar endonly. */    /* Dollar endonly. */
371    int endonly;    int endonly;
372    /* Tables. */    /* Tables. */
373    sljit_sw ctypes;    sljit_sw ctypes;
   int digits[2 + MAX_RANGE_SIZE];  
374    /* Named capturing brackets. */    /* Named capturing brackets. */
375    pcre_uchar *name_table;    pcre_uchar *name_table;
376    sljit_sw name_count;    sljit_sw name_count;
# Line 404  typedef struct compiler_common { Line 405  typedef struct compiler_common {
405  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
406    BOOL use_ucp;    BOOL use_ucp;
407  #endif  #endif
 #ifndef COMPILE_PCRE32  
   jump_list *utfreadchar;  
 #endif  
408  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
409      jump_list *utfreadchar;
410      jump_list *utfreadchar16;
411    jump_list *utfreadtype8;    jump_list *utfreadtype8;
412  #endif  #endif
413  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
# Line 524  the start pointers when the end of the c Line 524  the start pointers when the end of the c
524  #define GET_LOCAL_BASE(dst, dstw, offset) \  #define GET_LOCAL_BASE(dst, dstw, offset) \
525    sljit_get_local_base(compiler, (dst), (dstw), (offset))    sljit_get_local_base(compiler, (dst), (dstw), (offset))
526    
527    #define READ_CHAR_ANY 0x7fffffff
528    
529  static pcre_uchar* bracketend(pcre_uchar* cc)  static pcre_uchar* bracketend(pcre_uchar* cc)
530  {  {
531  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
# Line 2462  else Line 2464  else
2464  JUMPHERE(jump);  JUMPHERE(jump);
2465  }  }
2466    
2467  static void read_char(compiler_common *common)  static void peek_char(compiler_common *common)
2468  {  {
2469  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2470  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2471  DEFINE_COMPILER;  DEFINE_COMPILER;
2472  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
# Line 2472  struct sljit_jump *jump; Line 2474  struct sljit_jump *jump;
2474  #endif  #endif
2475    
2476  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2477  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2478  if (common->utf)  if (common->utf)
2479    {    {
 #if defined COMPILE_PCRE8  
2480    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2481  #elif defined COMPILE_PCRE16    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);  
 #endif /* COMPILE_PCRE[8|16] */  
2482    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2483      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2484    JUMPHERE(jump);    JUMPHERE(jump);
2485    }    }
2486  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2487    
2488    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2489    if (common->utf)
2490      {
2491      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2492      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2493      /* TMP2 contains the high surrogate. */
2494      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2495      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2496      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2497      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2498      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2499      JUMPHERE(jump);
2500      }
2501    #endif
2502    }
2503    
2504    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2505    
2506    static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2507    {
2508    /* Tells whether the character codes below 128 are enough
2509    to determine a match. */
2510    const pcre_uint8 value = nclass ? 0xff : 0;
2511    const pcre_uint8* end = bitset + 32;
2512    
2513    bitset += 16;
2514    do
2515      {
2516      if (*bitset++ != value)
2517        return FALSE;
2518      }
2519    while (bitset < end);
2520    return TRUE;
2521    }
2522    
2523    static void read_char7_type(compiler_common *common, BOOL full_read)
2524    {
2525    /* Reads the precise character type of a character into TMP1, if the character
2526    is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2527    full_read argument tells whether characters above max are accepted or not. */
2528    DEFINE_COMPILER;
2529    struct sljit_jump *jump;
2530    
2531    SLJIT_ASSERT(common->utf);
2532    
2533    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2534  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2535    
2536    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2537    
2538    if (full_read)
2539      {
2540      jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2541      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2542      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2543      JUMPHERE(jump);
2544      }
2545  }  }
2546    
2547  static void peek_char(compiler_common *common)  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2548    
2549    static void read_char_max(compiler_common *common, pcre_uint32 max, BOOL full_read)
2550  {  {
2551  /* Reads the character into TMP1, keeps STR_PTR.  /* Reads the precise value of a character into TMP1, if the character is
2552  Does not check STR_END. TMP2 Destroyed. */  less than or equal to max. Otherwise it returns with a value greater than max.
2553    Does not check STR_END. The full_read argument tells whether characters above
2554    max are accepted or not. */
2555  DEFINE_COMPILER;  DEFINE_COMPILER;
2556  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2557  struct sljit_jump *jump;  struct sljit_jump *jump;
2558  #endif  #endif
2559    
2560    SLJIT_UNUSED_ARG(full_read);
2561    SLJIT_UNUSED_ARG(max);
2562    
2563  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2564  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2565    
2566    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2567  if (common->utf)  if (common->utf)
2568    {    {
2569  #if defined COMPILE_PCRE8    if (max < 128 && !full_read)
2570        return;
2571    
2572    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2573  #elif defined COMPILE_PCRE16    if (max >= 0x800)
2574    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);      add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2575  #endif /* COMPILE_PCRE[8|16] */    else if (max < 128)
2576    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));      {
2577    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2578        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2579        }
2580      else
2581        {
2582        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2583        if (!full_read)
2584          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2585        else
2586          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2587        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2588        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2589        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2590        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2591        if (full_read)
2592          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2593        }
2594    JUMPHERE(jump);    JUMPHERE(jump);
2595    }    }
2596  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif
2597    
2598    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2599    if (common->utf)
2600      {
2601      if (max >= 0x10000)
2602        {
2603        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2604        jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2605        /* TMP2 contains the high surrogate. */
2606        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2607        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2608        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2609        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2610        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2611        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2612        JUMPHERE(jump);
2613        return;
2614        }
2615    
2616      if (max < 0xd800 && !full_read)
2617        return;
2618    
2619      /* Skip low surrogate if necessary. */
2620      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2621      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2622      if (full_read)
2623        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2624      if (max >= 0xd800)
2625        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2626      JUMPHERE(jump);
2627      }
2628    #endif
2629  }  }
2630    
2631  static void read_char8_type(compiler_common *common)  static SLJIT_INLINE void read_char(compiler_common *common)
2632  {  {
2633  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  read_char_max(common, READ_CHAR_ANY, TRUE);
2634    }
2635    
2636    static void read_char8_type(compiler_common *common, BOOL full_read)
2637    {
2638    /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
2639    The full_read argument tells whether characters above max are accepted or not. */
2640  DEFINE_COMPILER;  DEFINE_COMPILER;
2641  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2642  struct sljit_jump *jump;  struct sljit_jump *jump;
2643  #endif  #endif
2644    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2645    struct sljit_jump *jump2;
2646    #endif
2647    
2648  #ifdef SUPPORT_UTF  SLJIT_UNUSED_ARG(full_read);
2649    
2650    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2651    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2652    
2653    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2654  if (common->utf)  if (common->utf)
2655    {    {
   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 #if defined COMPILE_PCRE8  
2656    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2657    it is needed in most cases. */    it is needed in most cases. */
2658    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2659    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2660    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));    if (!full_read)
2661    JUMPHERE(jump);      {
2662  #elif defined COMPILE_PCRE16      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2663    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2664    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2665    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2666    JUMPHERE(jump);      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2667    /* Skip low surrogate if necessary. */      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2668    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2669    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);      jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2670    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2671    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);      JUMPHERE(jump2);
2672    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);      }
2673  #elif defined COMPILE_PCRE32    else
2674    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
   jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  
2675    JUMPHERE(jump);    JUMPHERE(jump);
 #endif /* COMPILE_PCRE[8|16|32] */  
2676    return;    return;
2677    }    }
2678  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2679  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
2680  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  #if !defined COMPILE_PCRE8
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
2681  /* The ctypes array contains only 256 values. */  /* The ctypes array contains only 256 values. */
2682  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2683  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2684  #endif  #endif
2685  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2686  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if !defined COMPILE_PCRE8
2687  JUMPHERE(jump);  JUMPHERE(jump);
2688  #endif  #endif
2689    
2690    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2691    if (common->utf && full_read)
2692      {
2693      /* Skip low surrogate if necessary. */
2694      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2695      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2696      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2697      JUMPHERE(jump);
2698      }
2699    #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2700  }  }
2701    
2702  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
# Line 2600  if (common->utf) Line 2734  if (common->utf)
2734  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2735  }  }
2736    
2737  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2738  {  {
2739  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2740  DEFINE_COMPILER;  DEFINE_COMPILER;
2741    struct sljit_jump *jump;
2742    
2743  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
2744    {    {
2745    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2746    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2747    }    }
2748  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
2749    {    {
2750    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);    if (jumpifmatch)
2751    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      {
2752    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2753    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2754    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));      }
2755      else
2756        {
2757        jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2758        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2759        JUMPHERE(jump);
2760        }
2761    }    }
2762  else  else
2763    {    {
2764    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2765    add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2766    }    }
2767  }  }
2768    
# Line 2631  else Line 2772  else
2772  static void do_utfreadchar(compiler_common *common)  static void do_utfreadchar(compiler_common *common)
2773  {  {
2774  /* Fast decoding a UTF-8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2775  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2776  DEFINE_COMPILER;  DEFINE_COMPILER;
2777  struct sljit_jump *jump;  struct sljit_jump *jump;
2778    
2779  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2780    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2781    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2782    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2783    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2784    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2785    
2786  /* Searching for the first zero. */  /* Searching for the first zero. */
2787  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2788  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2789  /* Two byte sequence. */  /* Two byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
2790  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2791  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2792    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2793    
2794    JUMPHERE(jump);
2795    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2796    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2797  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2798  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2799  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
2800    
2801  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2802  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2803  /* Three byte sequence. */  /* Three byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  
2804  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2805  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));  
2806  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2807    
2808  /* Four byte sequence. */  /* Four byte sequence. */
2809  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  JUMPHERE(jump);
2810  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2811  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2812    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2813    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2814  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
2815  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2816  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2817    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2818    }
2819    
2820    static void do_utfreadchar16(compiler_common *common)
2821    {
2822    /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2823    of the character (>= 0xc0). Return value in TMP1. */
2824    DEFINE_COMPILER;
2825    struct sljit_jump *jump;
2826    
2827    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2828    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2829    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2830    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2831  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
2832  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2833  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));  
2834  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));  /* Searching for the first zero. */
2835    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2836    jump = JUMP(SLJIT_C_NOT_ZERO);
2837    /* Two byte sequence. */
2838    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2839    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2840    
2841    JUMPHERE(jump);
2842    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2843    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2844    /* This code runs only in 8 bit mode. No need to shift the value. */
2845    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2846    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2847    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2848    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2849  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2850  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2851  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));  /* Three byte sequence. */
2852    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2853  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2854  }  }
2855    
# Line 2702  jump = JUMP(SLJIT_C_NOT_ZERO); Line 2869  jump = JUMP(SLJIT_C_NOT_ZERO);
2869  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2870  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2871  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2872    /* The upper 5 bits are known at this point. */
2873    compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2874  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2875  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2876  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
2877  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2878  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2879    
2880  JUMPHERE(compare);  JUMPHERE(compare);
2881  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2882  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2883    
2884  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 }  
   
 #elif defined COMPILE_PCRE16  
   
 static void do_utfreadchar(compiler_common *common)  
 {  
 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char  
 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */  
 DEFINE_COMPILER;  
 struct sljit_jump *jump;  
   
 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  
 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);  
 /* Do nothing, only return. */  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
   
2885  JUMPHERE(jump);  JUMPHERE(jump);
2886  /* Combine two 16 bit characters. */  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2887  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2888  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);  
2889  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2890  }  }
2891    
2892  #endif /* COMPILE_PCRE[8|16] */  #endif /* COMPILE_PCRE8 */
2893    
2894  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
2895    
# Line 2823  if (firstline) Line 2964  if (firstline)
2964      mainloop = LABEL();      mainloop = LABEL();
2965      /* Continual stores does not cause data dependency. */      /* Continual stores does not cause data dependency. */
2966      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2967      read_char(common);      read_char_max(common, common->nlmax, TRUE);
2968      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
2969      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2970      JUMPHERE(end);      JUMPHERE(end);
# Line 2903  static int scan_prefix(compiler_common * Line 3044  static int scan_prefix(compiler_common *
3044  {  {
3045  /* Recursive function, which scans prefix literals. */  /* Recursive function, which scans prefix literals. */
3046  int len, repeat, len_save, consumed = 0;  int len, repeat, len_save, consumed = 0;
3047  pcre_int32 caseless, chr, mask;  pcre_uint32 caseless, chr, mask;
3048  pcre_uchar *alternative, *cc_save;  pcre_uchar *alternative, *cc_save;
3049  BOOL last, any;  BOOL last, any;
3050    
# Line 3093  while (TRUE) Line 3234  while (TRUE)
3234          return consumed;          return consumed;
3235  #endif  #endif
3236        mask = 0;        mask = 0;
3237        if (len == (caseless & 0xff))        if ((pcre_uint32)len == (caseless & 0xff))
3238          {          {
3239          mask = caseless >> 8;          mask = caseless >> 8;
3240          chr |= mask;          chr |= mask;
# Line 3391  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_ Line 3532  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_
3532  skip_char_back(common);  skip_char_back(common);
3533    
3534  loop = LABEL();  loop = LABEL();
3535  read_char(common);  read_char_max(common, common->nlmax, TRUE);
3536  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3537  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3538    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
# Line 3706  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE Line 3847  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE
3847  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3848  }  }
3849    
3850  /*  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
   range format:  
   
   ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).  
   ranges[1] = first bit (0 or 1)  
   ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)  
 */  
   
 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)  
3851  {  {
3852  DEFINE_COMPILER;  DEFINE_COMPILER;
3853    int ranges[MAX_RANGE_SIZE];
3854    pcre_uint8 bit, cbit, all;
3855    int i, byte, length = 0;
3856    
3857    bit = bits[0] & 0x1;
3858    /* All bits will be zero or one (since bit is zero or one). */
3859    all = -bit;
3860    
3861  if (ranges[0] < 0 || ranges[0] > 4)  for (i = 0; i < 256; )
3862      {
3863      byte = i >> 3;
3864      if ((i & 0x7) == 0 && bits[byte] == all)
3865        i += 8;
3866      else
3867        {
3868        cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3869        if (cbit != bit)
3870          {
3871          if (length >= MAX_RANGE_SIZE)
3872            return FALSE;
3873          ranges[length] = i;
3874          length++;
3875          bit = cbit;
3876          all = -cbit;
3877          }
3878        i++;
3879        }
3880      }
3881    
3882    if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3883      {
3884      if (length >= MAX_RANGE_SIZE)
3885        return FALSE;
3886      ranges[length] = 256;
3887      length++;
3888      }
3889    
3890    if (length < 0 || length > 4)
3891    return FALSE;    return FALSE;
3892    
3893    bit = bits[0] & 0x1;
3894    if (invert) bit ^= 0x1;
3895    
3896  /* No character is accepted. */  /* No character is accepted. */
3897  if (ranges[0] == 0 && ranges[1] == 0)  if (length == 0 && bit == 0)
3898    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3899    
3900  if (readch)  switch(length)
   read_char(common);  
   
 switch(ranges[0])  
3901    {    {
3902    case 0:    case 0:
3903    /* When ranges[1] != 0, all characters are accepted. */    /* When bit != 0, all characters are accepted. */
3904    return TRUE;    return TRUE;
3905    
3906    case 1:    case 1:
3907    add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));    add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3908    return TRUE;    return TRUE;
3909    
3910    case 2:    case 2:
3911    if (ranges[2] + 1 != ranges[3])    if (ranges[0] + 1 != ranges[1])
3912      {      {
3913      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3914      add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3915      }      }
3916    else    else
3917      add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3918    return TRUE;    return TRUE;
3919    
3920    case 3:    case 3:
3921    if (ranges[1] != 0)    if (bit != 0)
3922      {      {
3923      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3924      if (ranges[2] + 1 != ranges[3])      if (ranges[0] + 1 != ranges[1])
3925        {        {
3926        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3927        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3928        }        }
3929      else      else
3930        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3931      return TRUE;      return TRUE;
3932      }      }
3933    
3934    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2]));    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
3935    if (ranges[3] + 1 != ranges[4])    if (ranges[1] + 1 != ranges[2])
3936      {      {
3937      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3]);      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
3938      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3]));      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3939      }      }
3940    else    else
3941      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3]));      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
3942    return TRUE;    return TRUE;
3943    
3944    case 4:    case 4:
3945    if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4])    if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
3946        && (ranges[2] | (ranges[4] - ranges[2])) == ranges[4]        && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
3947        && is_powerof2(ranges[4] - ranges[2]))        && is_powerof2(ranges[2] - ranges[0]))
3948      {      {
3949      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
3950      if (ranges[4] + 1 != ranges[5])      if (ranges[2] + 1 != ranges[3])
3951        {        {
3952        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3953        add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3954        }        }
3955      else      else
3956        add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3957      return TRUE;      return TRUE;
3958      }      }
3959    
3960    if (ranges[1] != 0)    if (bit != 0)
3961      {      {
3962      if (ranges[2] + 1 != ranges[3])      i = 0;
3963        if (ranges[0] + 1 != ranges[1])
3964        {        {
3965        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3966        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3967        ranges[4] -= ranges[2];        i = ranges[0];
       ranges[5] -= ranges[2];  
3968        }        }
3969      else      else
3970        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3971    
3972      if (ranges[4] + 1 != ranges[5])      if (ranges[2] + 1 != ranges[3])
3973        {        {
3974        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
3975        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3976        }        }
3977      else      else
3978        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
3979      return TRUE;      return TRUE;
3980      }      }
3981    
3982    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3983    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[2]));    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
3984    if (ranges[3] + 1 != ranges[4])    if (ranges[1] + 1 != ranges[2])
3985      {      {
3986      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]);      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
3987      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3]));      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3988      }      }
3989    else    else
3990      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3991    return TRUE;    return TRUE;
3992    
3993    default:    default:
# Line 3827  switch(ranges[0]) Line 3996  switch(ranges[0])
3996    }    }
3997  }  }
3998    
 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)  
 {  
 int i, bit, length;  
 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;  
   
 bit = ctypes[0] & flag;  
 ranges[0] = -1;  
 ranges[1] = bit != 0 ? 1 : 0;  
 length = 0;  
   
 for (i = 1; i < 256; i++)  
   if ((ctypes[i] & flag) != bit)  
     {  
     if (length >= MAX_RANGE_SIZE)  
       return;  
     ranges[2 + length] = i;  
     length++;  
     bit ^= flag;  
     }  
   
 if (bit != 0)  
   {  
   if (length >= MAX_RANGE_SIZE)  
     return;  
   ranges[2 + length] = 256;  
   length++;  
   }  
 ranges[0] = length;  
 }  
   
 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)  
 {  
 int ranges[2 + MAX_RANGE_SIZE];  
 pcre_uint8 bit, cbit, all;  
 int i, byte, length = 0;  
   
 bit = bits[0] & 0x1;  
 ranges[1] = !invert ? bit : (bit ^ 0x1);  
 /* All bits will be zero or one (since bit is zero or one). */  
 all = -bit;  
   
 for (i = 0; i < 256; )  
   {  
   byte = i >> 3;  
   if ((i & 0x7) == 0 && bits[byte] == all)  
     i += 8;  
   else  
     {  
     cbit = (bits[byte] >> (i & 0x7)) & 0x1;  
     if (cbit != bit)  
       {  
       if (length >= MAX_RANGE_SIZE)  
         return FALSE;  
       ranges[2 + length] = i;  
       length++;  
       bit = cbit;  
       all = -cbit;  
       }  
     i++;  
     }  
   }  
   
 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))  
   {  
   if (length >= MAX_RANGE_SIZE)  
     return FALSE;  
   ranges[2 + length] = 256;  
   length++;  
   }  
 ranges[0] = length;  
   
 return check_ranges(common, ranges, backtracks, FALSE);  
 }  
   
3999  static void check_anynewline(compiler_common *common)  static void check_anynewline(compiler_common *common)
4000  {  {
4001  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */  /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
# Line 4276  return cc; Line 4371  return cc;
4371    if ((value) != charoffset) \    if ((value) != charoffset) \
4372      { \      { \
4373      if ((value) > charoffset) \      if ((value) > charoffset) \
4374        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4375      else \      else \
4376        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4377      } \      } \
4378    charoffset = (value);    charoffset = (value);
4379    
# Line 4287  static void compile_xclass_matchingpath( Line 4382  static void compile_xclass_matchingpath(
4382  DEFINE_COMPILER;  DEFINE_COMPILER;
4383  jump_list *found = NULL;  jump_list *found = NULL;
4384  jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;  jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4385  pcre_int32 c, charoffset;  sljit_uw c, charoffset, max = 0;
4386  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
4387  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
4388  int compares, invertcmp, numberofcmps;  int compares, invertcmp, numberofcmps;
4389    #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4390    BOOL utf = common->utf;
4391    #endif
4392    
4393  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4394  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
# Line 4300  const pcre_uint32 *other_cases; Line 4398  const pcre_uint32 *other_cases;
4398  pcre_int32 typeoffset;  pcre_int32 typeoffset;
4399  #endif  #endif
4400    
 /* Although SUPPORT_UTF must be defined, we are  
    not necessary in utf mode even in 8 bit mode. */  
 detect_partial_match(common, backtracks);  
 read_char(common);  
   
 cc++;  
 if ((cc[-1] & XCL_HASPROP) == 0)  
   {  
   if ((cc[-1] & XCL_MAP) != 0)  
     {  
     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);  
 #ifdef SUPPORT_UCP  
     charsaved = TRUE;  
 #endif  
     if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks))  
       {  
       jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
   
       OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);  
       OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);  
       OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);  
       OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);  
       OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);  
       add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));  
       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));  
   
       JUMPHERE(jump);  
       }  
     else  
       add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff));  
   
     OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);  
     cc += 32 / sizeof(pcre_uchar);  
     }  
   else  
     add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));  
   }  
 else if ((cc[-1] & XCL_MAP) != 0)  
   {  
   OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);  
 #ifdef SUPPORT_UCP  
   charsaved = TRUE;  
 #endif  
   if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))  
     {  
 #ifdef COMPILE_PCRE8  
     SLJIT_ASSERT(common->utf);  
 #endif  
     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
   
     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);  
     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);  
     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);  
     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);  
     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);  
     add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));  
   
     JUMPHERE(jump);  
     }  
   
   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);  
   cc += 32 / sizeof(pcre_uchar);  
   }  
   
4401  /* Scanning the necessary info. */  /* Scanning the necessary info. */
4402    cc++;
4403  ccbegin = cc;  ccbegin = cc;
4404  compares = 0;  compares = 0;
4405    if (cc[-1] & XCL_MAP) cc += 32 / sizeof(pcre_uchar);
4406    
4407  while (*cc != XCL_END)  while (*cc != XCL_END)
4408    {    {
4409    compares++;    compares++;
4410    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4411      {      {
4412      cc += 2;      cc ++;
4413  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4414      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (c > max) max = c;
 #endif  
4415  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4416      needschar = TRUE;      needschar = TRUE;
4417  #endif  #endif
# Line 4386  while (*cc != XCL_END) Line 4422  while (*cc != XCL_END)
4422  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
4423      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4424  #endif  #endif
4425      cc++;      GETCHARINCTEST(c, cc);
4426  #ifdef SUPPORT_UTF      if (c > max) max = c;
     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
4427  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4428      needschar = TRUE;      needschar = TRUE;
4429  #endif  #endif
# Line 4399  while (*cc != XCL_END) Line 4433  while (*cc != XCL_END)
4433      {      {
4434      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4435      cc++;      cc++;
4436        if (*cc == PT_CLIST)
4437          {
4438          other_cases = PRIV(ucd_caseless_sets) + cc[1];
4439          while (*other_cases != NOTACHAR)
4440            {
4441            if (*other_cases > max) max = *other_cases;
4442            other_cases++;
4443            }
4444          }
4445        else
4446          max = READ_CHAR_ANY;
4447    
4448      switch(*cc)      switch(*cc)
4449        {        {
4450        case PT_ANY:        case PT_ANY:
# Line 4439  while (*cc != XCL_END) Line 4485  while (*cc != XCL_END)
4485  #endif  #endif
4486    }    }
4487    
4488    /* We are not necessary in utf mode even in 8 bit mode. */
4489    cc = ccbegin;
4490    detect_partial_match(common, backtracks);
4491    read_char_max(common, max, (cc[0] & XCL_NOT) != 0);
4492    
4493    if ((cc[-1] & XCL_HASPROP) == 0)
4494      {
4495      if ((cc[-1] & XCL_MAP) != 0)
4496        {
4497        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4498        if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4499          {
4500          OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4501          OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4502          OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4503          OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4504          OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4505          add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4506          }
4507    
4508        add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4509        JUMPHERE(jump);
4510    
4511        cc += 32 / sizeof(pcre_uchar);
4512        }
4513      else
4514        add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4515      }
4516    else if ((cc[-1] & XCL_MAP) != 0)
4517      {
4518      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4519    #ifdef SUPPORT_UCP
4520      charsaved = TRUE;
4521    #endif
4522      if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4523        {
4524    #ifdef COMPILE_PCRE8
4525        SLJIT_ASSERT(common->utf);
4526    #endif
4527        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4528    
4529        OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4530        OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4531        OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4532        OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4533        OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4534        add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4535    
4536        JUMPHERE(jump);
4537        }
4538    
4539      OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4540      cc += 32 / sizeof(pcre_uchar);
4541      }
4542    
4543  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4544  /* Simple register allocation. TMP1 is preferred if possible. */  /* Simple register allocation. TMP1 is preferred if possible. */
4545  if (needstype || needsscript)  if (needstype || needsscript)
# Line 4480  if (needstype || needsscript) Line 4581  if (needstype || needsscript)
4581  #endif  #endif
4582    
4583  /* Generating code. */  /* Generating code. */
 cc = ccbegin;  
4584  charoffset = 0;  charoffset = 0;
4585  numberofcmps = 0;  numberofcmps = 0;
4586  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 4496  while (*cc != XCL_END) Line 4596  while (*cc != XCL_END)
4596    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4597      {      {
4598      cc ++;      cc ++;
4599  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4600    
4601      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4602        {        {
4603        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4604        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4605        numberofcmps++;        numberofcmps++;
4606        }        }
4607      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4608        {        {
4609        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4610        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4611        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4612        numberofcmps = 0;        numberofcmps = 0;
4613        }        }
4614      else      else
4615        {        {
4616        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4617        numberofcmps = 0;        numberofcmps = 0;
4618        }        }
4619      }      }
4620    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
4621      {      {
4622      cc ++;      cc ++;
4623  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4624      SET_CHAR_OFFSET(c);      SET_CHAR_OFFSET(c);
4625  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4626      if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4627      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4628        {        {
4629        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4630        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4631        numberofcmps++;        numberofcmps++;
4632        }        }
4633      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4634        {        {
4635        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4636        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4637        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4638        numberofcmps = 0;        numberofcmps = 0;
4639        }        }
4640      else      else
4641        {        {
4642        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4643        numberofcmps = 0;        numberofcmps = 0;
4644        }        }
4645      }      }
# Line 4625  while (*cc != XCL_END) Line 4705  while (*cc != XCL_END)
4705        break;        break;
4706    
4707        case PT_WORD:        case PT_WORD:
4708        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
4709        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4710        /* Fall through. */        /* Fall through. */
4711    
# Line 4673  while (*cc != XCL_END) Line 4753  while (*cc != XCL_END)
4753          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4754          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4755    
4756          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
4757          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4758    
4759          other_cases += 3;          other_cases += 3;
4760          }          }
4761        else        else
4762          {          {
4763          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4764          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4765          }          }
4766    
4767        while (*other_cases != NOTACHAR)        while (*other_cases != NOTACHAR)
4768          {          {
4769          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4770          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4771          }          }
4772        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4773        break;        break;
4774    
4775        case PT_UCNC:        case PT_UCNC:
4776        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
4777        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4778        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
4779        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4780        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
4781        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4782    
4783        SET_CHAR_OFFSET(0xa0);        SET_CHAR_OFFSET(0xa0);
4784        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
4785        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4786        SET_CHAR_OFFSET(0);        SET_CHAR_OFFSET(0);
4787        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
# Line 4800  struct sljit_label *label; Line 4880  struct sljit_label *label;
4880  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4881  pcre_uchar propdata[5];  pcre_uchar propdata[5];
4882  #endif  #endif
4883  #endif  #endif /* SUPPORT_UTF */
4884    
4885  switch(type)  switch(type)
4886    {    {
# Line 4825  switch(type) Line 4905  switch(type)
4905    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
4906    case OP_DIGIT:    case OP_DIGIT:
4907    /* Digits are usually 0-9, so it is worth to optimize them. */    /* Digits are usually 0-9, so it is worth to optimize them. */
   if (common->digits[0] == -2)  
     get_ctype_ranges(common, ctype_digit, common->digits);  
4908    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4909    /* Flip the starting bit in the negative case. */  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4910    if (type == OP_NOT_DIGIT)    if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
4911      common->digits[1] ^= 1;      read_char7_type(common, type == OP_NOT_DIGIT);
4912    if (!check_ranges(common, common->digits, backtracks, TRUE))    else
4913      {  #endif
4914      read_char8_type(common);      read_char8_type(common, type == OP_NOT_DIGIT);
4915      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);      /* Flip the starting bit in the negative case. */
4916      add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4917      }    add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
   if (type == OP_NOT_DIGIT)  
     common->digits[1] ^= 1;  
4918    return cc;    return cc;
4919    
4920    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
4921    case OP_WHITESPACE:    case OP_WHITESPACE:
4922    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4923    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4924      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
4925        read_char7_type(common, type == OP_NOT_WHITESPACE);
4926      else
4927    #endif
4928        read_char8_type(common, type == OP_NOT_WHITESPACE);
4929    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4930    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4931    return cc;    return cc;
# Line 4852  switch(type) Line 4933  switch(type)
4933    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
4934    case OP_WORDCHAR:    case OP_WORDCHAR:
4935    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4936    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4937      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
4938        read_char7_type(common, type == OP_NOT_WORDCHAR);
4939      else
4940    #endif
4941        read_char8_type(common, type == OP_NOT_WORDCHAR);
4942    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4943    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4944    return cc;    return cc;
4945    
4946    case OP_ANY:    case OP_ANY:
4947    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4948    read_char(common);    read_char_max(common, common->nlmax, TRUE);
4949    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4950      {      {
4951      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
# Line 4927  switch(type) Line 5013  switch(type)
5013    
5014    case OP_ANYNL:    case OP_ANYNL:
5015    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5016    read_char(common);    read_char_max(common, common->bsr_nlmax, FALSE);
5017    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5018    /* We don't need to handle soft partial matching case. */    /* We don't need to handle soft partial matching case. */
5019    end_list = NULL;    end_list = NULL;
# Line 4949  switch(type) Line 5035  switch(type)
5035    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
5036    case OP_HSPACE:    case OP_HSPACE:
5037    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5038    read_char(common);    read_char_max(common, 0x3000, type == OP_NOT_HSPACE);
5039    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5040    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5041    return cc;    return cc;
# Line 4957  switch(type) Line 5043  switch(type)
5043    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
5044    case OP_VSPACE:    case OP_VSPACE:
5045    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5046    read_char(common);    read_char_max(common, 0x2029, type == OP_NOT_VSPACE);
5047    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5048    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5049    return cc;    return cc;
# Line 5056  switch(type) Line 5142  switch(type)
5142      else      else
5143        {        {
5144        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5145        read_char(common);        read_char_max(common, common->nlmax, TRUE);
5146        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5147        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5148        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
# Line 5104  switch(type) Line 5190  switch(type)
5190    else    else
5191      {      {
5192      skip_char_back(common);      skip_char_back(common);
5193      read_char(common);      read_char_max(common, common->nlmax, TRUE);
5194      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5195      }      }
5196    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 5179  switch(type) Line 5265  switch(type)
5265  #endif  #endif
5266      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5267      }      }
5268    
5269    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
   read_char(common);  
5270  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
5271    if (common->utf)    if (common->utf)
5272      {      {
# Line 5189  switch(type) Line 5275  switch(type)
5275    else    else
5276  #endif  #endif
5277      c = *cc;      c = *cc;
5278    
5279    if (type == OP_CHAR || !char_has_othercase(common, cc))    if (type == OP_CHAR || !char_has_othercase(common, cc))
5280      {      {
5281        read_char_max(common, c, FALSE);
5282      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5283      return cc + length;      return cc + length;
5284      }      }
5285    oc = char_othercase(common, c);    oc = char_othercase(common, c);
5286      read_char_max(common, c > oc ? c : oc, FALSE);
5287    bit = c ^ oc;    bit = c ^ oc;
5288    if (is_powerof2(bit))    if (is_powerof2(bit))
5289      {      {
# Line 5202  switch(type) Line 5291  switch(type)
5291      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5292      return cc + length;      return cc + length;
5293      }      }
5294    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);    jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5295    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5296    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);    JUMPHERE(jump[0]);
   OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
   add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));  
5297    return cc + length;    return cc + length;
5298    
5299    case OP_NOT:    case OP_NOT:
# Line 5241  switch(type) Line 5328  switch(type)
5328  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
5329        {        {
5330        GETCHARLEN(c, cc, length);        GETCHARLEN(c, cc, length);
       read_char(common);  
5331        }        }
5332      }      }
5333    else    else
5334  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
     {  
     read_char(common);  
5335      c = *cc;      c = *cc;
     }  
5336    
5337    if (type == OP_NOT || !char_has_othercase(common, cc))    if (type == OP_NOT || !char_has_othercase(common, cc))
5338        {
5339        read_char_max(common, c, TRUE);
5340      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5341        }
5342    else    else
5343      {      {
5344      oc = char_othercase(common, c);      oc = char_othercase(common, c);
5345        read_char_max(common, c > oc ? c : oc, TRUE);
5346      bit = c ^ oc;      bit = c ^ oc;
5347      if (is_powerof2(bit))      if (is_powerof2(bit))
5348        {        {
# Line 5273  switch(type) Line 5360  switch(type)
5360    case OP_CLASS:    case OP_CLASS:
5361    case OP_NCLASS:    case OP_NCLASS:
5362    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5363    read_char(common);  
5364    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5365      bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5366      read_char_max(common, bit, type == OP_NCLASS);
5367    #else
5368      read_char_max(common, 255, type == OP_NCLASS);
5369    #endif
5370    
5371    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5372      return cc + 32 / sizeof(pcre_uchar);      return cc + 32 / sizeof(pcre_uchar);
5373    
5374  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5375    jump[0] = NULL;    jump[0] = NULL;
 #ifdef COMPILE_PCRE8  
   /* This check only affects 8 bit mode. In other modes, we  
   always need to compare the value with 255. */  
5376    if (common->utf)    if (common->utf)
 #endif /* COMPILE_PCRE8 */  
5377      {      {
5378      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5379      if (type == OP_CLASS)      if (type == OP_CLASS)
5380        {        {
5381        add_jump(compiler, backtracks, jump[0]);        add_jump(compiler, backtracks, jump[0]);
5382        jump[0] = NULL;        jump[0] = NULL;
5383        }        }
5384      }      }
5385  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #elif !defined COMPILE_PCRE8
5386      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5387      if (type == OP_CLASS)
5388        {
5389        add_jump(compiler, backtracks, jump[0]);
5390        jump[0] = NULL;
5391        }
5392    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5393    
5394    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5395    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5396    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5397    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5398    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5399    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5400    
5401  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5402    if (jump[0] != NULL)    if (jump[0] != NULL)
5403      JUMPHERE(jump[0]);      JUMPHERE(jump[0]);
5404  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #endif
5405    
5406    return cc + 32 / sizeof(pcre_uchar);    return cc + 32 / sizeof(pcre_uchar);
5407    
5408  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
# Line 9321  switch(re->options & PCRE_NEWLINE_BITS) Line 9421  switch(re->options & PCRE_NEWLINE_BITS)
9421    case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;    case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9422    default: return;    default: return;
9423    }    }
9424    common->nlmax = READ_CHAR_ANY;
9425  if ((re->options & PCRE_BSR_ANYCRLF) != 0)  if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9426    common->bsr_nltype = NLTYPE_ANYCRLF;    common->bsr_nltype = NLTYPE_ANYCRLF;
9427  else if ((re->options & PCRE_BSR_UNICODE) != 0)  else if ((re->options & PCRE_BSR_UNICODE) != 0)
# Line 9333  else Line 9434  else
9434    common->bsr_nltype = NLTYPE_ANY;    common->bsr_nltype = NLTYPE_ANY;
9435  #endif  #endif
9436    }    }
9437    common->bsr_nlmax = READ_CHAR_ANY;
9438  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9439  common->ctypes = (sljit_sw)(tables + ctypes_offset);  common->ctypes = (sljit_sw)(tables + ctypes_offset);
 common->digits[0] = -2;  
9440  common->name_table = ((pcre_uchar *)re) + re->name_table_offset;  common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
9441  common->name_count = re->name_count;  common->name_count = re->name_count;
9442  common->name_entry_size = re->name_entry_size;  common->name_entry_size = re->name_entry_size;
# Line 9346  common->utf = (re->options & PCRE_UTF8) Line 9447  common->utf = (re->options & PCRE_UTF8)
9447  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
9448  common->use_ucp = (re->options & PCRE_UCP) != 0;  common->use_ucp = (re->options & PCRE_UCP) != 0;
9449  #endif  #endif
9450    if (common->utf)
9451      {
9452      if (common->nltype == NLTYPE_ANY)
9453        common->nlmax = 0x2029;
9454      else if (common->nltype == NLTYPE_ANYCRLF)
9455        common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9456      else
9457        {
9458        /* We only care about the first newline character. */
9459        common->nlmax = common->newline & 0xff;
9460        }
9461    
9462      if (common->bsr_nltype == NLTYPE_ANY)
9463        common->bsr_nlmax = 0x2029;
9464      else
9465        common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9466      }
9467  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
9468  ccend = bracketend(rootbacktrack.cc);  ccend = bracketend(rootbacktrack.cc);
9469    
# Line 9747  if (common->reset_match != NULL) Line 9865  if (common->reset_match != NULL)
9865    JUMPTO(SLJIT_JUMP, reset_match_label);    JUMPTO(SLJIT_JUMP, reset_match_label);
9866    }    }
9867  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
9868  #ifndef COMPILE_PCRE32  #ifdef COMPILE_PCRE8
9869  if (common->utfreadchar != NULL)  if (common->utfreadchar != NULL)
9870    {    {
9871    set_jumps(common->utfreadchar, LABEL());    set_jumps(common->utfreadchar, LABEL());
9872    do_utfreadchar(common);    do_utfreadchar(common);
9873    }    }
9874  #endif /* !COMPILE_PCRE32 */  if (common->utfreadchar16 != NULL)
9875  #ifdef COMPILE_PCRE8    {
9876      set_jumps(common->utfreadchar16, LABEL());
9877      do_utfreadchar16(common);
9878      }
9879  if (common->utfreadtype8 != NULL)  if (common->utfreadtype8 != NULL)
9880    {    {
9881    set_jumps(common->utfreadtype8, LABEL());    set_jumps(common->utfreadtype8, LABEL());

Legend:
Removed from v.1419  
changed lines
  Added in v.1424

  ViewVC Help
Powered by ViewVC 1.1.5