/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory | Revision Log | View Patch

revision 1367 by zherczeg, Mon Oct 7 07:41:44 2013 UTC | revision 1423 by zherczeg, Tue Dec 31 07:57:56 2013 UTC
# Line 306  typedef struct then_trap_backtrack { Line 306  typedef struct then_trap_backtrack {
306    int framesize;    int framesize;
307  } then_trap_backtrack;  } then_trap_backtrack;
308    
309  #define MAX_RANGE_SIZE 6  #define MAX_RANGE_SIZE 4
310    
311  typedef struct compiler_common {  typedef struct compiler_common {
312    /* The sljit ceneric compiler. */    /* The sljit ceneric compiler. */
# Line 363  typedef struct compiler_common { Line 363  typedef struct compiler_common {
363    BOOL positive_assert;    BOOL positive_assert;
364    /* Newline control. */    /* Newline control. */
365    int nltype;    int nltype;
366      pcre_uint32 nlmax;
367    int newline;    int newline;
368    int bsr_nltype;    int bsr_nltype;
369      pcre_uint32 bsr_nlmax;
370    /* Dollar endonly. */    /* Dollar endonly. */
371    int endonly;    int endonly;
372    /* Tables. */    /* Tables. */
373    sljit_sw ctypes;    sljit_sw ctypes;
   int digits[2 + MAX_RANGE_SIZE];  
374    /* Named capturing brackets. */    /* Named capturing brackets. */
375    pcre_uchar *name_table;    pcre_uchar *name_table;
376    sljit_sw name_count;    sljit_sw name_count;
# Line 404  typedef struct compiler_common { Line 405  typedef struct compiler_common {
405  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
406    BOOL use_ucp;    BOOL use_ucp;
407  #endif  #endif
 #ifndef COMPILE_PCRE32  
   jump_list *utfreadchar;  
 #endif  
408  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
409      jump_list *utfreadchar;
410      jump_list *utfreadchar16;
411    jump_list *utfreadtype8;    jump_list *utfreadtype8;
412  #endif  #endif
413  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
# Line 524  the start pointers when the end of the c Line 524  the start pointers when the end of the c
524  #define GET_LOCAL_BASE(dst, dstw, offset) \  #define GET_LOCAL_BASE(dst, dstw, offset) \
525    sljit_get_local_base(compiler, (dst), (dstw), (offset))    sljit_get_local_base(compiler, (dst), (dstw), (offset))
526    
527    #define READ_CHAR_ANY 0x7fffffff
528    
529  static pcre_uchar* bracketend(pcre_uchar* cc)  static pcre_uchar* bracketend(pcre_uchar* cc)
530  {  {
531  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
# Line 533  cc += 1 + LINK_SIZE; Line 535  cc += 1 + LINK_SIZE;
535  return cc;  return cc;
536  }  }
537    
538    static int ones_in_half_byte[16] = {
539      /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
540      /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
541    };
542    
543  /* Functions whose might need modification for all new supported opcodes:  /* Functions whose might need modification for all new supported opcodes:
544   next_opcode   next_opcode
545   check_opcode_types   check_opcode_types
# Line 585  switch(*cc) Line 592  switch(*cc)
592    case OP_CRMINQUERY:    case OP_CRMINQUERY:
593    case OP_CRRANGE:    case OP_CRRANGE:
594    case OP_CRMINRANGE:    case OP_CRMINRANGE:
595      case OP_CRPOSSTAR:
596      case OP_CRPOSPLUS:
597      case OP_CRPOSQUERY:
598      case OP_CRPOSRANGE:
599    case OP_CLASS:    case OP_CLASS:
600    case OP_NCLASS:    case OP_NCLASS:
601    case OP_REF:    case OP_REF:
602    case OP_REFI:    case OP_REFI:
603      case OP_DNREF:
604      case OP_DNREFI:
605    case OP_RECURSE:    case OP_RECURSE:
606    case OP_CALLOUT:    case OP_CALLOUT:
607    case OP_ALT:    case OP_ALT:
# Line 736  switch(*cc) Line 749  switch(*cc)
749    
750  static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
751  {  {
752    int count;
753  pcre_uchar *slot;  pcre_uchar *slot;
 int i;  
754    
755  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
756  while (cc < ccend)  while (cc < ccend)
# Line 771  while (cc < ccend) Line 784  while (cc < ccend)
784      break;      break;
785    
786      case OP_CREF:      case OP_CREF:
787      i = GET2(cc, 1);      common->optimized_cbracket[GET2(cc, 1)] = 0;
     common->optimized_cbracket[i] = 0;  
788      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
789      break;      break;
790    
791        case OP_DNREF:
792        case OP_DNREFI:
793      case OP_DNCREF:      case OP_DNCREF:
794      i = GET2(cc, 1 + IMM2_SIZE);      count = GET2(cc, 1 + IMM2_SIZE);
795      slot = common->name_table + GET2(cc, 1) * common->name_entry_size;      slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
796      while (i-- > 0)      while (count-- > 0)
797        {        {
798        common->optimized_cbracket[GET2(slot, 0)] = 0;        common->optimized_cbracket[GET2(slot, 0)] = 0;
799        slot += common->name_entry_size;        slot += common->name_entry_size;
# Line 2450  else Line 2464  else
2464  JUMPHERE(jump);  JUMPHERE(jump);
2465  }  }
2466    
2467  static void read_char(compiler_common *common)  static void peek_char(compiler_common *common)
2468  {  {
2469  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2470  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2471  DEFINE_COMPILER;  DEFINE_COMPILER;
2472  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
# Line 2460  struct sljit_jump *jump; Line 2474  struct sljit_jump *jump;
2474  #endif  #endif
2475    
2476  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2477  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2478  if (common->utf)  if (common->utf)
2479    {    {
 #if defined COMPILE_PCRE8  
2480    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2481  #elif defined COMPILE_PCRE16    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);  
 #endif /* COMPILE_PCRE[8|16] */  
2482    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2483      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2484    JUMPHERE(jump);    JUMPHERE(jump);
2485    }    }
2486  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2487    
2488    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2489    if (common->utf)
2490      {
2491      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2492      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2493      /* TMP2 contains the high surrogate. */
2494      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2495      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2496      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2497      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2498      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2499      JUMPHERE(jump);
2500      }
2501    #endif
2502    }
2503    
2504    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2505    
2506    static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2507    {
2508    /* Tells whether the character codes below 128 are enough
2509    to determine a match. */
2510    const pcre_uint8 value = nclass ? 0xff : 0;
2511    const pcre_uint8* end = bitset + 32;
2512    
2513    bitset += 16;
2514    do
2515      {
2516      if (*bitset++ != value)
2517        return FALSE;
2518      }
2519    while (bitset < end);
2520    return TRUE;
2521    }
2522    
2523    static void read_char7_type(compiler_common *common, BOOL full_read)
2524    {
2525    /* Reads the precise character type of a character into TMP1, if the character
2526    is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2527    full_read argument tells whether characters above max are accepted or not. */
2528    DEFINE_COMPILER;
2529    struct sljit_jump *jump;
2530    
2531    SLJIT_ASSERT(common->utf);
2532    
2533    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2534  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2535    
2536    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2537    
2538    if (full_read)
2539      {
2540      jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2541      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2542      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2543      JUMPHERE(jump);
2544      }
2545  }  }
2546    
2547  static void peek_char(compiler_common *common)  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2548    
2549    static void read_char_max(compiler_common *common, pcre_uint32 max, BOOL full_read)
2550  {  {
2551  /* Reads the character into TMP1, keeps STR_PTR.  /* Reads the precise value of a character into TMP1, if the character is
2552  Does not check STR_END. TMP2 Destroyed. */  less than or equal to max. Otherwise it returns with a value greater than max.
2553    Does not check STR_END. The full_read argument tells whether characters above
2554    max are accepted or not. */
2555  DEFINE_COMPILER;  DEFINE_COMPILER;
2556  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2557  struct sljit_jump *jump;  struct sljit_jump *jump;
2558  #endif  #endif
2559    
2560    SLJIT_UNUSED_ARG(full_read);
2561    SLJIT_UNUSED_ARG(max);
2562    
2563  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2564  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2565    
2566    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2567  if (common->utf)  if (common->utf)
2568    {    {
2569  #if defined COMPILE_PCRE8    if (max < 128 && !full_read)
2570        return;
2571    
2572    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2573  #elif defined COMPILE_PCRE16    if (max >= 0x800)
2574    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);      add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2575  #endif /* COMPILE_PCRE[8|16] */    else if (max < 128)
2576    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));      {
2577    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2578        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2579        }
2580      else
2581        {
2582        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2583        if (!full_read)
2584          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2585        else
2586          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2587        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2588        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2589        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2590        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2591        if (full_read)
2592          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2593        }
2594    JUMPHERE(jump);    JUMPHERE(jump);
2595    }    }
2596  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif
2597    
2598    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2599    if (common->utf)
2600      {
2601      if (max >= 0x10000)
2602        {
2603        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2604        jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2605        /* TMP2 contains the high surrogate. */
2606        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2607        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2608        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2609        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2610        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2611        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2612        JUMPHERE(jump);
2613        return;
2614        }
2615    
2616      if (max < 0xd800 && !full_read)
2617        return;
2618    
2619      /* Skip low surrogate if necessary. */
2620      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2621      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2622      if (full_read)
2623        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2624      if (max >= 0xd800)
2625        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2626      JUMPHERE(jump);
2627      }
2628    #endif
2629  }  }
2630    
2631  static void read_char8_type(compiler_common *common)  static SLJIT_INLINE void read_char(compiler_common *common)
2632  {  {
2633  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  read_char_max(common, READ_CHAR_ANY, TRUE);
2634    }
2635    
2636    static void read_char8_type(compiler_common *common, BOOL full_read)
2637    {
2638    /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
2639    The full_read argument tells whether characters above max are accepted or not. */
2640  DEFINE_COMPILER;  DEFINE_COMPILER;
2641  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2642  struct sljit_jump *jump;  struct sljit_jump *jump;
2643  #endif  #endif
2644    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2645    struct sljit_jump *jump2;
2646    #endif
2647    
2648  #ifdef SUPPORT_UTF  SLJIT_UNUSED_ARG(full_read);
2649    
2650    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2651    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2652    
2653    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2654  if (common->utf)  if (common->utf)
2655    {    {
   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 #if defined COMPILE_PCRE8  
2656    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2657    it is needed in most cases. */    it is needed in most cases. */
2658    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2659    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2660    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));    if (!full_read)
2661    JUMPHERE(jump);      {
2662  #elif defined COMPILE_PCRE16      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2663    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2664    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2665    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2666    JUMPHERE(jump);      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2667    /* Skip low surrogate if necessary. */      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2668    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2669    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);      jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2670    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2671    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);      JUMPHERE(jump2);
2672    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);      }
2673  #elif defined COMPILE_PCRE32    else
2674    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
   jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  
2675    JUMPHERE(jump);    JUMPHERE(jump);
 #endif /* COMPILE_PCRE[8|16|32] */  
2676    return;    return;
2677    }    }
2678  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2679  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
2680  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  #if !defined COMPILE_PCRE8
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
2681  /* The ctypes array contains only 256 values. */  /* The ctypes array contains only 256 values. */
2682  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2683  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2684  #endif  #endif
2685  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2686  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if !defined COMPILE_PCRE8
2687  JUMPHERE(jump);  JUMPHERE(jump);
2688  #endif  #endif
2689    
2690    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2691    if (common->utf && full_read)
2692      {
2693      /* Skip low surrogate if necessary. */
2694      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2695      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2696      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2697      JUMPHERE(jump);
2698      }
2699    #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2700  }  }
2701    
2702  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
# Line 2588  if (common->utf) Line 2734  if (common->utf)
2734  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2735  }  }
2736    
2737  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2738  {  {
2739  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2740  DEFINE_COMPILER;  DEFINE_COMPILER;
2741    struct sljit_jump *jump;
2742    
2743  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
2744    {    {
2745    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2746    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2747    }    }
2748  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
2749    {    {
2750    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);    if (jumpifmatch)
2751    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      {
2752    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2753    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2754    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));      }
2755      else
2756        {
2757        jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2758        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2759        JUMPHERE(jump);
2760        }
2761    }    }
2762  else  else
2763    {    {
2764    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2765    add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2766    }    }
2767  }  }
2768    
# Line 2619  else Line 2772  else
2772  static void do_utfreadchar(compiler_common *common)  static void do_utfreadchar(compiler_common *common)
2773  {  {
2774  /* Fast decoding a UTF-8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2775  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2776  DEFINE_COMPILER;  DEFINE_COMPILER;
2777  struct sljit_jump *jump;  struct sljit_jump *jump;
2778    
2779  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2780    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2781    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2782    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2783    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2784    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2785    
2786  /* Searching for the first zero. */  /* Searching for the first zero. */
2787  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2788  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2789  /* Two byte sequence. */  /* Two byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
2790  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2791  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2792    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2793    
2794    JUMPHERE(jump);
2795    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2796    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2797  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2798  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2799  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
2800    
2801  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2802  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2803  /* Three byte sequence. */  /* Three byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  
2804  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2805  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));  
2806  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2807    
2808  /* Four byte sequence. */  /* Four byte sequence. */
2809  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  JUMPHERE(jump);
2810  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2811  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2812    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2813    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2814  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
2815  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2816  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2817    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2818    }
2819    
2820    static void do_utfreadchar16(compiler_common *common)
2821    {
2822    /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2823    of the character (>= 0xc0). Return value in TMP1. */
2824    DEFINE_COMPILER;
2825    struct sljit_jump *jump;
2826    
2827    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2828    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2829    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2830    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2831  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
2832  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2833  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));  
2834  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));  /* Searching for the first zero. */
2835    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2836    jump = JUMP(SLJIT_C_NOT_ZERO);
2837    /* Two byte sequence. */
2838    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2839    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2840    
2841    JUMPHERE(jump);
2842    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2843    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2844    /* This code runs only in 8 bit mode. No need to shift the value. */
2845    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2846    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2847    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2848    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2849  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2850  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2851  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));  /* Three byte sequence. */
2852    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2853  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2854  }  }
2855    
# Line 2690  jump = JUMP(SLJIT_C_NOT_ZERO); Line 2869  jump = JUMP(SLJIT_C_NOT_ZERO);
2869  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2870  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2871  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2872    /* The upper 5 bits are known at this point. */
2873    compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2874  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2875  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2876  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
2877  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2878  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2879    
2880  JUMPHERE(compare);  JUMPHERE(compare);
2881  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2882  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2883    
2884  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 }  
   
 #elif defined COMPILE_PCRE16  
   
 static void do_utfreadchar(compiler_common *common)  
 {  
 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char  
 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */  
 DEFINE_COMPILER;  
 struct sljit_jump *jump;  
   
 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  
 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);  
 /* Do nothing, only return. */  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
   
2885  JUMPHERE(jump);  JUMPHERE(jump);
2886  /* Combine two 16 bit characters. */  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2887  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2888  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);  
2889  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2890  }  }
2891    
2892  #endif /* COMPILE_PCRE[8|16] */  #endif /* COMPILE_PCRE8 */
2893    
2894  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
2895    
# Line 2811  if (firstline) Line 2964  if (firstline)
2964      mainloop = LABEL();      mainloop = LABEL();
2965      /* Continual stores does not cause data dependency. */      /* Continual stores does not cause data dependency. */
2966      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2967      read_char(common);      read_char_max(common, common->nlmax, TRUE);
2968      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
2969      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2970      JUMPHERE(end);      JUMPHERE(end);
# Line 2887  if (newlinecheck) Line 3040  if (newlinecheck)
3040  return mainloop;  return mainloop;
3041  }  }
3042    
3043  #define MAX_N_CHARS 3  static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
   
 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)  
3044  {  {
3045  DEFINE_COMPILER;  /* Recursive function, which scans prefix literals. */
3046  struct sljit_label *start;  int len, repeat, len_save, consumed = 0;
3047  struct sljit_jump *quit;  pcre_uint32 caseless, chr, mask;
3048  pcre_uint32 chars[MAX_N_CHARS * 2];  pcre_uchar *alternative, *cc_save;
3049  pcre_uchar *cc = common->start + 1 + LINK_SIZE;  BOOL last, any;
 int location = 0;  
 pcre_int32 len, c, bit, caseless;  
 int must_stop;  
   
 /* We do not support alternatives now. */  
 if (*(common->start + GET(common->start, 1)) == OP_ALT)  
   return FALSE;  
3050    
3051    repeat = 1;
3052  while (TRUE)  while (TRUE)
3053    {    {
3054      last = TRUE;
3055      any = FALSE;
3056    caseless = 0;    caseless = 0;
3057    must_stop = 1;    switch (*cc)
   switch(*cc)  
3058      {      {
     case OP_CHAR:  
     must_stop = 0;  
     cc++;  
     break;  
   
3059      case OP_CHARI:      case OP_CHARI:
3060      caseless = 1;      caseless = 1;
3061      must_stop = 0;      case OP_CHAR:
3062        last = FALSE;
3063      cc++;      cc++;
3064      break;      break;
3065    
# Line 2942  while (TRUE) Line 3084  while (TRUE)
3084      cc++;      cc++;
3085      break;      break;
3086    
3087        case OP_EXACTI:
3088        caseless = 1;
3089      case OP_EXACT:      case OP_EXACT:
3090        repeat = GET2(cc, 1);
3091        last = FALSE;
3092      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
3093      break;      break;
3094    
# Line 2953  while (TRUE) Line 3099  while (TRUE)
3099      cc++;      cc++;
3100      break;      break;
3101    
3102      case OP_EXACTI:      case OP_KET:
3103      caseless = 1;      cc += 1 + LINK_SIZE;
3104      cc += 1 + IMM2_SIZE;      continue;
3105    
3106        case OP_ALT:
3107        cc += GET(cc, 1);
3108        continue;
3109    
3110        case OP_ONCE:
3111        case OP_ONCE_NC:
3112        case OP_BRA:
3113        case OP_BRAPOS:
3114        case OP_CBRA:
3115        case OP_CBRAPOS:
3116        alternative = cc + GET(cc, 1);
3117        while (*alternative == OP_ALT)
3118          {
3119          max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3120          if (max_chars == 0)
3121            return consumed;
3122          alternative += GET(alternative, 1);
3123          }
3124    
3125        if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3126          cc += IMM2_SIZE;
3127        cc += 1 + LINK_SIZE;
3128        continue;
3129    
3130        case OP_CLASS:
3131        case OP_NCLASS:
3132        any = TRUE;
3133        cc += 1 + 32 / sizeof(pcre_uchar);
3134      break;      break;
3135    
3136      default:  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3137      must_stop = 2;      case OP_XCLASS:
3138        any = TRUE;
3139        cc += GET(cc, 1);
3140      break;      break;
3141    #endif
3142    
3143        case OP_NOT_DIGIT:
3144        case OP_DIGIT:
3145        case OP_NOT_WHITESPACE:
3146        case OP_WHITESPACE:
3147        case OP_NOT_WORDCHAR:
3148        case OP_WORDCHAR:
3149        case OP_ANY:
3150        case OP_ALLANY:
3151        any = TRUE;
3152        cc++;
3153        break;
3154    
3155    #ifdef SUPPORT_UCP
3156        case OP_NOTPROP:
3157        case OP_PROP:
3158        any = TRUE;
3159        cc += 1 + 2;
3160        break;
3161    #endif
3162    
3163        case OP_TYPEEXACT:
3164        repeat = GET2(cc, 1);
3165        cc += 1 + IMM2_SIZE;
3166        continue;
3167    
3168        default:
3169        return consumed;
3170      }      }
3171    
3172    if (must_stop == 2)    if (any)
3173        break;      {
3174    #ifdef SUPPORT_UTF
3175        if (common->utf) return consumed;
3176    #endif
3177    #if defined COMPILE_PCRE8
3178        mask = 0xff;
3179    #elif defined COMPILE_PCRE16
3180        mask = 0xffff;
3181    #elif defined COMPILE_PCRE32
3182        mask = 0xffffffff;
3183    #else
3184        SLJIT_ASSERT_STOP();
3185    #endif
3186    
3187        do
3188          {
3189          chars[0] = mask;
3190          chars[1] = mask;
3191    
3192          if (--max_chars == 0)
3193            return consumed;
3194          consumed++;
3195          chars += 2;
3196          }
3197        while (--repeat > 0);
3198    
3199        repeat = 1;
3200        continue;
3201        }
3202    
3203    len = 1;    len = 1;
3204  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3205    if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3206  #endif  #endif
3207    
3208    if (caseless && char_has_othercase(common, cc))    if (caseless != 0 && char_has_othercase(common, cc))
3209      {      {
3210      caseless = char_get_othercase_bit(common, cc);      caseless = char_get_othercase_bit(common, cc);
3211      if (caseless == 0)      if (caseless == 0)
3212        return FALSE;        return consumed;
3213  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
3214      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3215  #else  #else
# Line 2988  while (TRUE) Line 3222  while (TRUE)
3222    else    else
3223      caseless = 0;      caseless = 0;
3224    
3225    while (len > 0 && location < MAX_N_CHARS * 2)    len_save = len;
3226      {    cc_save = cc;
3227      c = *cc;    while (TRUE)
3228      bit = 0;      {
3229      if (len == (caseless & 0xff))      do
3230        {        {
3231        bit = caseless >> 8;        chr = *cc;
3232        c |= bit;  #ifdef COMPILE_PCRE32
3233          if (SLJIT_UNLIKELY(chr == NOTACHAR))
3234            return consumed;
3235    #endif
3236          mask = 0;
3237          if (len == (caseless & 0xff))
3238            {
3239            mask = caseless >> 8;
3240            chr |= mask;
3241            }
3242    
3243          if (chars[0] == NOTACHAR)
3244            {
3245            chars[0] = chr;
3246            chars[1] = mask;
3247            }
3248          else
3249            {
3250            mask |= chars[0] ^ chr;
3251            chr |= mask;
3252            chars[0] = chr;
3253            chars[1] |= mask;
3254            }
3255    
3256          len--;
3257          if (--max_chars == 0)
3258            return consumed;
3259          consumed++;
3260          chars += 2;
3261          cc++;
3262        }        }
3263        while (len > 0);
3264    
3265      chars[location] = c;      if (--repeat == 0)
3266      chars[location + 1] = bit;        break;
3267    
3268      len--;      len = len_save;
3269      location += 2;      cc = cc_save;
3270      cc++;      }
3271    
3272      repeat = 1;
3273      if (last)
3274        return consumed;
3275      }
3276    }
3277    
3278    #define MAX_N_CHARS 16
3279    
3280    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3281    {
3282    DEFINE_COMPILER;
3283    struct sljit_label *start;
3284    struct sljit_jump *quit;
3285    pcre_uint32 chars[MAX_N_CHARS * 2];
3286    pcre_uint8 ones[MAX_N_CHARS];
3287    pcre_uint32 mask;
3288    int i, max;
3289    int offsets[3];
3290    
3291    for (i = 0; i < MAX_N_CHARS; i++)
3292      {
3293      chars[i << 1] = NOTACHAR;
3294      chars[(i << 1) + 1] = 0;
3295      }
3296    
3297    max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3298    
3299    if (max <= 1)
3300      return FALSE;
3301    
3302    for (i = 0; i < max; i++)
3303      {
3304      mask = chars[(i << 1) + 1];
3305      ones[i] = ones_in_half_byte[mask & 0xf];
3306      mask >>= 4;
3307      while (mask != 0)
3308        {
3309        ones[i] += ones_in_half_byte[mask & 0xf];
3310        mask >>= 4;
3311      }      }
3312      }
3313    
3314    if (location >= MAX_N_CHARS * 2 || must_stop != 0)  offsets[0] = -1;
3315    /* Scan forward. */
3316    for (i = 0; i < max; i++)
3317      if (ones[i] <= 2) {
3318        offsets[0] = i;
3319      break;      break;
3320    }    }
3321    
3322  /* At least two characters are required. */  if (offsets[0] == -1)
3323  if (location < 2 * 2)    return FALSE;
3324      return FALSE;  
3325    /* Scan backward. */
3326    offsets[1] = -1;
3327    for (i = max - 1; i > offsets[0]; i--)
3328      if (ones[i] <= 2) {
3329        offsets[1] = i;
3330        break;
3331      }
3332    
3333    offsets[2] = -1;
3334    if (offsets[1] >= 0)
3335      {
3336      /* Scan from middle. */
3337      for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3338        if (ones[i] <= 2)
3339          {
3340          offsets[2] = i;
3341          break;
3342          }
3343    
3344      if (offsets[2] == -1)
3345        {
3346        for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3347          if (ones[i] <= 2)
3348            {
3349            offsets[2] = i;
3350            break;
3351            }
3352        }
3353      }
3354    
3355    SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3356    SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3357    
3358    chars[0] = chars[offsets[0] << 1];
3359    chars[1] = chars[(offsets[0] << 1) + 1];
3360    if (offsets[2] >= 0)
3361      {
3362      chars[2] = chars[offsets[2] << 1];
3363      chars[3] = chars[(offsets[2] << 1) + 1];
3364      }
3365    if (offsets[1] >= 0)
3366      {
3367      chars[4] = chars[offsets[1] << 1];
3368      chars[5] = chars[(offsets[1] << 1) + 1];
3369      }
3370    
3371    max -= 1;
3372  if (firstline)  if (firstline)
3373    {    {
3374    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
3375    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3376    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3377    }    }
3378  else  else
3379    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3380    
3381  start = LABEL();  start = LABEL();
3382  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3383    
3384  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3385  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  if (offsets[1] >= 0)
3386      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3387  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3388    
3389  if (chars[1] != 0)  if (chars[1] != 0)
3390    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3391  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3392  if (location > 2 * 2)  if (offsets[2] >= 0)
3393    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3394  if (chars[3] != 0)  
3395    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);  if (offsets[1] >= 0)
 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);  
 if (location > 2 * 2)  
3396    {    {
3397    if (chars[5] != 0)    if (chars[5] != 0)
3398      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3399    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3400      }
3401    
3402    if (offsets[2] >= 0)
3403      {
3404      if (chars[3] != 0)
3405        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3406      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3407    }    }
3408  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3409    
# Line 3050  JUMPHERE(quit); Line 3412  JUMPHERE(quit);
3412  if (firstline)  if (firstline)
3413    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3414  else  else
3415    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3416  return TRUE;  return TRUE;
3417  }  }
3418    
# Line 3170  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_ Line 3532  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_
3532  skip_char_back(common);  skip_char_back(common);
3533    
3534  loop = LABEL();  loop = LABEL();
3535  read_char(common);  read_char_max(common, common->nlmax, TRUE);
3536  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3537  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3538    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
# Line 3199  if (firstline) Line 3561  if (firstline)
3561    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3562  }  }
3563    
3564  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3565    
3566  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3567  {  {
3568  DEFINE_COMPILER;  DEFINE_COMPILER;
3569  struct sljit_label *start;  struct sljit_label *start;
3570  struct sljit_jump *quit;  struct sljit_jump *quit;
3571  struct sljit_jump *found = NULL;  struct sljit_jump *found = NULL;
3572  jump_list *matches = NULL;  jump_list *matches = NULL;
 pcre_uint8 inverted_start_bits[32];  
 int i;  
3573  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3574  struct sljit_jump *jump;  struct sljit_jump *jump;
3575  #endif  #endif
3576    
 for (i = 0; i < 32; ++i)  
   inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);  
   
3577  if (firstline)  if (firstline)
3578    {    {
3579    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
# Line 3232  if (common->utf) Line 3589  if (common->utf)
3589    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3590  #endif  #endif
3591    
3592  if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))  if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3593    {    {
3594  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3595    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
# Line 3241  if (!check_class_ranges(common, inverted Line 3598  if (!check_class_ranges(common, inverted
3598  #endif  #endif
3599    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3600    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3601    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3602    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3603    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3604    found = JUMP(SLJIT_C_NOT_ZERO);    found = JUMP(SLJIT_C_NOT_ZERO);
# Line 3490  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE Line 3847  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE
3847  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3848  }  }
3849    
3850  /*  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
   range format:  
   
   ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).  
   ranges[1] = first bit (0 or 1)  
   ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)  
 */  
   
 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)  
3851  {  {
3852  DEFINE_COMPILER;  DEFINE_COMPILER;
3853  struct sljit_jump *jump;  int ranges[MAX_RANGE_SIZE];
3854    pcre_uint8 bit, cbit, all;
3855    int i, byte, length = 0;
3856    
3857    bit = bits[0] & 0x1;
3858    /* All bits will be zero or one (since bit is zero or one). */
3859    all = -bit;
3860    
3861  if (ranges[0] < 0)  for (i = 0; i < 256; )
3862      {
3863      byte = i >> 3;
3864      if ((i & 0x7) == 0 && bits[byte] == all)
3865        i += 8;
3866      else
3867        {
3868        cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3869        if (cbit != bit)
3870          {
3871          if (length >= MAX_RANGE_SIZE)
3872            return FALSE;
3873          ranges[length] = i;
3874          length++;
3875          bit = cbit;
3876          all = -cbit;
3877          }
3878        i++;
3879        }
3880      }
3881    
3882    if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3883      {
3884      if (length >= MAX_RANGE_SIZE)
3885        return FALSE;
3886      ranges[length] = 256;
3887      length++;
3888      }
3889    
3890    if (length < 0 || length > 4)
3891    return FALSE;    return FALSE;
3892    
3893  switch(ranges[0])  bit = bits[0] & 0x1;
3894    if (invert) bit ^= 0x1;
3895    
3896    /* No character is accepted. */
3897    if (length == 0 && bit == 0)
3898      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3899    
3900    switch(length)
3901    {    {
3902      case 0:
3903      /* When bit != 0, all characters are accepted. */
3904      return TRUE;
3905    
3906    case 1:    case 1:
3907    if (readch)    add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
     read_char(common);  
   add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));  
3908    return TRUE;    return TRUE;
3909    
3910    case 2:    case 2:
3911    if (readch)    if (ranges[0] + 1 != ranges[1])
3912      read_char(common);      {
3913    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3914    add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3915        }
3916      else
3917        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3918    return TRUE;    return TRUE;
3919    
3920    case 4:    case 3:
3921    if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])    if (bit != 0)
3922      {      {
3923      if (readch)      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3924        read_char(common);      if (ranges[0] + 1 != ranges[1])
     if (ranges[1] != 0)  
3925        {        {
3926        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3927        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3928        }        }
3929      else      else
3930        {        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
       jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);  
       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));  
       JUMPHERE(jump);  
       }  
3931      return TRUE;      return TRUE;
3932      }      }
3933    if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))  
3934      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
3935      if (ranges[1] + 1 != ranges[2])
3936      {      {
3937      if (readch)      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
3938        read_char(common);      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);  
     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);  
     add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));  
     return TRUE;  
3939      }      }
3940    return FALSE;    else
3941        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
3942    default:    return TRUE;
   return FALSE;  
   }  
 }  
   
 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)  
 {  
 int i, bit, length;  
 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;  
   
 bit = ctypes[0] & flag;  
 ranges[0] = -1;  
 ranges[1] = bit != 0 ? 1 : 0;  
 length = 0;  
3943    
3944  for (i = 1; i < 256; i++)    case 4:
3945    if ((ctypes[i] & flag) != bit)    if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
3946          && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
3947          && is_powerof2(ranges[2] - ranges[0]))
3948      {      {
3949      if (length >= MAX_RANGE_SIZE)      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
3950        return;      if (ranges[2] + 1 != ranges[3])
3951      ranges[2 + length] = i;        {
3952      length++;        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3953      bit ^= flag;        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3954          }
3955        else
3956          add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3957        return TRUE;
3958      }      }
3959    
3960  if (bit != 0)    if (bit != 0)
3961    {      {
3962    if (length >= MAX_RANGE_SIZE)      i = 0;
3963      return;      if (ranges[0] + 1 != ranges[1])
   ranges[2 + length] = 256;  
   length++;  
   }  
 ranges[0] = length;  
 }  
   
 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)  
 {  
 int ranges[2 + MAX_RANGE_SIZE];  
 pcre_uint8 bit, cbit, all;  
 int i, byte, length = 0;  
   
 bit = bits[0] & 0x1;  
 ranges[1] = bit;  
 /* Can be 0 or 255. */  
 all = -bit;  
   
 for (i = 0; i < 256; )  
   {  
   byte = i >> 3;  
   if ((i & 0x7) == 0 && bits[byte] == all)  
     i += 8;  
   else  
     {  
     cbit = (bits[byte] >> (i & 0x7)) & 0x1;  
     if (cbit != bit)  
3964        {        {
3965        if (length >= MAX_RANGE_SIZE)        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3966          return FALSE;        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3967        ranges[2 + length] = i;        i = ranges[0];
       length++;  
       bit = cbit;  
       all = -cbit;  
3968        }        }
3969      i++;      else
3970          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3971    
3972        if (ranges[2] + 1 != ranges[3])
3973          {
3974          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
3975          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3976          }
3977        else
3978          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
3979        return TRUE;
3980      }      }
   }  
3981    
3982  if (((bit == 0) && nclass) || ((bit == 1) && !nclass))    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3983    {    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
3984    if (length >= MAX_RANGE_SIZE)    if (ranges[1] + 1 != ranges[2])
3985      return FALSE;      {
3986    ranges[2 + length] = 256;      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
3987    length++;      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3988    }      }
3989  ranges[0] = length;    else
3990        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3991      return TRUE;
3992    
3993  return check_ranges(common, ranges, backtracks, FALSE);    default:
3994      SLJIT_ASSERT_STOP();
3995      return FALSE;
3996      }
3997  }  }
3998    
3999  static void check_anynewline(compiler_common *common)  static void check_anynewline(compiler_common *common)
# Line 4014  static void compile_xclass_matchingpath( Line 4381  static void compile_xclass_matchingpath(
4381  {  {
4382  DEFINE_COMPILER;  DEFINE_COMPILER;
4383  jump_list *found = NULL;  jump_list *found = NULL;
4384  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;  jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4385  pcre_int32 c, charoffset;  pcre_int32 c, charoffset;
 const pcre_uint32 *other_cases;  
4386  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
4387  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
4388  int compares, invertcmp, numberofcmps;  int compares, invertcmp, numberofcmps;
4389    
4390  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4391  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4392  BOOL charsaved = FALSE;  BOOL charsaved = FALSE;
4393  int typereg = TMP1, scriptreg = TMP1;  int typereg = TMP1, scriptreg = TMP1;
4394    const pcre_uint32 *other_cases;
4395  pcre_int32 typeoffset;  pcre_int32 typeoffset;
4396  #endif  #endif
4397    
# Line 4032  pcre_int32 typeoffset; Line 4400  pcre_int32 typeoffset;
4400  detect_partial_match(common, backtracks);  detect_partial_match(common, backtracks);
4401  read_char(common);  read_char(common);
4402    
4403  if ((*cc++ & XCL_MAP) != 0)  cc++;
4404    if ((cc[-1] & XCL_HASPROP) == 0)
4405    {    {
4406    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    if ((cc[-1] & XCL_MAP) != 0)
4407  #ifndef COMPILE_PCRE8      {
4408    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4409  #elif defined SUPPORT_UTF  #ifdef SUPPORT_UCP
4410    if (common->utf)      charsaved = TRUE;
     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
4411  #endif  #endif
4412        if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks))
4413          {
4414          jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4415    
4416          OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4417          OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4418          OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4419          OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4420          OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4421          add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4422          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4423    
4424          JUMPHERE(jump);
4425          }
4426        else
4427          add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff));
4428    
4429    if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))      OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4430        cc += 32 / sizeof(pcre_uchar);
4431        }
4432      else
4433        add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4434      }
4435    else if ((cc[-1] & XCL_MAP) != 0)
4436      {
4437      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4438    #ifdef SUPPORT_UCP
4439      charsaved = TRUE;
4440    #endif
4441      if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4442      {      {
4443    #ifdef COMPILE_PCRE8
4444        SLJIT_ASSERT(common->utf);
4445    #endif
4446        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4447    
4448      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4449      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4450      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4451      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4452      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4453      add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));      add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
     }  
4454    
 #ifndef COMPILE_PCRE8  
   JUMPHERE(jump);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
4455      JUMPHERE(jump);      JUMPHERE(jump);
4456  #endif      }
4457    
4458    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
 #ifdef SUPPORT_UCP  
   charsaved = TRUE;  
 #endif  
4459    cc += 32 / sizeof(pcre_uchar);    cc += 32 / sizeof(pcre_uchar);
4460    }    }
4461    
# Line 4119  while (*cc != XCL_END) Line 4513  while (*cc != XCL_END)
4513        case PT_SPACE:        case PT_SPACE:
4514        case PT_PXSPACE:        case PT_PXSPACE:
4515        case PT_WORD:        case PT_WORD:
4516          case PT_PXGRAPH:
4517          case PT_PXPRINT:
4518          case PT_PXPUNCT:
4519        needstype = TRUE;        needstype = TRUE;
4520        needschar = TRUE;        needschar = TRUE;
4521        break;        break;
# Line 4307  while (*cc != XCL_END) Line 4704  while (*cc != XCL_END)
4704        case PT_SPACE:        case PT_SPACE:
4705        case PT_PXSPACE:        case PT_PXSPACE:
4706        SET_CHAR_OFFSET(9);        SET_CHAR_OFFSET(9);
4707        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4708        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4709    
4710          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4711          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4712    
4713          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4714          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4715    
4716        SET_TYPE_OFFSET(ucp_Zl);        SET_TYPE_OFFSET(ucp_Zl);
4717        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4718        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
# Line 4400  while (*cc != XCL_END) Line 4803  while (*cc != XCL_END)
4803        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4804        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4805        break;        break;
4806    
4807          case PT_PXGRAPH:
4808          /* C and Z groups are the farthest two groups. */
4809          SET_TYPE_OFFSET(ucp_Ll);
4810          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4811          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4812    
4813          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4814    
4815          /* In case of ucp_Cf, we overwrite the result. */
4816          SET_CHAR_OFFSET(0x2066);
4817          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4818          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4819    
4820          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4821          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4822    
4823          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4824          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4825    
4826          JUMPHERE(jump);
4827          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4828          break;
4829    
4830          case PT_PXPRINT:
4831          /* C and Z groups are the farthest two groups. */
4832          SET_TYPE_OFFSET(ucp_Ll);
4833          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4834          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4835    
4836          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4837          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4838    
4839          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4840    
4841          /* In case of ucp_Cf, we overwrite the result. */
4842          SET_CHAR_OFFSET(0x2066);
4843          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4844          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4845    
4846          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4847          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4848    
4849          JUMPHERE(jump);
4850          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4851          break;
4852    
4853          case PT_PXPUNCT:
4854          SET_TYPE_OFFSET(ucp_Sc);
4855          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4856          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4857    
4858          SET_CHAR_OFFSET(0);
4859          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4860          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4861    
4862          SET_TYPE_OFFSET(ucp_Pc);
4863          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4864          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4865          jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4866          break;
4867        }        }
4868      cc += 2;      cc += 2;
4869      }      }
# Line 4431  struct sljit_label *label; Line 4895  struct sljit_label *label;
4895  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4896  pcre_uchar propdata[5];  pcre_uchar propdata[5];
4897  #endif  #endif
4898  #endif  #endif /* SUPPORT_UTF */
4899    
4900  switch(type)  switch(type)
4901    {    {
# Line 4456  switch(type) Line 4920  switch(type)
4920    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
4921    case OP_DIGIT:    case OP_DIGIT:
4922    /* Digits are usually 0-9, so it is worth to optimize them. */    /* Digits are usually 0-9, so it is worth to optimize them. */
   if (common->digits[0] == -2)  
     get_ctype_ranges(common, ctype_digit, common->digits);  
4923    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4924    /* Flip the starting bit in the negative case. */  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4925    if (type == OP_NOT_DIGIT)    if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
4926      common->digits[1] ^= 1;      read_char7_type(common, type == OP_NOT_DIGIT);
4927    if (!check_ranges(common, common->digits, backtracks, TRUE))    else
4928      {  #endif
4929      read_char8_type(common);      read_char8_type(common, type == OP_NOT_DIGIT);
4930      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);      /* Flip the starting bit in the negative case. */
4931      add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4932      }    add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
   if (type == OP_NOT_DIGIT)  
     common->digits[1] ^= 1;  
4933    return cc;    return cc;
4934    
4935    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
4936    case OP_WHITESPACE:    case OP_WHITESPACE:
4937    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4938    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4939      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
4940        read_char7_type(common, type == OP_NOT_WHITESPACE);
4941      else
4942    #endif
4943        read_char8_type(common, type == OP_NOT_WHITESPACE);
4944    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4945    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4946    return cc;    return cc;
# Line 4483  switch(type) Line 4948  switch(type)
4948    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
4949    case OP_WORDCHAR:    case OP_WORDCHAR:
4950    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4951    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4952      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
4953        read_char7_type(common, type == OP_NOT_WORDCHAR);
4954      else
4955    #endif
4956        read_char8_type(common, type == OP_NOT_WORDCHAR);
4957    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4958    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4959    return cc;    return cc;
4960    
4961    case OP_ANY:    case OP_ANY:
4962    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
4963    read_char(common);    read_char_max(common, common->nlmax, TRUE);
4964    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4965      {      {
4966      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
# Line 4546  switch(type) Line 5016  switch(type)
5016  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5017    case OP_NOTPROP:    case OP_NOTPROP:
5018    case OP_PROP:    case OP_PROP:
5019    propdata[0] = 0;    propdata[0] = XCL_HASPROP;
5020    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5021    propdata[2] = cc[0];    propdata[2] = cc[0];
5022    propdata[3] = cc[1];    propdata[3] = cc[1];
# Line 4558  switch(type) Line 5028  switch(type)
5028    
5029    case OP_ANYNL:    case OP_ANYNL:
5030    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5031    read_char(common);    read_char_max(common, common->bsr_nlmax, FALSE);
5032    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5033    /* We don't need to handle soft partial matching case. */    /* We don't need to handle soft partial matching case. */
5034    end_list = NULL;    end_list = NULL;
# Line 4580  switch(type) Line 5050  switch(type)
5050    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
5051    case OP_HSPACE:    case OP_HSPACE:
5052    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5053    read_char(common);    read_char_max(common, 0x3000, type == OP_NOT_HSPACE);
5054    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5055    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5056    return cc;    return cc;
# Line 4588  switch(type) Line 5058  switch(type)
5058    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
5059    case OP_VSPACE:    case OP_VSPACE:
5060    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5061    read_char(common);    read_char_max(common, 0x2029, type == OP_NOT_VSPACE);
5062    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5063    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5064    return cc;    return cc;
# Line 4687  switch(type) Line 5157  switch(type)
5157      else      else
5158        {        {
5159        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5160        read_char(common);        read_char_max(common, common->nlmax, TRUE);
5161        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5162        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5163        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
# Line 4735  switch(type) Line 5205  switch(type)
5205    else    else
5206      {      {
5207      skip_char_back(common);      skip_char_back(common);
5208      read_char(common);      read_char_max(common, common->nlmax, TRUE);
5209      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5210      }      }
5211    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 4810  switch(type) Line 5280  switch(type)
5280  #endif  #endif
5281      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5282      }      }
5283    
5284    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
   read_char(common);  
5285  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
5286    if (common->utf)    if (common->utf)
5287      {      {
# Line 4820  switch(type) Line 5290  switch(type)
5290    else    else
5291  #endif  #endif
5292      c = *cc;      c = *cc;
5293    
5294    if (type == OP_CHAR || !char_has_othercase(common, cc))    if (type == OP_CHAR || !char_has_othercase(common, cc))
5295      {      {
5296        read_char_max(common, c, FALSE);
5297      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5298      return cc + length;      return cc + length;
5299      }      }
5300    oc = char_othercase(common, c);    oc = char_othercase(common, c);
5301      read_char_max(common, c > oc ? c : oc, FALSE);
5302    bit = c ^ oc;    bit = c ^ oc;
5303    if (is_powerof2(bit))    if (is_powerof2(bit))
5304      {      {
# Line 4833  switch(type) Line 5306  switch(type)
5306      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5307      return cc + length;      return cc + length;
5308      }      }
5309    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);    jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5310    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5311    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);    JUMPHERE(jump[0]);
   OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
   add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));  
5312    return cc + length;    return cc + length;
5313    
5314    case OP_NOT:    case OP_NOT:
# Line 4872  switch(type) Line 5343  switch(type)
5343  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
5344        {        {
5345        GETCHARLEN(c, cc, length);        GETCHARLEN(c, cc, length);
       read_char(common);  
5346        }        }
5347      }      }
5348    else    else
5349  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
     {  
     read_char(common);  
5350      c = *cc;      c = *cc;
     }  
5351    
5352    if (type == OP_NOT || !char_has_othercase(common, cc))    if (type == OP_NOT || !char_has_othercase(common, cc))
5353        {
5354        read_char_max(common, c, TRUE);
5355      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5356        }
5357    else    else
5358      {      {
5359      oc = char_othercase(common, c);      oc = char_othercase(common, c);
5360        read_char_max(common, c > oc ? c : oc, TRUE);
5361      bit = c ^ oc;      bit = c ^ oc;
5362      if (is_powerof2(bit))      if (is_powerof2(bit))
5363        {        {
# Line 4904  switch(type) Line 5375  switch(type)
5375    case OP_CLASS:    case OP_CLASS:
5376    case OP_NCLASS:    case OP_NCLASS:
5377    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5378    read_char(common);  
5379    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5380      bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5381      read_char_max(common, bit, type == OP_NCLASS);
5382    #else
5383      read_char_max(common, 255, type == OP_NCLASS);
5384    #endif
5385    
5386      if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5387      return cc + 32 / sizeof(pcre_uchar);      return cc + 32 / sizeof(pcre_uchar);
5388    
5389  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5390    jump[0] = NULL;    jump[0] = NULL;
 #ifdef COMPILE_PCRE8  
   /* This check only affects 8 bit mode. In other modes, we  
   always need to compare the value with 255. */  
5391    if (common->utf)    if (common->utf)
 #endif /* COMPILE_PCRE8 */  
5392      {      {
5393      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5394      if (type == OP_CLASS)      if (type == OP_CLASS)
5395        {        {
5396        add_jump(compiler, backtracks, jump[0]);        add_jump(compiler, backtracks, jump[0]);
5397        jump[0] = NULL;        jump[0] = NULL;
5398        }        }
5399      }      }
5400  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #elif !defined COMPILE_PCRE8
5401      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5402      if (type == OP_CLASS)
5403        {
5404        add_jump(compiler, backtracks, jump[0]);
5405        jump[0] = NULL;
5406        }
5407    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5408    
5409    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5410    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5411    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5412    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5413    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5414    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5415    
5416  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5417    if (jump[0] != NULL)    if (jump[0] != NULL)
5418      JUMPHERE(jump[0]);      JUMPHERE(jump[0]);
5419  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #endif
5420    
5421    return cc + 32 / sizeof(pcre_uchar);    return cc + 32 / sizeof(pcre_uchar);
5422    
5423  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
# Line 5038  if (context.length > 0) Line 5522  if (context.length > 0)
5522  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5523  }  }
5524    
 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)  
 {  
 DEFINE_COMPILER;  
 int offset = GET2(cc, 1) << 1;  
   
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  
 if (!common->jscript_compat)  
   {  
   if (backtracks == NULL)  
     {  
     /* OVECTOR(1) contains the "string begin - 1" constant. */  
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  
     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);  
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
     OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
     return JUMP(SLJIT_C_NOT_ZERO);  
     }  
   add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));  
   }  
 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
 }  
   
5525  /* Forward definitions. */  /* Forward definitions. */
5526  static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);  static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5527  static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);  static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
# Line 5092  static void compile_backtrackingpath(com Line 5554  static void compile_backtrackingpath(com
5554    
5555  #define BACKTRACK_AS(type) ((type *)backtrack)  #define BACKTRACK_AS(type) ((type *)backtrack)
5556    
5557  static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)  static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5558    {
5559    /* The OVECTOR offset goes to TMP2. */
5560    DEFINE_COMPILER;
5561    int count = GET2(cc, 1 + IMM2_SIZE);
5562    pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5563    unsigned int offset;
5564    jump_list *found = NULL;
5565    
5566    SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5567    
5568    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5569    
5570    count--;
5571    while (count-- > 0)
5572      {
5573      offset = GET2(slot, 0) << 1;
5574      GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5575      add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5576      slot += common->name_entry_size;
5577      }
5578    
5579    offset = GET2(slot, 0) << 1;
5580    GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5581    if (backtracks != NULL && !common->jscript_compat)
5582      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5583    
5584    set_jumps(found, LABEL());
5585    }
5586    
5587    static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5588  {  {
5589  DEFINE_COMPILER;  DEFINE_COMPILER;
5590  int offset = GET2(cc, 1) << 1;  BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5591    int offset = 0;
5592  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
5593  struct sljit_jump *partial;  struct sljit_jump *partial;
5594  struct sljit_jump *nopartial;  struct sljit_jump *nopartial;
5595    
5596  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  if (ref)
5597  /* OVECTOR(1) contains the "string begin - 1" constant. */    {
5598  if (withchecks && !common->jscript_compat)    offset = GET2(cc, 1) << 1;
5599    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5600      /* OVECTOR(1) contains the "string begin - 1" constant. */
5601      if (withchecks && !common->jscript_compat)
5602        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5603      }
5604    else
5605      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5606    
5607  #if defined SUPPORT_UTF && defined SUPPORT_UCP  #if defined SUPPORT_UTF && defined SUPPORT_UCP
5608  if (common->utf && *cc == OP_REFI)  if (common->utf && *cc == OP_REFI)
5609    {    {
5610    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5611    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));    if (ref)
5612        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5613      else
5614        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5615    
5616    if (withchecks)    if (withchecks)
5617      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5618    
# Line 5134  if (common->utf && *cc == OP_REFI) Line 5637  if (common->utf && *cc == OP_REFI)
5637  else  else
5638  #endif /* SUPPORT_UTF && SUPPORT_UCP */  #endif /* SUPPORT_UTF && SUPPORT_UCP */
5639    {    {
5640    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);    if (ref)
5641        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5642      else
5643        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5644    
5645    if (withchecks)    if (withchecks)
5646      jump = JUMP(SLJIT_C_ZERO);      jump = JUMP(SLJIT_C_ZERO);
5647    
# Line 5171  if (jump != NULL) Line 5678  if (jump != NULL)
5678    else    else
5679      JUMPHERE(jump);      JUMPHERE(jump);
5680    }    }
 return cc + 1 + IMM2_SIZE;  
5681  }  }
5682    
5683  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5684  {  {
5685  DEFINE_COMPILER;  DEFINE_COMPILER;
5686    BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5687  backtrack_common *backtrack;  backtrack_common *backtrack;
5688  pcre_uchar type;  pcre_uchar type;
5689    int offset = 0;
5690  struct sljit_label *label;  struct sljit_label *label;
5691  struct sljit_jump *zerolength;  struct sljit_jump *zerolength;
5692  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
# Line 5188  BOOL minimize; Line 5696  BOOL minimize;
5696    
5697  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5698    
5699    if (ref)
5700      offset = GET2(cc, 1) << 1;
5701    else
5702      cc += IMM2_SIZE;
5703  type = cc[1 + IMM2_SIZE];  type = cc[1 + IMM2_SIZE];
5704    
5705    SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5706  minimize = (type & 0x1) != 0;  minimize = (type & 0x1) != 0;
5707  switch(type)  switch(type)
5708    {    {
# Line 5226  if (!minimize) Line 5740  if (!minimize)
5740    if (min == 0)    if (min == 0)
5741      {      {
5742      allocate_stack(common, 2);      allocate_stack(common, 2);
5743        if (ref)
5744          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5745      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5746      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5747      /* Temporary release of STR_PTR. */      /* Temporary release of STR_PTR. */
5748      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5749      zerolength = compile_ref_checks(common, ccbegin, NULL);      /* Handles both invalid and empty cases. Since the minimum repeat
5750        is zero, the invalid case is basically the same as an empty case. */
5751        if (ref)
5752          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5753        else
5754          {
5755          compile_dnref_search(common, ccbegin, NULL);
5756          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5757          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5758          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5759          }
5760      /* Restore if not zero length. */      /* Restore if not zero length. */
5761      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5762      }      }
5763    else    else
5764      {      {
5765      allocate_stack(common, 1);      allocate_stack(common, 1);
5766        if (ref)
5767          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5768      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5769      zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);      if (ref)
5770          {
5771          add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5772          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5773          }
5774        else
5775          {
5776          compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5777          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5778          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5779          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5780          }
5781      }      }
5782    
5783    if (min > 1 || max > 1)    if (min > 1 || max > 1)
5784      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5785    
5786    label = LABEL();    label = LABEL();
5787      if (!ref)
5788        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5789    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5790    
5791    if (min > 1 || max > 1)    if (min > 1 || max > 1)
# Line 5279  if (!minimize) Line 5820  if (!minimize)
5820    return cc;    return cc;
5821    }    }
5822    
5823  allocate_stack(common, 2);  allocate_stack(common, ref ? 2 : 3);
5824    if (ref)
5825      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5826  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5827  if (type != OP_CRMINSTAR)  if (type != OP_CRMINSTAR)
5828    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5829    
5830  if (min == 0)  if (min == 0)
5831    {    {
5832    zerolength = compile_ref_checks(common, ccbegin, NULL);    /* Handles both invalid and empty cases. Since the minimum repeat
5833      is zero, the invalid case is basically the same as an empty case. */
5834      if (ref)
5835        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5836      else
5837        {
5838        compile_dnref_search(common, ccbegin, NULL);
5839        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5840        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5841        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5842        }
5843      /* Length is non-zero, we can match real repeats. */
5844    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5845    jump = JUMP(SLJIT_JUMP);    jump = JUMP(SLJIT_JUMP);
5846    }    }
5847  else  else
5848    zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);    {
5849      if (ref)
5850        {
5851        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5852        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5853        }
5854      else
5855        {
5856        compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5857        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5858        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5859        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5860        }
5861      }
5862    
5863  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5864  if (max > 0)  if (max > 0)
5865    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5866    
5867    if (!ref)
5868      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5869  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5870  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5871    
# Line 6362  if (opcode == OP_COND || opcode == OP_SC Line 6931  if (opcode == OP_COND || opcode == OP_SC
6931          i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);          i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6932          while (stacksize > 0)          while (stacksize > 0)
6933            {            {
6934            if (GET2(slot, 0) == i)            if ((int)GET2(slot, 0) == i)
6935              break;              break;
6936            slot += common->name_entry_size;            slot += common->name_entry_size;
6937            stacksize--;            stacksize--;
# Line 6825  count_match(common); Line 7394  count_match(common);
7394  return cc + 1 + LINK_SIZE;  return cc + 1 + LINK_SIZE;
7395  }  }
7396    
7397  static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)  static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
7398  {  {
7399  int class_len;  int class_len;
7400    
# Line 6861  else if (*opcode >= OP_TYPESTAR && *opco Line 7430  else if (*opcode >= OP_TYPESTAR && *opco
7430    }    }
7431  else  else
7432    {    {
7433    SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);    SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
7434    *type = *opcode;    *type = *opcode;
7435    cc++;    cc++;
7436    class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);    class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
# Line 6872  else Line 7441  else
7441      if (end != NULL)      if (end != NULL)
7442        *end = cc + class_len;        *end = cc + class_len;
7443      }      }
7444      else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
7445        {
7446        *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
7447        if (end != NULL)
7448          *end = cc + class_len;
7449        }
7450    else    else
7451      {      {
7452      SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);      SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
7453      *arg1 = GET2(cc, (class_len + IMM2_SIZE));      *max = GET2(cc, (class_len + IMM2_SIZE));
7454      *arg2 = GET2(cc, class_len);      *min = GET2(cc, class_len);
7455    
7456      if (*arg2 == 0)      if (*min == 0)
7457        {        {
7458        SLJIT_ASSERT(*arg1 != 0);        SLJIT_ASSERT(*max != 0);
7459        *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;        *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
7460        }        }
7461      if (*arg1 == *arg2)      if (*max == *min)
7462        *opcode = OP_EXACT;        *opcode = OP_EXACT;
7463    
7464      if (end != NULL)      if (end != NULL)
# Line 6894  else Line 7469  else
7469    
7470  if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)  if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
7471    {    {
7472    *arg1 = GET2(cc, 0);    *max = GET2(cc, 0);
7473    cc += IMM2_SIZE;    cc += IMM2_SIZE;
7474    }    }
7475    
# Line 6923  DEFINE_COMPILER; Line 7498  DEFINE_COMPILER;
7498  backtrack_common *backtrack;  backtrack_common *backtrack;
7499  pcre_uchar opcode;  pcre_uchar opcode;
7500  pcre_uchar type;  pcre_uchar type;
7501  int arg1 = -1, arg2 = -1;  int max = -1, min = -1;
7502  pcre_uchar* end;  pcre_uchar* end;
7503  jump_list *nomatch = NULL;  jump_list *nomatch = NULL;
7504  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
# Line 6936  int tmp_base, tmp_offset; Line 7511  int tmp_base, tmp_offset;
7511    
7512  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
7513    
7514  cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);  cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
7515    
7516  switch(type)  switch(type)
7517    {    {
# Line 7007  switch(opcode) Line 7582  switch(opcode)
7582        {        {
7583        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
7584        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7585        if (opcode == OP_CRRANGE && arg2 > 0)        if (opcode == OP_CRRANGE && min > 0)
7586          CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);          CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
7587        if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))        if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
7588          jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);          jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
7589        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
7590        }        }
7591    
# Line 7037  switch(opcode) Line 7612  switch(opcode)
7612      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7613      if (opcode <= OP_PLUS)      if (opcode <= OP_PLUS)
7614        JUMPTO(SLJIT_JUMP, label);        JUMPTO(SLJIT_JUMP, label);
7615      else if (opcode == OP_CRRANGE && arg1 == 0)      else if (opcode == OP_CRRANGE && max == 0)
7616        {        {
7617        OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);        OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
7618        JUMPTO(SLJIT_JUMP, label);        JUMPTO(SLJIT_JUMP, label);
# Line 7047  switch(opcode) Line 7622  switch(opcode)
7622        OP1(SLJIT_MOV, TMP1, 0, base, offset1);        OP1(SLJIT_MOV, TMP1, 0, base, offset1);
7623        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7624        OP1(SLJIT_MOV, base, offset1, TMP1, 0);        OP1(SLJIT_MOV, base, offset1, TMP1, 0);
7625        CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);        CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
7626        }        }
7627      set_jumps(nomatch, LABEL());      set_jumps(nomatch, LABEL());
7628      if (opcode == OP_CRRANGE)      if (opcode == OP_CRRANGE)
7629        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, min + 1));
7630      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7631      }      }
7632    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
# Line 7089  switch(opcode) Line 7664  switch(opcode)
7664    break;    break;
7665    
7666    case OP_EXACT:    case OP_EXACT:
7667    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
7668    label = LABEL();    label = LABEL();
7669    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7670    OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);    OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
# Line 7102  switch(opcode) Line 7677  switch(opcode)
7677    if (opcode == OP_POSPLUS)    if (opcode == OP_POSPLUS)
7678      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7679    if (opcode == OP_POSUPTO)    if (opcode == OP_POSUPTO)
7680      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max);
7681    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
7682    label = LABEL();    label = LABEL();
7683    compile_char1_matchingpath(common, type, cc, &nomatch);    compile_char1_matchingpath(common, type, cc, &nomatch);
# Line 7126  switch(opcode) Line 7701  switch(opcode)
7701    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
7702    break;    break;
7703    
7704      case OP_CRPOSRANGE:
7705      /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
7706      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
7707      label = LABEL();
7708      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7709      OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
7710      JUMPTO(SLJIT_C_NOT_ZERO, label);
7711    
7712      if (max != 0)
7713        {
7714        SLJIT_ASSERT(max - min > 0);
7715        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max - min);
7716        }
7717      OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
7718      label = LABEL();
7719      compile_char1_matchingpath(common, type, cc, &nomatch);
7720      OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
7721      if (max == 0)
7722        JUMPTO(SLJIT_JUMP, label);
7723      else
7724        {
7725        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
7726        JUMPTO(SLJIT_C_NOT_ZERO, label);
7727        }
7728      set_jumps(nomatch, LABEL());
7729      OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
7730      break;
7731    
7732    default:    default:
7733    SLJIT_ASSERT_STOP();    SLJIT_ASSERT_STOP();
7734    break;    break;
# Line 7403  while (cc < ccend) Line 8006  while (cc < ccend)
8006    
8007      case OP_CLASS:      case OP_CLASS:
8008      case OP_NCLASS:      case OP_NCLASS:
8009      if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)      if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
8010        cc = compile_iterator_matchingpath(common, cc, parent);        cc = compile_iterator_matchingpath(common, cc, parent);
8011      else      else
8012        cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);        cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
# Line 7411  while (cc < ccend) Line 8014  while (cc < ccend)
8014    
8015  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
8016      case OP_XCLASS:      case OP_XCLASS:
8017      if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)      if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
8018        cc = compile_iterator_matchingpath(common, cc, parent);        cc = compile_iterator_matchingpath(common, cc, parent);
8019      else      else
8020        cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);        cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
# Line 7420  while (cc < ccend) Line 8023  while (cc < ccend)
8023    
8024      case OP_REF:      case OP_REF:
8025      case OP_REFI:      case OP_REFI:
8026      if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)      if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
8027          cc = compile_ref_iterator_matchingpath(common, cc, parent);
8028        else
8029          {
8030          compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
8031          cc += 1 + IMM2_SIZE;
8032          }
8033        break;
8034    
8035        case OP_DNREF:
8036        case OP_DNREFI:
8037        if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
8038        cc = compile_ref_iterator_matchingpath(common, cc, parent);        cc = compile_ref_iterator_matchingpath(common, cc, parent);
8039      else      else
8040        cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);        {
8041          compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8042          compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
8043          cc += 1 + 2 * IMM2_SIZE;
8044          }
8045      break;      break;
8046    
8047      case OP_RECURSE:      case OP_RECURSE:
# Line 7576  DEFINE_COMPILER; Line 8194  DEFINE_COMPILER;
8194  pcre_uchar *cc = current->cc;  pcre_uchar *cc = current->cc;
8195  pcre_uchar opcode;  pcre_uchar opcode;
8196  pcre_uchar type;  pcre_uchar type;
8197  int arg1 = -1, arg2 = -1;  int max = -1, min = -1;
8198  struct sljit_label *label = NULL;  struct sljit_label *label = NULL;
8199  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
8200  jump_list *jumplist = NULL;  jump_list *jumplist = NULL;
# Line 7585  int base = (private_data_ptr == 0) ? SLJ Line 8203  int base = (private_data_ptr == 0) ? SLJ
8203  int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;  int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8204  int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);  int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
8205    
8206  cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);  cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);
8207    
8208  switch(opcode)  switch(opcode)
8209    {    {
# Line 7604  switch(opcode) Line 8222  switch(opcode)
8222    else    else
8223      {      {
8224      if (opcode == OP_UPTO)      if (opcode == OP_UPTO)
8225        arg2 = 0;        min = 0;
8226      if (opcode <= OP_PLUS)      if (opcode <= OP_PLUS)
8227        {        {
8228        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
# Line 7614  switch(opcode) Line 8232  switch(opcode)
8232        {        {
8233        OP1(SLJIT_MOV, TMP1, 0, base, offset1);        OP1(SLJIT_MOV, TMP1, 0, base, offset1);
8234        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8235        jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);        jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
8236        OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);        OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
8237        }        }
8238      skip_char_back(common);      skip_char_back(common);
# Line 7659  switch(opcode) Line 8277  switch(opcode)
8277    OP1(SLJIT_MOV, base, offset1, TMP1, 0);    OP1(SLJIT_MOV, base, offset1, TMP1, 0);
8278    
8279    if (opcode == OP_CRMINRANGE)    if (opcode == OP_CRMINRANGE)
8280      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);
8281    
8282    if (opcode == OP_CRMINRANGE && arg1 == 0)    if (opcode == OP_CRMINRANGE && max == 0)
8283      JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);      JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8284    else    else
8285      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->matchingpath);      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
8286    
8287    set_jumps(jumplist, LABEL());    set_jumps(jumplist, LABEL());
8288    if (private_data_ptr == 0)    if (private_data_ptr == 0)
# Line 7699  switch(opcode) Line 8317  switch(opcode)
8317    
8318    case OP_EXACT:    case OP_EXACT:
8319    case OP_POSPLUS:    case OP_POSPLUS:
8320      case OP_CRPOSRANGE:
8321    set_jumps(current->topbacktracks, LABEL());    set_jumps(current->topbacktracks, LABEL());
8322    break;    break;
8323    
# Line 7717  static SLJIT_INLINE void compile_ref_ite Line 8336  static SLJIT_INLINE void compile_ref_ite
8336  {  {
8337  DEFINE_COMPILER;  DEFINE_COMPILER;
8338  pcre_uchar *cc = current->cc;  pcre_uchar *cc = current->cc;
8339    BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8340  pcre_uchar type;  pcre_uchar type;
8341    
8342  type = cc[1 + IMM2_SIZE];  type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
8343    
8344  if ((type & 0x1) == 0)  if ((type & 0x1) == 0)
8345    {    {
8346      /* Maximize case. */
8347    set_jumps(current->topbacktracks, LABEL());    set_jumps(current->topbacktracks, LABEL());
8348    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8349    free_stack(common, 1);    free_stack(common, 1);
# Line 7732  if ((type & 0x1) == 0) Line 8354  if ((type & 0x1) == 0)
8354  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8355  CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);  CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8356  set_jumps(current->topbacktracks, LABEL());  set_jumps(current->topbacktracks, LABEL());
8357  free_stack(common, 2);  free_stack(common, ref ? 2 : 3);
8358  }  }
8359    
8360  static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)  static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
# Line 8533  while (current) Line 9155  while (current)
9155    
9156      case OP_REF:      case OP_REF:
9157      case OP_REFI:      case OP_REFI:
9158        case OP_DNREF:
9159        case OP_DNREFI:
9160      compile_ref_iterator_backtrackingpath(common, current);      compile_ref_iterator_backtrackingpath(common, current);
9161      break;      break;
9162    
# Line 8812  switch(re->options & PCRE_NEWLINE_BITS) Line 9436  switch(re->options & PCRE_NEWLINE_BITS)
9436    case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;    case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9437    default: return;    default: return;
9438    }    }
9439    common->nlmax = READ_CHAR_ANY;
9440  if ((re->options & PCRE_BSR_ANYCRLF) != 0)  if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9441    common->bsr_nltype = NLTYPE_ANYCRLF;    common->bsr_nltype = NLTYPE_ANYCRLF;
9442  else if ((re->options & PCRE_BSR_UNICODE) != 0)  else if ((re->options & PCRE_BSR_UNICODE) != 0)
# Line 8824  else Line 9449  else
9449    common->bsr_nltype = NLTYPE_ANY;    common->bsr_nltype = NLTYPE_ANY;
9450  #endif  #endif
9451    }    }
9452    common->bsr_nlmax = READ_CHAR_ANY;
9453  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9454  common->ctypes = (sljit_sw)(tables + ctypes_offset);  common->ctypes = (sljit_sw)(tables + ctypes_offset);
 common->digits[0] = -2;  
9455  common->name_table = ((pcre_uchar *)re) + re->name_table_offset;  common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
9456  common->name_count = re->name_count;  common->name_count = re->name_count;
9457  common->name_entry_size = re->name_entry_size;  common->name_entry_size = re->name_entry_size;
# Line 8837  common->utf = (re->options & PCRE_UTF8) Line 9462  common->utf = (re->options & PCRE_UTF8)
9462  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
9463  common->use_ucp = (re->options & PCRE_UCP) != 0;  common->use_ucp = (re->options & PCRE_UCP) != 0;
9464  #endif  #endif
9465    if (common->utf)
9466      {
9467      if (common->nltype == NLTYPE_ANY)
9468        common->nlmax = 0x2029;
9469      else if (common->nltype == NLTYPE_ANYCRLF)
9470        common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9471      else
9472        {
9473        /* We only care about the first newline character. */
9474        common->nlmax = common->newline & 0xff;
9475        }
9476    
9477      if (common->bsr_nltype == NLTYPE_ANY)
9478        common->bsr_nlmax = 0x2029;
9479      else
9480        common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9481      }
9482  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
9483  ccend = bracketend(rootbacktrack.cc);  ccend = bracketend(rootbacktrack.cc);
9484    
# Line 9000  if ((re->options & PCRE_ANCHORED) == 0) Line 9642  if ((re->options & PCRE_ANCHORED) == 0)
9642      else if ((re->flags & PCRE_STARTLINE) != 0)      else if ((re->flags & PCRE_STARTLINE) != 0)
9643        fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);        fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
9644      else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)      else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
9645        fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);        fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
9646      }      }
9647    }    }
9648  else  else
# Line 9238  if (common->reset_match != NULL) Line 9880  if (common->reset_match != NULL)
9880    JUMPTO(SLJIT_JUMP, reset_match_label);    JUMPTO(SLJIT_JUMP, reset_match_label);
9881    }    }
9882  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
9883  #ifndef COMPILE_PCRE32  #ifdef COMPILE_PCRE8
9884  if (common->utfreadchar != NULL)  if (common->utfreadchar != NULL)
9885    {    {
9886    set_jumps(common->utfreadchar, LABEL());    set_jumps(common->utfreadchar, LABEL());
9887    do_utfreadchar(common);    do_utfreadchar(common);
9888    }    }
9889  #endif /* !COMPILE_PCRE32 */  if (common->utfreadchar16 != NULL)
9890  #ifdef COMPILE_PCRE8    {
9891      set_jumps(common->utfreadchar16, LABEL());
9892      do_utfreadchar16(common);
9893      }
9894  if (common->utfreadtype8 != NULL)  if (common->utfreadtype8 != NULL)
9895    {    {
9896    set_jumps(common->utfreadtype8, LABEL());    set_jumps(common->utfreadtype8, LABEL());

Legend:
Removed from v.1367  
changed lines
  Added in v.1423

  ViewVC Help
Powered by ViewVC 1.1.5