/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory | Revision Log | View Patch

revision 1379 by ph10, Mon Oct 14 13:54:07 2013 UTC revision 1452 by zherczeg, Tue Jan 28 16:07:52 2014 UTC
# Line 179  typedef struct jit_arguments { Line 179  typedef struct jit_arguments {
179    
180  typedef struct executable_functions {  typedef struct executable_functions {
181    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182      sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
183      sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184    PUBL(jit_callback) callback;    PUBL(jit_callback) callback;
185    void *userdata;    void *userdata;
186    pcre_uint32 top_bracket;    pcre_uint32 top_bracket;
187    pcre_uint32 limit_match;    pcre_uint32 limit_match;
   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];  
188  } executable_functions;  } executable_functions;
189    
190  typedef struct jump_list {  typedef struct jump_list {
# Line 197  typedef struct stub_list { Line 198  typedef struct stub_list {
198    struct stub_list *next;    struct stub_list *next;
199  } stub_list;  } stub_list;
200    
201    typedef struct label_addr_list {
202      struct sljit_label *label;
203      sljit_uw *addr;
204      struct label_addr_list *next;
205    } label_addr_list;
206    
207  enum frame_types {  enum frame_types {
208    no_frame = -1,    no_frame = -1,
209    no_stack = -2    no_stack = -2
# Line 306  typedef struct then_trap_backtrack { Line 313  typedef struct then_trap_backtrack {
313    int framesize;    int framesize;
314  } then_trap_backtrack;  } then_trap_backtrack;
315    
316  #define MAX_RANGE_SIZE 6  #define MAX_RANGE_SIZE 4
317    
318  typedef struct compiler_common {  typedef struct compiler_common {
319    /* The sljit generic compiler. */    /* The sljit generic compiler. */
# Line 315  typedef struct compiler_common { Line 322  typedef struct compiler_common {
322    pcre_uchar *start;    pcre_uchar *start;
323    /* Maps private data offset to each opcode. */    /* Maps private data offset to each opcode. */
324    sljit_si *private_data_ptrs;    sljit_si *private_data_ptrs;
325      /* This read-only data is available during runtime. */
326      sljit_uw *read_only_data;
327      /* The total size of the read-only data. */
328      sljit_uw read_only_data_size;
329      /* The next free entry of the read_only_data. */
330      sljit_uw *read_only_data_ptr;
331    /* Tells whether the capturing bracket is optimized. */    /* Tells whether the capturing bracket is optimized. */
332    pcre_uint8 *optimized_cbracket;    pcre_uint8 *optimized_cbracket;
333    /* Tells whether the starting offset is a target of then. */    /* Tells whether the starting offset is a target of then. */
# Line 349  typedef struct compiler_common { Line 362  typedef struct compiler_common {
362    sljit_sw lcc;    sljit_sw lcc;
363    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
364    int mode;    int mode;
365      /* TRUE, when minlength is greater than 0. */
366      BOOL might_be_empty;
367    /* \K is found in the pattern. */    /* \K is found in the pattern. */
368    BOOL has_set_som;    BOOL has_set_som;
369    /* (*SKIP:arg) is found in the pattern. */    /* (*SKIP:arg) is found in the pattern. */
# Line 363  typedef struct compiler_common { Line 378  typedef struct compiler_common {
378    BOOL positive_assert;    BOOL positive_assert;
379    /* Newline control. */    /* Newline control. */
380    int nltype;    int nltype;
381      pcre_uint32 nlmax;
382      pcre_uint32 nlmin;
383    int newline;    int newline;
384    int bsr_nltype;    int bsr_nltype;
385      pcre_uint32 bsr_nlmax;
386      pcre_uint32 bsr_nlmin;
387    /* Dollar endonly. */    /* Dollar endonly. */
388    int endonly;    int endonly;
389    /* Tables. */    /* Tables. */
390    sljit_sw ctypes;    sljit_sw ctypes;
   int digits[2 + MAX_RANGE_SIZE];  
391    /* Named capturing brackets. */    /* Named capturing brackets. */
392    pcre_uchar *name_table;    pcre_uchar *name_table;
393    sljit_sw name_count;    sljit_sw name_count;
# Line 380  typedef struct compiler_common { Line 398  typedef struct compiler_common {
398    struct sljit_label *quit_label;    struct sljit_label *quit_label;
399    struct sljit_label *forced_quit_label;    struct sljit_label *forced_quit_label;
400    struct sljit_label *accept_label;    struct sljit_label *accept_label;
401      struct sljit_label *ff_newline_shortcut;
402    stub_list *stubs;    stub_list *stubs;
403      label_addr_list *label_addrs;
404    recurse_entry *entries;    recurse_entry *entries;
405    recurse_entry *currententry;    recurse_entry *currententry;
406    jump_list *partialmatch;    jump_list *partialmatch;
# Line 404  typedef struct compiler_common { Line 424  typedef struct compiler_common {
424  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
425    BOOL use_ucp;    BOOL use_ucp;
426  #endif  #endif
 #ifndef COMPILE_PCRE32  
   jump_list *utfreadchar;  
 #endif  
427  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
428      jump_list *utfreadchar;
429      jump_list *utfreadchar16;
430    jump_list *utfreadtype8;    jump_list *utfreadtype8;
431  #endif  #endif
432  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
# Line 524  the start pointers when the end of the c Line 543  the start pointers when the end of the c
543  #define GET_LOCAL_BASE(dst, dstw, offset) \  #define GET_LOCAL_BASE(dst, dstw, offset) \
544    sljit_get_local_base(compiler, (dst), (dstw), (offset))    sljit_get_local_base(compiler, (dst), (dstw), (offset))
545    
546    #define READ_CHAR_MAX 0x7fffffff
547    
548  static pcre_uchar* bracketend(pcre_uchar* cc)  static pcre_uchar* bracketend(pcre_uchar* cc)
549  {  {
550  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
# Line 533  cc += 1 + LINK_SIZE; Line 554  cc += 1 + LINK_SIZE;
554  return cc;  return cc;
555  }  }
556    
557    static int no_alternatives(pcre_uchar* cc)
558    {
559    int count = 0;
560    SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
561    do
562      {
563      cc += GET(cc, 1);
564      count++;
565      }
566    while (*cc == OP_ALT);
567    SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
568    return count;
569    }
570    
571    static int ones_in_half_byte[16] = {
572      /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
573      /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
574    };
575    
576  /* Functions which might need modification for all new supported opcodes:  /* Functions which might need modification for all new supported opcodes:
577   next_opcode   next_opcode
578   check_opcode_types   check_opcode_types
# Line 752  while (cc < ccend) Line 792  while (cc < ccend)
792      {      {
793      case OP_SET_SOM:      case OP_SET_SOM:
794      common->has_set_som = TRUE;      common->has_set_som = TRUE;
795        common->might_be_empty = TRUE;
796      cc += 1;      cc += 1;
797      break;      break;
798    
# Line 761  while (cc < ccend) Line 802  while (cc < ccend)
802      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
803      break;      break;
804    
805        case OP_BRA:
806        case OP_CBRA:
807        case OP_SBRA:
808        case OP_SCBRA:
809        count = no_alternatives(cc);
810        if (count > 4)
811          common->read_only_data_size += count * sizeof(sljit_uw);
812        cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
813        break;
814    
815      case OP_CBRAPOS:      case OP_CBRAPOS:
816      case OP_SCBRAPOS:      case OP_SCBRAPOS:
817      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
# Line 2019  while (list_item) Line 2070  while (list_item)
2070  common->stubs = NULL;  common->stubs = NULL;
2071  }  }
2072    
2073    static void add_label_addr(compiler_common *common)
2074    {
2075    DEFINE_COMPILER;
2076    label_addr_list *label_addr;
2077    
2078    label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2079    if (label_addr == NULL)
2080      return;
2081    label_addr->label = LABEL();
2082    label_addr->addr = common->read_only_data_ptr;
2083    label_addr->next = common->label_addrs;
2084    common->label_addrs = label_addr;
2085    common->read_only_data_ptr++;
2086    }
2087    
2088  static SLJIT_INLINE void count_match(compiler_common *common)  static SLJIT_INLINE void count_match(compiler_common *common)
2089  {  {
2090  DEFINE_COMPILER;  DEFINE_COMPILER;
# Line 2457  else Line 2523  else
2523  JUMPHERE(jump);  JUMPHERE(jump);
2524  }  }
2525    
2526  static void read_char(compiler_common *common)  static void peek_char(compiler_common *common, pcre_uint32 max)
2527  {  {
2528  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2529  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2530  DEFINE_COMPILER;  DEFINE_COMPILER;
2531  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2532  struct sljit_jump *jump;  struct sljit_jump *jump;
2533  #endif  #endif
2534    
2535    SLJIT_UNUSED_ARG(max);
2536    
2537  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2538  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2539  if (common->utf)  if (common->utf)
2540    {    {
2541  #if defined COMPILE_PCRE8    if (max < 128) return;
2542    
2543    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2544  #elif defined COMPILE_PCRE16    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);  
 #endif /* COMPILE_PCRE[8|16] */  
2545    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2546      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2547    JUMPHERE(jump);    JUMPHERE(jump);
2548    }    }
2549  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2550    
2551    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2552    if (common->utf)
2553      {
2554      if (max < 0xd800) return;
2555    
2556      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2557      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2558      /* TMP2 contains the high surrogate. */
2559      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2560      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2561      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2562      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2563      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2564      JUMPHERE(jump);
2565      }
2566    #endif
2567    }
2568    
2569    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2570    
2571    static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2572    {
2573    /* Tells whether the character codes below 128 are enough
2574    to determine a match. */
2575    const pcre_uint8 value = nclass ? 0xff : 0;
2576    const pcre_uint8* end = bitset + 32;
2577    
2578    bitset += 16;
2579    do
2580      {
2581      if (*bitset++ != value)
2582        return FALSE;
2583      }
2584    while (bitset < end);
2585    return TRUE;
2586    }
2587    
2588    static void read_char7_type(compiler_common *common, BOOL full_read)
2589    {
2590    /* Reads the precise character type of a character into TMP1, if the character
2591    is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2592    full_read argument tells whether characters above max are accepted or not. */
2593    DEFINE_COMPILER;
2594    struct sljit_jump *jump;
2595    
2596    SLJIT_ASSERT(common->utf);
2597    
2598    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2599  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2600    
2601    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2602    
2603    if (full_read)
2604      {
2605      jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2606      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2607      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2608      JUMPHERE(jump);
2609      }
2610  }  }
2611    
2612  static void peek_char(compiler_common *common)  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2613    
2614    static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2615  {  {
2616  /* Reads the character into TMP1, keeps STR_PTR.  /* Reads the precise value of a character into TMP1, if the character is
2617  Does not check STR_END. TMP2 Destroyed. */  between min and max (c >= min && c <= max). Otherwise it returns with a value
2618    outside the range. Does not check STR_END. */
2619  DEFINE_COMPILER;  DEFINE_COMPILER;
2620  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2621  struct sljit_jump *jump;  struct sljit_jump *jump;
2622  #endif  #endif
2623    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2624    struct sljit_jump *jump2;
2625    #endif
2626    
2627  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  SLJIT_UNUSED_ARG(update_str_ptr);
2628  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  SLJIT_UNUSED_ARG(min);
2629    SLJIT_UNUSED_ARG(max);
2630    SLJIT_ASSERT(min <= max);
2631    
2632    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2633    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2634    
2635    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2636  if (common->utf)  if (common->utf)
2637    {    {
2638  #if defined COMPILE_PCRE8    if (max < 128 && !update_str_ptr) return;
2639    
2640    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2641  #elif defined COMPILE_PCRE16    if (min >= 0x10000)
2642    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);      {
2643  #endif /* COMPILE_PCRE[8|16] */      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2644    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));      if (update_str_ptr)
2645    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);        OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2646        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2647        jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2648        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2649        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2650        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2651        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2652        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2653        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2654        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2655        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2656        if (!update_str_ptr)
2657          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2658        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2659        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2660        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2661        JUMPHERE(jump2);
2662        if (update_str_ptr)
2663          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2664        }
2665      else if (min >= 0x800 && max <= 0xffff)
2666        {
2667        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2668        if (update_str_ptr)
2669          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2670        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2671        jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2672        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2673        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2674        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2675        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2676        if (!update_str_ptr)
2677          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2678        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2679        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2680        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2681        JUMPHERE(jump2);
2682        if (update_str_ptr)
2683          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2684        }
2685      else if (max >= 0x800)
2686        add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2687      else if (max < 128)
2688        {
2689        OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2690        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2691        }
2692      else
2693        {
2694        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2695        if (!update_str_ptr)
2696          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2697        else
2698          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2699        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2700        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2701        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2702        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2703        if (update_str_ptr)
2704          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2705        }
2706    JUMPHERE(jump);    JUMPHERE(jump);
2707    }    }
2708  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif
2709    
2710    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2711    if (common->utf)
2712      {
2713      if (max >= 0x10000)
2714        {
2715        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2716        jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2717        /* TMP2 contains the high surrogate. */
2718        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2719        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2720        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2721        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2722        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2723        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2724        JUMPHERE(jump);
2725        return;
2726        }
2727    
2728      if (max < 0xd800 && !update_str_ptr) return;
2729    
2730      /* Skip low surrogate if necessary. */
2731      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2732      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2733      if (update_str_ptr)
2734        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2735      if (max >= 0xd800)
2736        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2737      JUMPHERE(jump);
2738      }
2739    #endif
2740    }
2741    
2742    static SLJIT_INLINE void read_char(compiler_common *common)
2743    {
2744    read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2745  }  }
2746    
2747  static void read_char8_type(compiler_common *common)  static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2748  {  {
2749  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2750  DEFINE_COMPILER;  DEFINE_COMPILER;
2751  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2752  struct sljit_jump *jump;  struct sljit_jump *jump;
2753  #endif  #endif
2754    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2755    struct sljit_jump *jump2;
2756    #endif
2757    
2758  #ifdef SUPPORT_UTF  SLJIT_UNUSED_ARG(update_str_ptr);
2759    
2760    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2761    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2762    
2763    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2764  if (common->utf)  if (common->utf)
2765    {    {
   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 #if defined COMPILE_PCRE8  
2766    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2767    it is needed in most cases. */    it is needed in most cases. */
2768    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2769    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2770    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));    if (!update_str_ptr)
2771    JUMPHERE(jump);      {
2772  #elif defined COMPILE_PCRE16      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2773    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2774    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2775    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2776    JUMPHERE(jump);      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2777    /* Skip low surrogate if necessary. */      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2778    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2779    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);      jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2780    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2781    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);      JUMPHERE(jump2);
2782    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);      }
2783  #elif defined COMPILE_PCRE32    else
2784    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
   jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  
2785    JUMPHERE(jump);    JUMPHERE(jump);
 #endif /* COMPILE_PCRE[8|16|32] */  
2786    return;    return;
2787    }    }
2788  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2789  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
2790  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  #if !defined COMPILE_PCRE8
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
2791  /* The ctypes array contains only 256 values. */  /* The ctypes array contains only 256 values. */
2792  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2793  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2794  #endif  #endif
2795  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2796  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if !defined COMPILE_PCRE8
2797  JUMPHERE(jump);  JUMPHERE(jump);
2798  #endif  #endif
2799    
2800    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2801    if (common->utf && update_str_ptr)
2802      {
2803      /* Skip low surrogate if necessary. */
2804      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2805      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2806      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2807      JUMPHERE(jump);
2808      }
2809    #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2810  }  }
2811    
2812  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
# Line 2595  if (common->utf) Line 2844  if (common->utf)
2844  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2845  }  }
2846    
2847  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2848  {  {
2849  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2850  DEFINE_COMPILER;  DEFINE_COMPILER;
2851    struct sljit_jump *jump;
2852    
2853  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
2854    {    {
2855    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2856    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2857    }    }
2858  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
2859    {    {
2860    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);    if (jumpifmatch)
2861    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      {
2862    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2863    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2864    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));      }
2865      else
2866        {
2867        jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2868        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2869        JUMPHERE(jump);
2870        }
2871    }    }
2872  else  else
2873    {    {
2874    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2875    add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2876    }    }
2877  }  }
2878    
# Line 2626  else Line 2882  else
2882  static void do_utfreadchar(compiler_common *common)  static void do_utfreadchar(compiler_common *common)
2883  {  {
2884  /* Fast decoding a UTF-8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2885  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2886  DEFINE_COMPILER;  DEFINE_COMPILER;
2887  struct sljit_jump *jump;  struct sljit_jump *jump;
2888    
2889  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2890    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2891    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2892    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2893    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2894    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2895    
2896  /* Searching for the first zero. */  /* Searching for the first zero. */
2897  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2898  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2899  /* Two byte sequence. */  /* Two byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
2900  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2901  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2902    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2903    
2904    JUMPHERE(jump);
2905    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2906    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2907  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2908  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2909  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
2910    
2911  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2912  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2913  /* Three byte sequence. */  /* Three byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  
2914  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2915  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));  
2916  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2917    
2918  /* Four byte sequence. */  /* Four byte sequence. */
2919  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  JUMPHERE(jump);
2920  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2921  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2922    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2923    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2924  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
2925  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2926  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2927    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2928    }
2929    
2930    static void do_utfreadchar16(compiler_common *common)
2931    {
2932    /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2933    of the character (>= 0xc0). Return value in TMP1. */
2934    DEFINE_COMPILER;
2935    struct sljit_jump *jump;
2936    
2937    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2938    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2939    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2940    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2941  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
2942  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2943  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));  
2944  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));  /* Searching for the first zero. */
2945    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2946    jump = JUMP(SLJIT_C_NOT_ZERO);
2947    /* Two byte sequence. */
2948    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2949    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2950    
2951    JUMPHERE(jump);
2952    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2953    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2954    /* This code runs only in 8 bit mode. No need to shift the value. */
2955    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2956    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2957    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2958    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2959  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2960  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2961  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));  /* Three byte sequence. */
2962    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2963  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2964  }  }
2965    
# Line 2697  jump = JUMP(SLJIT_C_NOT_ZERO); Line 2979  jump = JUMP(SLJIT_C_NOT_ZERO);
2979  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2980  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2981  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2982    /* The upper 5 bits are known at this point. */
2983    compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2984  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2985  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2986  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
2987  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2988  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2989    
2990  JUMPHERE(compare);  JUMPHERE(compare);
2991  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2992  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2993    
2994  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 }  
   
 #elif defined COMPILE_PCRE16  
   
 static void do_utfreadchar(compiler_common *common)  
 {  
 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char  
 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */  
 DEFINE_COMPILER;  
 struct sljit_jump *jump;  
   
 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  
 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);  
 /* Do nothing, only return. */  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
   
2995  JUMPHERE(jump);  JUMPHERE(jump);
2996  /* Combine two 16 bit characters. */  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2997  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2998  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);  
2999  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3000  }  }
3001    
3002  #endif /* COMPILE_PCRE[8|16] */  #endif /* COMPILE_PCRE8 */
3003    
3004  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
3005    
# Line 2818  if (firstline) Line 3074  if (firstline)
3074      mainloop = LABEL();      mainloop = LABEL();
3075      /* Continual stores does not cause data dependency. */      /* Continual stores does not cause data dependency. */
3076      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3077      read_char(common);      read_char_range(common, common->nlmin, common->nlmax, TRUE);
3078      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
3079      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3080      JUMPHERE(end);      JUMPHERE(end);
# Line 2894  if (newlinecheck) Line 3150  if (newlinecheck)
3150  return mainloop;  return mainloop;
3151  }  }
3152    
3153  #define MAX_N_CHARS 3  #define MAX_N_CHARS 16
3154    #define MAX_N_BYTES 8
3155    
3156  static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)  static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3157  {  {
3158  DEFINE_COMPILER;  pcre_uint8 len = bytes[0];
3159  struct sljit_label *start;  int i;
 struct sljit_jump *quit;  
 pcre_uint32 chars[MAX_N_CHARS * 2];  
 pcre_uchar *cc = common->start + 1 + LINK_SIZE;  
 int location = 0;  
 pcre_int32 len, c, bit, caseless;  
 int must_stop;  
3160    
3161  /* We do not support alternatives now. */  if (len == 255)
3162  if (*(common->start + GET(common->start, 1)) == OP_ALT)    return;
3163    return FALSE;  
3164    if (len == 0)
3165      {
3166      bytes[0] = 1;
3167      bytes[1] = byte;
3168      return;
3169      }
3170    
3171    for (i = len; i > 0; i--)
3172      if (bytes[i] == byte)
3173        return;
3174    
3175    if (len >= MAX_N_BYTES - 1)
3176      {
3177      bytes[0] = 255;
3178      return;
3179      }
3180    
3181    len++;
3182    bytes[len] = byte;
3183    bytes[0] = len;
3184    }
3185    
3186    static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
3187    {
3188    /* Recursive function, which scans prefix literals. */
3189    BOOL last, any, caseless;
3190    int len, repeat, len_save, consumed = 0;
3191    pcre_uint32 chr, mask;
3192    pcre_uchar *alternative, *cc_save, *oc;
3193    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3194    pcre_uchar othercase[8];
3195    #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3196    pcre_uchar othercase[2];
3197    #else
3198    pcre_uchar othercase[1];
3199    #endif
3200    
3201    repeat = 1;
3202  while (TRUE)  while (TRUE)
3203    {    {
3204    caseless = 0;    last = TRUE;
3205    must_stop = 1;    any = FALSE;
3206    switch(*cc)    caseless = FALSE;
3207      switch (*cc)
3208      {      {
     case OP_CHAR:  
     must_stop = 0;  
     cc++;  
     break;  
   
3209      case OP_CHARI:      case OP_CHARI:
3210      caseless = 1;      caseless = TRUE;
3211      must_stop = 0;      case OP_CHAR:
3212        last = FALSE;
3213      cc++;      cc++;
3214      break;      break;
3215    
# Line 2943  while (TRUE) Line 3228  while (TRUE)
3228      cc++;      cc++;
3229      continue;      continue;
3230    
3231        case OP_ASSERT:
3232        case OP_ASSERT_NOT:
3233        case OP_ASSERTBACK:
3234        case OP_ASSERTBACK_NOT:
3235        cc = bracketend(cc);
3236        continue;
3237    
3238        case OP_PLUSI:
3239        case OP_MINPLUSI:
3240        case OP_POSPLUSI:
3241        caseless = TRUE;
3242      case OP_PLUS:      case OP_PLUS:
3243      case OP_MINPLUS:      case OP_MINPLUS:
3244      case OP_POSPLUS:      case OP_POSPLUS:
3245      cc++;      cc++;
3246      break;      break;
3247    
3248        case OP_EXACTI:
3249        caseless = TRUE;
3250      case OP_EXACT:      case OP_EXACT:
3251        repeat = GET2(cc, 1);
3252        last = FALSE;
3253      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
3254      break;      break;
3255    
3256      case OP_PLUSI:      case OP_QUERYI:
3257      case OP_MINPLUSI:      case OP_MINQUERYI:
3258      case OP_POSPLUSI:      case OP_POSQUERYI:
3259      caseless = 1;      caseless = TRUE;
3260        case OP_QUERY:
3261        case OP_MINQUERY:
3262        case OP_POSQUERY:
3263        len = 1;
3264      cc++;      cc++;
3265    #ifdef SUPPORT_UTF
3266        if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3267    #endif
3268        max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
3269        if (max_chars == 0)
3270          return consumed;
3271        last = FALSE;
3272      break;      break;
3273    
3274      case OP_EXACTI:      case OP_KET:
3275      caseless = 1;      cc += 1 + LINK_SIZE;
3276        continue;
3277    
3278        case OP_ALT:
3279        cc += GET(cc, 1);
3280        continue;
3281    
3282        case OP_ONCE:
3283        case OP_ONCE_NC:
3284        case OP_BRA:
3285        case OP_BRAPOS:
3286        case OP_CBRA:
3287        case OP_CBRAPOS:
3288        alternative = cc + GET(cc, 1);
3289        while (*alternative == OP_ALT)
3290          {
3291          max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
3292          if (max_chars == 0)
3293            return consumed;
3294          alternative += GET(alternative, 1);
3295          }
3296    
3297        if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3298          cc += IMM2_SIZE;
3299        cc += 1 + LINK_SIZE;
3300        continue;
3301    
3302        case OP_CLASS:
3303    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3304        if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3305    #endif
3306        any = TRUE;
3307        cc += 1 + 32 / sizeof(pcre_uchar);
3308        break;
3309    
3310        case OP_NCLASS:
3311    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3312        if (common->utf) return consumed;
3313    #endif
3314        any = TRUE;
3315        cc += 1 + 32 / sizeof(pcre_uchar);
3316        break;
3317    
3318    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3319        case OP_XCLASS:
3320    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3321        if (common->utf) return consumed;
3322    #endif
3323        any = TRUE;
3324        cc += GET(cc, 1);
3325        break;
3326    #endif
3327    
3328        case OP_DIGIT:
3329    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3330        if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3331          return consumed;
3332    #endif
3333        any = TRUE;
3334        cc++;
3335        break;
3336    
3337        case OP_WHITESPACE:
3338    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3339        if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3340          return consumed;
3341    #endif
3342        any = TRUE;
3343        cc++;
3344        break;
3345    
3346        case OP_WORDCHAR:
3347    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3348        if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3349          return consumed;
3350    #endif
3351        any = TRUE;
3352        cc++;
3353        break;
3354    
3355        case OP_NOT:
3356        case OP_NOTI:
3357        cc++;
3358        /* Fall through. */
3359        case OP_NOT_DIGIT:
3360        case OP_NOT_WHITESPACE:
3361        case OP_NOT_WORDCHAR:
3362        case OP_ANY:
3363        case OP_ALLANY:
3364    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3365        if (common->utf) return consumed;
3366    #endif
3367        any = TRUE;
3368        cc++;
3369        break;
3370    
3371    #ifdef SUPPORT_UCP
3372        case OP_NOTPROP:
3373        case OP_PROP:
3374    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3375        if (common->utf) return consumed;
3376    #endif
3377        any = TRUE;
3378        cc += 1 + 2;
3379        break;
3380    #endif
3381    
3382        case OP_TYPEEXACT:
3383        repeat = GET2(cc, 1);
3384      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
3385        continue;
3386    
3387        case OP_NOTEXACT:
3388        case OP_NOTEXACTI:
3389    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3390        if (common->utf) return consumed;
3391    #endif
3392        any = TRUE;
3393        repeat = GET2(cc, 1);
3394        cc += 1 + IMM2_SIZE + 1;
3395      break;      break;
3396    
3397      default:      default:
3398      must_stop = 2;      return consumed;
     break;  
3399      }      }
3400    
3401    if (must_stop == 2)    if (any)
3402        break;      {
3403    #if defined COMPILE_PCRE8
3404        mask = 0xff;
3405    #elif defined COMPILE_PCRE16
3406        mask = 0xffff;
3407    #elif defined COMPILE_PCRE32
3408        mask = 0xffffffff;
3409    #else
3410        SLJIT_ASSERT_STOP();
3411    #endif
3412    
3413        do
3414          {
3415          chars[0] = mask;
3416          chars[1] = mask;
3417          bytes[0] = 255;
3418    
3419          consumed++;
3420          if (--max_chars == 0)
3421            return consumed;
3422          chars += 2;
3423          bytes += MAX_N_BYTES;
3424          }
3425        while (--repeat > 0);
3426    
3427        repeat = 1;
3428        continue;
3429        }
3430    
3431    len = 1;    len = 1;
3432  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3433    if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3434  #endif  #endif
3435    
3436    if (caseless && char_has_othercase(common, cc))    if (caseless && char_has_othercase(common, cc))
3437      {      {
3438      caseless = char_get_othercase_bit(common, cc);  #ifdef SUPPORT_UTF
3439      if (caseless == 0)      if (common->utf)
3440        return FALSE;        {
3441  #ifdef COMPILE_PCRE8        GETCHAR(chr, cc);
3442      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));        if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3443  #else          return consumed;
3444      if ((caseless & 0x100) != 0)        }
       caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));  
3445      else      else
       caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));  
3446  #endif  #endif
3447          {
3448          chr = *cc;
3449          othercase[0] = TABLE_GET(chr, common->fcc, chr);
3450          }
3451      }      }
3452    else    else
3453      caseless = 0;      caseless = FALSE;
3454    
3455    while (len > 0 && location < MAX_N_CHARS * 2)    len_save = len;
3456      cc_save = cc;
3457      while (TRUE)
3458      {      {
3459      c = *cc;      oc = othercase;
3460      bit = 0;      do
     if (len == (caseless & 0xff))  
3461        {        {
3462        bit = caseless >> 8;        chr = *cc;
3463        c |= bit;  #ifdef COMPILE_PCRE32
3464          if (SLJIT_UNLIKELY(chr == NOTACHAR))
3465            return consumed;
3466    #endif
3467          add_prefix_byte((pcre_uint8)chr, bytes);
3468    
3469          mask = 0;
3470          if (caseless)
3471            {
3472            add_prefix_byte((pcre_uint8)*oc, bytes);
3473            mask = *cc ^ *oc;
3474            chr |= mask;
3475            }
3476    
3477    #ifdef COMPILE_PCRE32
3478          if (chars[0] == NOTACHAR && chars[1] == 0)
3479    #else
3480          if (chars[0] == NOTACHAR)
3481    #endif
3482            {
3483            chars[0] = chr;
3484            chars[1] = mask;
3485            }
3486          else
3487            {
3488            mask |= chars[0] ^ chr;
3489            chr |= mask;
3490            chars[0] = chr;
3491            chars[1] |= mask;
3492            }
3493    
3494          len--;
3495          consumed++;
3496          if (--max_chars == 0)
3497            return consumed;
3498          chars += 2;
3499          bytes += MAX_N_BYTES;
3500          cc++;
3501          oc++;
3502        }        }
3503        while (len > 0);
3504    
3505      chars[location] = c;      if (--repeat == 0)
3506      chars[location + 1] = bit;        break;
3507    
3508      len--;      len = len_save;
3509      location += 2;      cc = cc_save;
3510      cc++;      }
3511    
3512      repeat = 1;
3513      if (last)
3514        return consumed;
3515      }
3516    }
3517    
3518    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3519    {
3520    DEFINE_COMPILER;
3521    struct sljit_label *start;
3522    struct sljit_jump *quit;
3523    pcre_uint32 chars[MAX_N_CHARS * 2];
3524    pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
3525    pcre_uint8 ones[MAX_N_CHARS];
3526    int offsets[3];
3527    pcre_uint32 mask;
3528    pcre_uint8 *byte_set, *byte_set_end;
3529    int i, max, from;
3530    int range_right = -1, range_len = 3 - 1;
3531    sljit_ub *update_table = NULL;
3532    BOOL in_range;
3533    
3534    /* This is even TRUE, if both are NULL. */
3535    SLJIT_ASSERT(common->read_only_data_ptr == common->read_only_data);
3536    
3537    for (i = 0; i < MAX_N_CHARS; i++)
3538      {
3539      chars[i << 1] = NOTACHAR;
3540      chars[(i << 1) + 1] = 0;
3541      bytes[i * MAX_N_BYTES] = 0;
3542      }
3543    
3544    max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
3545    
3546    if (max <= 1)
3547      return FALSE;
3548    
3549    for (i = 0; i < max; i++)
3550      {
3551      mask = chars[(i << 1) + 1];
3552      ones[i] = ones_in_half_byte[mask & 0xf];
3553      mask >>= 4;
3554      while (mask != 0)
3555        {
3556        ones[i] += ones_in_half_byte[mask & 0xf];
3557        mask >>= 4;
3558        }
3559      }
3560    
3561    in_range = FALSE;
3562    for (i = 0; i <= max; i++)
3563      {
3564      if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
3565        {
3566        range_len = i - from;
3567        range_right = i - 1;
3568        }
3569    
3570      if (i < max && bytes[i * MAX_N_BYTES] < 255)
3571        {
3572        if (!in_range)
3573          {
3574          in_range = TRUE;
3575          from = i;
3576          }
3577        }
3578      else if (in_range)
3579        in_range = FALSE;
3580      }
3581    
3582    if (range_right >= 0)
3583      {
3584      /* Since no data is consumed (see the assert in the beginning
3585      of this function), this space can be reallocated. */
3586      if (common->read_only_data)
3587        SLJIT_FREE(common->read_only_data);
3588    
3589      common->read_only_data_size += 256;
3590      common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
3591      if (common->read_only_data == NULL)
3592        return TRUE;
3593    
3594      update_table = (sljit_ub *)common->read_only_data;
3595      common->read_only_data_ptr = (sljit_uw *)(update_table + 256);
3596      memset(update_table, IN_UCHARS(range_len), 256);
3597    
3598      for (i = 0; i < range_len; i++)
3599        {
3600        byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
3601        SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
3602        byte_set_end = byte_set + byte_set[0];
3603        byte_set++;
3604        while (byte_set <= byte_set_end)
3605          {
3606          if (update_table[*byte_set] > IN_UCHARS(i))
3607            update_table[*byte_set] = IN_UCHARS(i);
3608          byte_set++;
3609          }
3610      }      }
3611      }
3612    
3613    if (location >= MAX_N_CHARS * 2 || must_stop != 0)  offsets[0] = -1;
3614    /* Scan forward. */
3615    for (i = 0; i < max; i++)
3616      if (ones[i] <= 2) {
3617        offsets[0] = i;
3618      break;      break;
3619    }    }
3620    
3621  /* At least two characters are required. */  if (offsets[0] < 0 && range_right < 0)
3622  if (location < 2 * 2)    return FALSE;
3623    
3624    if (offsets[0] >= 0)
3625      {
3626      /* Scan backward. */
3627      offsets[1] = -1;
3628      for (i = max - 1; i > offsets[0]; i--)
3629        if (ones[i] <= 2 && i != range_right)
3630          {
3631          offsets[1] = i;
3632          break;
3633          }
3634    
3635      /* This case is handled better by fast_forward_first_char. */
3636      if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
3637      return FALSE;      return FALSE;
3638    
3639      offsets[2] = -1;
3640      /* We only search for a middle character if there is no range check. */
3641      if (offsets[1] >= 0 && range_right == -1)
3642        {
3643        /* Scan from middle. */
3644        for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3645          if (ones[i] <= 2)
3646            {
3647            offsets[2] = i;
3648            break;
3649            }
3650    
3651        if (offsets[2] == -1)
3652          {
3653          for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3654            if (ones[i] <= 2)
3655              {
3656              offsets[2] = i;
3657              break;
3658              }
3659          }
3660        }
3661    
3662      SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3663      SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3664    
3665      chars[0] = chars[offsets[0] << 1];
3666      chars[1] = chars[(offsets[0] << 1) + 1];
3667      if (offsets[2] >= 0)
3668        {
3669        chars[2] = chars[offsets[2] << 1];
3670        chars[3] = chars[(offsets[2] << 1) + 1];
3671        }
3672      if (offsets[1] >= 0)
3673        {
3674        chars[4] = chars[offsets[1] << 1];
3675        chars[5] = chars[(offsets[1] << 1) + 1];
3676        }
3677      }
3678    
3679    max -= 1;
3680  if (firstline)  if (firstline)
3681    {    {
3682    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
3683      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3684    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3685    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3686      quit = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
3687      OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3688      JUMPHERE(quit);
3689    }    }
3690  else  else
3691    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3692    
3693    #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3694    if (range_right >= 0)
3695      OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3696    #endif
3697    
3698  start = LABEL();  start = LABEL();
3699  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3700    
3701  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
3702  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
3703  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  if (range_right >= 0)
3704  if (chars[1] != 0)    {
3705    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);  #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3706  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3707  if (location > 2 * 2)  #else
3708    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3709  if (chars[3] != 0)  #endif
3710    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);  
3711  CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);  #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3712  if (location > 2 * 2)    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3713    {  #else
3714    if (chars[5] != 0)    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3715      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);  #endif
3716    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3717      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3718      }
3719    
3720    if (offsets[0] >= 0)
3721      {
3722      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3723      if (offsets[1] >= 0)
3724        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3725      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3726    
3727      if (chars[1] != 0)
3728        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3729      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3730      if (offsets[2] >= 0)
3731        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3732    
3733      if (offsets[1] >= 0)
3734        {
3735        if (chars[5] != 0)
3736          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3737        CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3738        }
3739    
3740      if (offsets[2] >= 0)
3741        {
3742        if (chars[3] != 0)
3743          OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3744        CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3745        }
3746      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3747    }    }
 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
3748    
3749  JUMPHERE(quit);  JUMPHERE(quit);
3750    
3751  if (firstline)  if (firstline)
3752      {
3753      if (range_right >= 0)
3754        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3755    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3756      if (range_right >= 0)
3757        {
3758        quit = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3759        OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3760        JUMPHERE(quit);
3761        }
3762      }
3763  else  else
3764    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3765  return TRUE;  return TRUE;
3766  }  }
3767    
3768  #undef MAX_N_CHARS  #undef MAX_N_CHARS
3769    #undef MAX_N_BYTES
3770    
3771  static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)  static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3772  {  {
# Line 3167  if (common->nltype == NLTYPE_FIXED && co Line 3872  if (common->nltype == NLTYPE_FIXED && co
3872    JUMPHERE(lastchar);    JUMPHERE(lastchar);
3873    
3874    if (firstline)    if (firstline)
3875      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);      OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3876    return;    return;
3877    }    }
3878    
# Line 3177  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_ Line 3882  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_
3882  skip_char_back(common);  skip_char_back(common);
3883    
3884  loop = LABEL();  loop = LABEL();
3885  read_char(common);  common->ff_newline_shortcut = loop;
3886    
3887    read_char_range(common, common->nlmin, common->nlmax, TRUE);
3888  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3889  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3890    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
# Line 3206  if (firstline) Line 3913  if (firstline)
3913    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3914  }  }
3915    
3916  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3917    
3918  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3919  {  {
3920  DEFINE_COMPILER;  DEFINE_COMPILER;
3921  struct sljit_label *start;  struct sljit_label *start;
3922  struct sljit_jump *quit;  struct sljit_jump *quit;
3923  struct sljit_jump *found = NULL;  struct sljit_jump *found = NULL;
3924  jump_list *matches = NULL;  jump_list *matches = NULL;
 pcre_uint8 inverted_start_bits[32];  
 int i;  
3925  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3926  struct sljit_jump *jump;  struct sljit_jump *jump;
3927  #endif  #endif
3928    
 for (i = 0; i < 32; ++i)  
   inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);  
   
3929  if (firstline)  if (firstline)
3930    {    {
3931    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
# Line 3239  if (common->utf) Line 3941  if (common->utf)
3941    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3942  #endif  #endif
3943    
3944  if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))  if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3945    {    {
3946  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3947    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
# Line 3248  if (!check_class_ranges(common, inverted Line 3950  if (!check_class_ranges(common, inverted
3950  #endif  #endif
3951    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3952    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3953    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3954    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3955    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3956    found = JUMP(SLJIT_C_NOT_ZERO);    found = JUMP(SLJIT_C_NOT_ZERO);
# Line 3451  JUMPHERE(skipread); Line 4153  JUMPHERE(skipread);
4153    
4154  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4155  check_str_end(common, &skipread_list);  check_str_end(common, &skipread_list);
4156  peek_char(common);  peek_char(common, READ_CHAR_MAX);
4157    
4158  /* Testing char type. This is a code duplication. */  /* Testing char type. This is a code duplication. */
4159  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 3497  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE Line 4199  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE
4199  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4200  }  }
4201    
4202  /*  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
   range format:  
   
   ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).  
   ranges[1] = first bit (0 or 1)  
   ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)  
 */  
   
 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)  
4203  {  {
4204  DEFINE_COMPILER;  DEFINE_COMPILER;
4205  struct sljit_jump *jump;  int ranges[MAX_RANGE_SIZE];
   
 if (ranges[0] < 0)  
   return FALSE;  
   
 switch(ranges[0])  
   {  
   case 1:  
   if (readch)  
     read_char(common);  
   add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));  
   return TRUE;  
   
   case 2:  
   if (readch)  
     read_char(common);  
   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);  
   add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));  
   return TRUE;  
   
   case 4:  
   if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])  
     {  
     if (readch)  
       read_char(common);  
     if (ranges[1] != 0)  
       {  
       add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));  
       add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));  
       }  
     else  
       {  
       jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);  
       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));  
       JUMPHERE(jump);  
       }  
     return TRUE;  
     }  
   if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))  
     {  
     if (readch)  
       read_char(common);  
     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);  
     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);  
     add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));  
     return TRUE;  
     }  
   return FALSE;  
   
   default:  
   return FALSE;  
   }  
 }  
   
 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)  
 {  
 int i, bit, length;  
 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;  
   
 bit = ctypes[0] & flag;  
 ranges[0] = -1;  
 ranges[1] = bit != 0 ? 1 : 0;  
 length = 0;  
   
 for (i = 1; i < 256; i++)  
   if ((ctypes[i] & flag) != bit)  
     {  
     if (length >= MAX_RANGE_SIZE)  
       return;  
     ranges[2 + length] = i;  
     length++;  
     bit ^= flag;  
     }  
   
 if (bit != 0)  
   {  
   if (length >= MAX_RANGE_SIZE)  
     return;  
   ranges[2 + length] = 256;  
   length++;  
   }  
 ranges[0] = length;  
 }  
   
 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)  
 {  
 int ranges[2 + MAX_RANGE_SIZE];  
4206  pcre_uint8 bit, cbit, all;  pcre_uint8 bit, cbit, all;
4207  int i, byte, length = 0;  int i, byte, length = 0;
4208    
4209  bit = bits[0] & 0x1;  bit = bits[0] & 0x1;
4210  ranges[1] = bit;  /* Every bit of 'all' will be zero or one (since bit is zero or one). */
 /* Can be 0 or 255. */  
4211  all = -bit;  all = -bit;
4212    
4213  for (i = 0; i < 256; )  for (i = 0; i < 256; )
# Line 3615  for (i = 0; i < 256; ) Line 4222  for (i = 0; i < 256; )
4222        {        {
4223        if (length >= MAX_RANGE_SIZE)        if (length >= MAX_RANGE_SIZE)
4224          return FALSE;          return FALSE;
4225        ranges[2 + length] = i;        ranges[length] = i;
4226        length++;        length++;
4227        bit = cbit;        bit = cbit;
4228        all = -cbit;        all = -cbit;
# Line 3628  if (((bit == 0) && nclass) || ((bit == 1 Line 4235  if (((bit == 0) && nclass) || ((bit == 1
4235    {    {
4236    if (length >= MAX_RANGE_SIZE)    if (length >= MAX_RANGE_SIZE)
4237      return FALSE;      return FALSE;
4238    ranges[2 + length] = 256;    ranges[length] = 256;
4239    length++;    length++;
4240    }    }
 ranges[0] = length;  
4241    
4242  return check_ranges(common, ranges, backtracks, FALSE);  if (length < 0 || length > 4)
4243      return FALSE;
4244    
4245    bit = bits[0] & 0x1;
4246    if (invert) bit ^= 0x1;
4247    
4248    /* No character is accepted. */
4249    if (length == 0 && bit == 0)
4250      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4251    
4252    switch(length)
4253      {
4254      case 0:
4255      /* When bit != 0, all characters are accepted. */
4256      return TRUE;
4257    
4258      case 1:
4259      add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4260      return TRUE;
4261    
4262      case 2:
4263      if (ranges[0] + 1 != ranges[1])
4264        {
4265        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4266        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4267        }
4268      else
4269        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4270      return TRUE;
4271    
4272      case 3:
4273      if (bit != 0)
4274        {
4275        add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4276        if (ranges[0] + 1 != ranges[1])
4277          {
4278          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4279          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4280          }
4281        else
4282          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4283        return TRUE;
4284        }
4285    
4286      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4287      if (ranges[1] + 1 != ranges[2])
4288        {
4289        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4290        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4291        }
4292      else
4293        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4294      return TRUE;
4295    
4296      case 4:
4297      if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4298          && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4299          && is_powerof2(ranges[2] - ranges[0]))
4300        {
4301        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4302        if (ranges[2] + 1 != ranges[3])
4303          {
4304          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4305          add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4306          }
4307        else
4308          add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4309        return TRUE;
4310        }
4311    
4312      if (bit != 0)
4313        {
4314        i = 0;
4315        if (ranges[0] + 1 != ranges[1])
4316          {
4317          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4318          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4319          i = ranges[0];
4320          }
4321        else
4322          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4323    
4324        if (ranges[2] + 1 != ranges[3])
4325          {
4326          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4327          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4328          }
4329        else
4330          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4331        return TRUE;
4332        }
4333    
4334      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4335      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4336      if (ranges[1] + 1 != ranges[2])
4337        {
4338        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4339        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4340        }
4341      else
4342        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4343      return TRUE;
4344    
4345      default:
4346      SLJIT_ASSERT_STOP();
4347      return FALSE;
4348      }
4349  }  }
4350    
4351  static void check_anynewline(compiler_common *common)  static void check_anynewline(compiler_common *common)
# Line 4000  return cc; Line 4712  return cc;
4712  #define SET_TYPE_OFFSET(value) \  #define SET_TYPE_OFFSET(value) \
4713    if ((value) != typeoffset) \    if ((value) != typeoffset) \
4714      { \      { \
4715      if ((value) > typeoffset) \      if ((value) < typeoffset) \
       OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \  
     else \  
4716        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4717        else \
4718          OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4719      } \      } \
4720    typeoffset = (value);    typeoffset = (value);
4721    
4722  #define SET_CHAR_OFFSET(value) \  #define SET_CHAR_OFFSET(value) \
4723    if ((value) != charoffset) \    if ((value) != charoffset) \
4724      { \      { \
4725      if ((value) > charoffset) \      if ((value) < charoffset) \
4726        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4727      else \      else \
4728        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4729      } \      } \
4730    charoffset = (value);    charoffset = (value);
4731    
4732  static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)  static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4733  {  {
4734  DEFINE_COMPILER;  DEFINE_COMPILER;
4735  jump_list *found = NULL;  jump_list *found = NULL;
4736  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;  jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4737  pcre_int32 c, charoffset;  sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4738  const pcre_uint32 *other_cases;  struct sljit_jump *jump = NULL;
4739  struct sljit_jump *jump = NULL;  pcre_uchar *ccbegin;
4740  pcre_uchar *ccbegin;  int compares, invertcmp, numberofcmps;
4741  int compares, invertcmp, numberofcmps;  #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4742  #ifdef SUPPORT_UCP  BOOL utf = common->utf;
 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;  
 BOOL charsaved = FALSE;  
 int typereg = TMP1, scriptreg = TMP1;  
 pcre_int32 typeoffset;  
 #endif  
   
 /* Although SUPPORT_UTF must be defined, we are  
    not necessary in utf mode even in 8 bit mode. */  
 detect_partial_match(common, backtracks);  
 read_char(common);  
   
 if ((*cc++ & XCL_MAP) != 0)  
   {  
   OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);  
 #ifndef COMPILE_PCRE8  
   jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
 #endif  
   
   if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))  
     {  
     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);  
     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);  
     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);  
     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);  
     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);  
     add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));  
     }  
   
 #ifndef COMPILE_PCRE8  
   JUMPHERE(jump);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
     JUMPHERE(jump);  
4743  #endif  #endif
4744    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);  
4745  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4746    charsaved = TRUE;  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4747    BOOL charsaved = FALSE;
4748    int typereg = TMP1, scriptreg = TMP1;
4749    const pcre_uint32 *other_cases;
4750    sljit_uw typeoffset;
4751  #endif  #endif
   cc += 32 / sizeof(pcre_uchar);  
   }  
4752    
4753  /* Scanning the necessary info. */  /* Scanning the necessary info. */
4754    cc++;
4755  ccbegin = cc;  ccbegin = cc;
4756  compares = 0;  compares = 0;
4757    if (cc[-1] & XCL_MAP)
4758      {
4759      min = 0;
4760      cc += 32 / sizeof(pcre_uchar);
4761      }
4762    
4763  while (*cc != XCL_END)  while (*cc != XCL_END)
4764    {    {
4765    compares++;    compares++;
4766    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4767      {      {
4768      cc += 2;      cc ++;
4769  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4770      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (c > max) max = c;
4771  #endif      if (c < min) min = c;
4772  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4773      needschar = TRUE;      needschar = TRUE;
4774  #endif  #endif
4775      }      }
4776    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
4777      {      {
4778      cc += 2;      cc ++;
4779  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4780      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (c < min) min = c;
4781  #endif      GETCHARINCTEST(c, cc);
4782      cc++;      if (c > max) max = c;
 #ifdef SUPPORT_UTF  
     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
4783  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4784      needschar = TRUE;      needschar = TRUE;
4785  #endif  #endif
# Line 4107  while (*cc != XCL_END) Line 4789  while (*cc != XCL_END)
4789      {      {
4790      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4791      cc++;      cc++;
4792        if (*cc == PT_CLIST)
4793          {
4794          other_cases = PRIV(ucd_caseless_sets) + cc[1];
4795          while (*other_cases != NOTACHAR)
4796            {
4797            if (*other_cases > max) max = *other_cases;
4798            if (*other_cases < min) min = *other_cases;
4799            other_cases++;
4800            }
4801          }
4802        else
4803          {
4804          max = READ_CHAR_MAX;
4805          min = 0;
4806          }
4807    
4808      switch(*cc)      switch(*cc)
4809        {        {
4810        case PT_ANY:        case PT_ANY:
# Line 4126  while (*cc != XCL_END) Line 4824  while (*cc != XCL_END)
4824        case PT_SPACE:        case PT_SPACE:
4825        case PT_PXSPACE:        case PT_PXSPACE:
4826        case PT_WORD:        case PT_WORD:
4827          case PT_PXGRAPH:
4828          case PT_PXPRINT:
4829          case PT_PXPUNCT:
4830        needstype = TRUE;        needstype = TRUE;
4831        needschar = TRUE;        needschar = TRUE;
4832        break;        break;
# Line 4144  while (*cc != XCL_END) Line 4845  while (*cc != XCL_END)
4845  #endif  #endif
4846    }    }
4847    
4848    /* We are not necessarily in UTF mode, even in 8-bit mode. */
4849    cc = ccbegin;
4850    detect_partial_match(common, backtracks);
4851    read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4852    
4853    if ((cc[-1] & XCL_HASPROP) == 0)
4854      {
4855      if ((cc[-1] & XCL_MAP) != 0)
4856        {
4857        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4858        if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4859          {
4860          OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4861          OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4862          OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4863          OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4864          OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4865          add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4866          }
4867    
4868        add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4869        JUMPHERE(jump);
4870    
4871        cc += 32 / sizeof(pcre_uchar);
4872        }
4873      else
4874        {
4875        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4876        add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4877        }
4878      }
4879    else if ((cc[-1] & XCL_MAP) != 0)
4880      {
4881      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4882    #ifdef SUPPORT_UCP
4883      charsaved = TRUE;
4884    #endif
4885      if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4886        {
4887    #ifdef COMPILE_PCRE8
4888        SLJIT_ASSERT(common->utf);
4889    #endif
4890        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4891    
4892        OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4893        OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4894        OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4895        OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4896        OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4897        add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4898    
4899        JUMPHERE(jump);
4900        }
4901    
4902      OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4903      cc += 32 / sizeof(pcre_uchar);
4904      }
4905    
4906  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4907  /* Simple register allocation. TMP1 is preferred if possible. */  /* Simple register allocation. TMP1 is preferred if possible. */
4908  if (needstype || needsscript)  if (needstype || needsscript)
# Line 4185  if (needstype || needsscript) Line 4944  if (needstype || needsscript)
4944  #endif  #endif
4945    
4946  /* Generating code. */  /* Generating code. */
 cc = ccbegin;  
4947  charoffset = 0;  charoffset = 0;
4948  numberofcmps = 0;  numberofcmps = 0;
4949  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 4201  while (*cc != XCL_END) Line 4959  while (*cc != XCL_END)
4959    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4960      {      {
4961      cc ++;      cc ++;
4962  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4963    
4964      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4965        {        {
4966        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4967        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4968        numberofcmps++;        numberofcmps++;
4969        }        }
4970      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4971        {        {
4972        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4973        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4974        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4975        numberofcmps = 0;        numberofcmps = 0;
4976        }        }
4977      else      else
4978        {        {
4979        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4980        numberofcmps = 0;        numberofcmps = 0;
4981        }        }
4982      }      }
4983    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
4984      {      {
4985      cc ++;      cc ++;
4986  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4987      SET_CHAR_OFFSET(c);      SET_CHAR_OFFSET(c);
4988  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4989      if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4990      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4991        {        {
4992        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4993        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4994        numberofcmps++;        numberofcmps++;
4995        }        }
4996      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4997        {        {
4998        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4999        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5000        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5001        numberofcmps = 0;        numberofcmps = 0;
5002        }        }
5003      else      else
5004        {        {
5005        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5006        numberofcmps = 0;        numberofcmps = 0;
5007        }        }
5008      }      }
# Line 4330  while (*cc != XCL_END) Line 5068  while (*cc != XCL_END)
5068        break;        break;
5069    
5070        case PT_WORD:        case PT_WORD:
5071        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5072        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5073        /* Fall through. */        /* Fall through. */
5074    
# Line 4378  while (*cc != XCL_END) Line 5116  while (*cc != XCL_END)
5116          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5117          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5118    
5119          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5120          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5121    
5122          other_cases += 3;          other_cases += 3;
5123          }          }
5124        else        else
5125          {          {
5126          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5127          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5128          }          }
5129    
5130        while (*other_cases != NOTACHAR)        while (*other_cases != NOTACHAR)
5131          {          {
5132          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5133          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5134          }          }
5135        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5136        break;        break;
5137    
5138        case PT_UCNC:        case PT_UCNC:
5139        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5140        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5141        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5142        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5143        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5144        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5145    
5146        SET_CHAR_OFFSET(0xa0);        SET_CHAR_OFFSET(0xa0);
5147        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5148        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5149        SET_CHAR_OFFSET(0);        SET_CHAR_OFFSET(0);
5150        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5151        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
5152        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5153        break;        break;
5154    
5155          case PT_PXGRAPH:
5156          /* C and Z groups are the farthest two groups. */
5157          SET_TYPE_OFFSET(ucp_Ll);
5158          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5159          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5160    
5161          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5162    
5163          /* In case of ucp_Cf, we overwrite the result. */
5164          SET_CHAR_OFFSET(0x2066);
5165          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5166          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5167    
5168          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5169          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5170    
5171          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5172          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5173    
5174          JUMPHERE(jump);
5175          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5176          break;
5177    
5178          case PT_PXPRINT:
5179          /* C and Z groups are the farthest two groups. */
5180          SET_TYPE_OFFSET(ucp_Ll);
5181          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5182          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5183    
5184          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5185          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5186    
5187          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5188    
5189          /* In case of ucp_Cf, we overwrite the result. */
5190          SET_CHAR_OFFSET(0x2066);
5191          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5192          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5193    
5194          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5195          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5196    
5197          JUMPHERE(jump);
5198          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5199          break;
5200    
5201          case PT_PXPUNCT:
5202          SET_TYPE_OFFSET(ucp_Sc);
5203          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5204          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5205    
5206          SET_CHAR_OFFSET(0);
5207          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5208          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5209    
5210          SET_TYPE_OFFSET(ucp_Pc);
5211          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5212          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5213          jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5214          break;
5215        }        }
5216      cc += 2;      cc += 2;
5217      }      }
# Line 4444  struct sljit_label *label; Line 5243  struct sljit_label *label;
5243  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5244  pcre_uchar propdata[5];  pcre_uchar propdata[5];
5245  #endif  #endif
5246  #endif  #endif /* SUPPORT_UTF */
5247    
5248  switch(type)  switch(type)
5249    {    {
# Line 4469  switch(type) Line 5268  switch(type)
5268    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
5269    case OP_DIGIT:    case OP_DIGIT:
5270    /* Digits are usually 0-9, so it is worth optimizing them. */    /* Digits are usually 0-9, so it is worth optimizing them. */
   if (common->digits[0] == -2)  
     get_ctype_ranges(common, ctype_digit, common->digits);  
5271    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5272    /* Flip the starting bit in the negative case. */  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5273    if (type == OP_NOT_DIGIT)    if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5274      common->digits[1] ^= 1;      read_char7_type(common, type == OP_NOT_DIGIT);
5275    if (!check_ranges(common, common->digits, backtracks, TRUE))    else
5276      {  #endif
5277      read_char8_type(common);      read_char8_type(common, type == OP_NOT_DIGIT);
5278      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);      /* Flip the starting bit in the negative case. */
5279      add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5280      }    add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
   if (type == OP_NOT_DIGIT)  
     common->digits[1] ^= 1;  
5281    return cc;    return cc;
5282    
5283    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
5284    case OP_WHITESPACE:    case OP_WHITESPACE:
5285    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5286    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5287      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5288        read_char7_type(common, type == OP_NOT_WHITESPACE);
5289      else
5290    #endif
5291        read_char8_type(common, type == OP_NOT_WHITESPACE);
5292    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5293    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5294    return cc;    return cc;
# Line 4496  switch(type) Line 5296  switch(type)
5296    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
5297    case OP_WORDCHAR:    case OP_WORDCHAR:
5298    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5299    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5300      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5301        read_char7_type(common, type == OP_NOT_WORDCHAR);
5302      else
5303    #endif
5304        read_char8_type(common, type == OP_NOT_WORDCHAR);
5305    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5306    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5307    return cc;    return cc;
5308    
5309    case OP_ANY:    case OP_ANY:
5310    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5311    read_char(common);    read_char_range(common, common->nlmin, common->nlmax, TRUE);
5312    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5313      {      {
5314      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
# Line 4559  switch(type) Line 5364  switch(type)
5364  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5365    case OP_NOTPROP:    case OP_NOTPROP:
5366    case OP_PROP:    case OP_PROP:
5367    propdata[0] = 0;    propdata[0] = XCL_HASPROP;
5368    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5369    propdata[2] = cc[0];    propdata[2] = cc[0];
5370    propdata[3] = cc[1];    propdata[3] = cc[1];
# Line 4571  switch(type) Line 5376  switch(type)
5376    
5377    case OP_ANYNL:    case OP_ANYNL:
5378    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5379    read_char(common);    read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5380    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5381    /* We don't need to handle soft partial matching case. */    /* We don't need to handle soft partial matching case. */
5382    end_list = NULL;    end_list = NULL;
# Line 4593  switch(type) Line 5398  switch(type)
5398    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
5399    case OP_HSPACE:    case OP_HSPACE:
5400    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5401    read_char(common);    read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5402    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5403    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5404    return cc;    return cc;
# Line 4601  switch(type) Line 5406  switch(type)
5406    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
5407    case OP_VSPACE:    case OP_VSPACE:
5408    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5409    read_char(common);    read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5410    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5411    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5412    return cc;    return cc;
# Line 4700  switch(type) Line 5505  switch(type)
5505      else      else
5506        {        {
5507        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5508        read_char(common);        read_char_range(common, common->nlmin, common->nlmax, TRUE);
5509        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5510        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5511        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
# Line 4748  switch(type) Line 5553  switch(type)
5553    else    else
5554      {      {
5555      skip_char_back(common);      skip_char_back(common);
5556      read_char(common);      read_char_range(common, common->nlmin, common->nlmax, TRUE);
5557      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5558      }      }
5559    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 4799  switch(type) Line 5604  switch(type)
5604      }      }
5605    else    else
5606      {      {
5607      peek_char(common);      peek_char(common, common->nlmax);
5608      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5609      }      }
5610    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 4823  switch(type) Line 5628  switch(type)
5628  #endif  #endif
5629      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5630      }      }
5631    
5632    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
   read_char(common);  
5633  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
5634    if (common->utf)    if (common->utf)
5635      {      {
# Line 4833  switch(type) Line 5638  switch(type)
5638    else    else
5639  #endif  #endif
5640      c = *cc;      c = *cc;
5641    
5642    if (type == OP_CHAR || !char_has_othercase(common, cc))    if (type == OP_CHAR || !char_has_othercase(common, cc))
5643      {      {
5644        read_char_range(common, c, c, FALSE);
5645      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5646      return cc + length;      return cc + length;
5647      }      }
5648    oc = char_othercase(common, c);    oc = char_othercase(common, c);
5649      read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5650    bit = c ^ oc;    bit = c ^ oc;
5651    if (is_powerof2(bit))    if (is_powerof2(bit))
5652      {      {
# Line 4846  switch(type) Line 5654  switch(type)
5654      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5655      return cc + length;      return cc + length;
5656      }      }
5657    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);    jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5658    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5659    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);    JUMPHERE(jump[0]);
   OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
   add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));  
5660    return cc + length;    return cc + length;
5661    
5662    case OP_NOT:    case OP_NOT:
# Line 4885  switch(type) Line 5691  switch(type)
5691  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
5692        {        {
5693        GETCHARLEN(c, cc, length);        GETCHARLEN(c, cc, length);
       read_char(common);  
5694        }        }
5695      }      }
5696    else    else
5697  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
     {  
     read_char(common);  
5698      c = *cc;      c = *cc;
     }  
5699    
5700    if (type == OP_NOT || !char_has_othercase(common, cc))    if (type == OP_NOT || !char_has_othercase(common, cc))
5701        {
5702        read_char_range(common, c, c, TRUE);
5703      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5704        }
5705    else    else
5706      {      {
5707      oc = char_othercase(common, c);      oc = char_othercase(common, c);
5708        read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5709      bit = c ^ oc;      bit = c ^ oc;
5710      if (is_powerof2(bit))      if (is_powerof2(bit))
5711        {        {
# Line 4917  switch(type) Line 5723  switch(type)
5723    case OP_CLASS:    case OP_CLASS:
5724    case OP_NCLASS:    case OP_NCLASS:
5725    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5726    read_char(common);  
5727    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5728      bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5729      read_char_range(common, 0, bit, type == OP_NCLASS);
5730    #else
5731      read_char_range(common, 0, 255, type == OP_NCLASS);
5732    #endif
5733    
5734      if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5735      return cc + 32 / sizeof(pcre_uchar);      return cc + 32 / sizeof(pcre_uchar);
5736    
5737  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5738    jump[0] = NULL;    jump[0] = NULL;
 #ifdef COMPILE_PCRE8  
   /* This check only affects 8 bit mode. In other modes, we  
   always need to compare the value with 255. */  
5739    if (common->utf)    if (common->utf)
 #endif /* COMPILE_PCRE8 */  
5740      {      {
5741      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5742      if (type == OP_CLASS)      if (type == OP_CLASS)
5743        {        {
5744        add_jump(compiler, backtracks, jump[0]);        add_jump(compiler, backtracks, jump[0]);
5745        jump[0] = NULL;        jump[0] = NULL;
5746        }        }
5747      }      }
5748  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #elif !defined COMPILE_PCRE8
5749      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5750      if (type == OP_CLASS)
5751        {
5752        add_jump(compiler, backtracks, jump[0]);
5753        jump[0] = NULL;
5754        }
5755    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5756    
5757    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5758    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5759    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5760    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5761    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5762    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5763    
5764  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5765    if (jump[0] != NULL)    if (jump[0] != NULL)
5766      JUMPHERE(jump[0]);      JUMPHERE(jump[0]);
5767  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #endif
5768    
5769    return cc + 32 / sizeof(pcre_uchar);    return cc + 32 / sizeof(pcre_uchar);
5770    
5771  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
# Line 7280  if (*cc == OP_FAIL) Line 8099  if (*cc == OP_FAIL)
8099    return cc + 1;    return cc + 1;
8100    }    }
8101    
8102  if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)  if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
8103    {    {
8104    /* No need to check notempty conditions. */    /* No need to check notempty conditions. */
8105    if (common->accept_label == NULL)    if (common->accept_label == NULL)
# Line 7982  if (bra == OP_BRAZERO) Line 8801  if (bra == OP_BRAZERO)
8801  static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)  static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8802  {  {
8803  DEFINE_COMPILER;  DEFINE_COMPILER;
8804  int opcode, stacksize, count;  int opcode, stacksize, alt_count, alt_max;
8805  int offset = 0;  int offset = 0;
8806  int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;  int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
8807  int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;  int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
8808  pcre_uchar *cc = current->cc;  pcre_uchar *cc = current->cc;
8809  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
8810  pcre_uchar *ccprev;  pcre_uchar *ccprev;
 jump_list *jumplist = NULL;  
 jump_list *jumplistitem = NULL;  
8811  pcre_uchar bra = OP_BRA;  pcre_uchar bra = OP_BRA;
8812  pcre_uchar ket;  pcre_uchar ket;
8813  assert_backtrack *assert;  assert_backtrack *assert;
8814  BOOL has_alternatives;  BOOL has_alternatives;
8815  BOOL needs_control_head = FALSE;  BOOL needs_control_head = FALSE;
8816  struct sljit_jump *brazero = NULL;  struct sljit_jump *brazero = NULL;
8817    struct sljit_jump *alt1 = NULL;
8818    struct sljit_jump *alt2 = NULL;
8819  struct sljit_jump *once = NULL;  struct sljit_jump *once = NULL;
8820  struct sljit_jump *cond = NULL;  struct sljit_jump *cond = NULL;
8821  struct sljit_label *rmin_label = NULL;  struct sljit_label *rmin_label = NULL;
# Line 8034  if (SLJIT_UNLIKELY(opcode == OP_COND) && Line 8853  if (SLJIT_UNLIKELY(opcode == OP_COND) &&
8853  if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))  if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8854    opcode = OP_ONCE;    opcode = OP_ONCE;
8855    
8856    alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
8857    
8858  /* Decoding the needs_control_head in framesize. */  /* Decoding the needs_control_head in framesize. */
8859  if (opcode == OP_ONCE)  if (opcode == OP_ONCE)
8860    {    {
# Line 8147  else if (SLJIT_UNLIKELY(opcode == OP_CON Line 8968  else if (SLJIT_UNLIKELY(opcode == OP_CON
8968      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8969      free_stack(common, 1);      free_stack(common, 1);
8970    
8971      jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));      alt_max = 2;
8972      if (SLJIT_UNLIKELY(!jumplistitem))      alt1 = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
       return;  
     jumplist = jumplistitem;  
     jumplistitem->next = NULL;  
     jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);  
8973      }      }
8974    }    }
8975  else if (*cc == OP_ALT)  else if (has_alternatives)
8976    {    {
   /* Build a jump list. Get the last successfully matched branch index. */  
8977    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8978    free_stack(common, 1);    free_stack(common, 1);
   count = 1;  
   do  
     {  
     /* Append as the last item. */  
     if (jumplist != NULL)  
       {  
       jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));  
       jumplistitem = jumplistitem->next;  
       }  
     else  
       {  
       jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));  
       jumplist = jumplistitem;  
       }  
   
     if (SLJIT_UNLIKELY(!jumplistitem))  
       return;  
8979    
8980      jumplistitem->next = NULL;    if (alt_max > 4)
8981      jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);      {
8982      cc += GET(cc, 1);      /* Table jump if alt_max is greater than 4. */
8983        sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)common->read_only_data_ptr);
8984        add_label_addr(common);
8985        }
8986      else
8987        {
8988        if (alt_max == 4)
8989          alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
8990        alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
8991      }      }
   while (*cc == OP_ALT);  
   
   cc = ccbegin + GET(ccbegin, 1);  
8992    }    }
8993    
8994  COMPILE_BACKTRACKINGPATH(current->top);  COMPILE_BACKTRACKINGPATH(current->top);
# Line 8219  if (SLJIT_UNLIKELY(opcode == OP_COND) || Line 9023  if (SLJIT_UNLIKELY(opcode == OP_COND) ||
9023    
9024  if (has_alternatives)  if (has_alternatives)
9025    {    {
9026    count = 1;    alt_count = sizeof(sljit_uw);
9027    do    do
9028      {      {
9029      current->top = NULL;      current->top = NULL;
# Line 8295  if (has_alternatives) Line 9099  if (has_alternatives)
9099        stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);        stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
9100    
9101      if (opcode != OP_ONCE)      if (opcode != OP_ONCE)
9102        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
9103    
9104      if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)      if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
9105        {        {
# Line 8308  if (has_alternatives) Line 9112  if (has_alternatives)
9112    
9113      if (opcode != OP_ONCE)      if (opcode != OP_ONCE)
9114        {        {
9115        SLJIT_ASSERT(jumplist);        if (alt_max > 4)
9116        JUMPHERE(jumplist->jump);          add_label_addr(common);
9117        jumplist = jumplist->next;        else
9118            {
9119            if (alt_count != 2 * sizeof(sljit_uw))
9120              {
9121              JUMPHERE(alt1);
9122              if (alt_max == 3 && alt_count == sizeof(sljit_uw))
9123                alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9124              }
9125            else
9126              {
9127              JUMPHERE(alt2);
9128              if (alt_max == 4)
9129                alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
9130              }
9131            }
9132          alt_count += sizeof(sljit_uw);
9133        }        }
9134    
9135      COMPILE_BACKTRACKINGPATH(current->top);      COMPILE_BACKTRACKINGPATH(current->top);
# Line 8319  if (has_alternatives) Line 9138  if (has_alternatives)
9138      SLJIT_ASSERT(!current->nextbacktracks);      SLJIT_ASSERT(!current->nextbacktracks);
9139      }      }
9140    while (*cc == OP_ALT);    while (*cc == OP_ALT);
   SLJIT_ASSERT(!jumplist);  
9141    
9142    if (cond != NULL)    if (cond != NULL)
9143      {      {
# Line 8920  pcre_uchar *ccend; Line 9738  pcre_uchar *ccend;
9738  executable_functions *functions;  executable_functions *functions;
9739  void *executable_func;  void *executable_func;
9740  sljit_uw executable_size;  sljit_uw executable_size;
9741    sljit_uw total_length;
9742    label_addr_list *label_addr;
9743  struct sljit_label *mainloop_label = NULL;  struct sljit_label *mainloop_label = NULL;
9744  struct sljit_label *continue_match_label;  struct sljit_label *continue_match_label;
9745  struct sljit_label *empty_match_found_label;  struct sljit_label *empty_match_found_label = NULL;
9746  struct sljit_label *empty_match_backtrack_label;  struct sljit_label *empty_match_backtrack_label = NULL;
9747  struct sljit_label *reset_match_label;  struct sljit_label *reset_match_label;
9748    struct sljit_label *quit_label;
9749  struct sljit_jump *jump;  struct sljit_jump *jump;
9750  struct sljit_jump *minlength_check_failed = NULL;  struct sljit_jump *minlength_check_failed = NULL;
9751  struct sljit_jump *reqbyte_notfound = NULL;  struct sljit_jump *reqbyte_notfound = NULL;
9752  struct sljit_jump *empty_match;  struct sljit_jump *empty_match = NULL;
 struct sljit_label *quit_label;  
9753    
9754  SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);  SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
9755  study = extra->study_data;  study = extra->study_data;
# Line 8942  memset(common, 0, sizeof(compiler_common Line 9762  memset(common, 0, sizeof(compiler_common
9762  rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;  rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
9763    
9764  common->start = rootbacktrack.cc;  common->start = rootbacktrack.cc;
9765    common->read_only_data = NULL;
9766    common->read_only_data_size = 0;
9767    common->read_only_data_ptr = NULL;
9768  common->fcc = tables + fcc_offset;  common->fcc = tables + fcc_offset;
9769  common->lcc = (sljit_sw)(tables + lcc_offset);  common->lcc = (sljit_sw)(tables + lcc_offset);
9770  common->mode = mode;  common->mode = mode;
9771    common->might_be_empty = study->minlength == 0;
9772  common->nltype = NLTYPE_FIXED;  common->nltype = NLTYPE_FIXED;
9773  switch(re->options & PCRE_NEWLINE_BITS)  switch(re->options & PCRE_NEWLINE_BITS)
9774    {    {
# Line 8965  switch(re->options & PCRE_NEWLINE_BITS) Line 9789  switch(re->options & PCRE_NEWLINE_BITS)
9789    case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;    case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9790    default: return;    default: return;
9791    }    }
9792    common->nlmax = READ_CHAR_MAX;
9793    common->nlmin = 0;
9794  if ((re->options & PCRE_BSR_ANYCRLF) != 0)  if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9795    common->bsr_nltype = NLTYPE_ANYCRLF;    common->bsr_nltype = NLTYPE_ANYCRLF;
9796  else if ((re->options & PCRE_BSR_UNICODE) != 0)  else if ((re->options & PCRE_BSR_UNICODE) != 0)
# Line 8977  else Line 9803  else
9803    common->bsr_nltype = NLTYPE_ANY;    common->bsr_nltype = NLTYPE_ANY;
9804  #endif  #endif
9805    }    }
9806    common->bsr_nlmax = READ_CHAR_MAX;
9807    common->bsr_nlmin = 0;
9808  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9809  common->ctypes = (sljit_sw)(tables + ctypes_offset);  common->ctypes = (sljit_sw)(tables + ctypes_offset);
 common->digits[0] = -2;  
9810  common->name_table = ((pcre_uchar *)re) + re->name_table_offset;  common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
9811  common->name_count = re->name_count;  common->name_count = re->name_count;
9812  common->name_entry_size = re->name_entry_size;  common->name_entry_size = re->name_entry_size;
# Line 8990  common->utf = (re->options & PCRE_UTF8) Line 9817  common->utf = (re->options & PCRE_UTF8)
9817  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
9818  common->use_ucp = (re->options & PCRE_UCP) != 0;  common->use_ucp = (re->options & PCRE_UCP) != 0;
9819  #endif  #endif
9820    if (common->utf)
9821      {
9822      if (common->nltype == NLTYPE_ANY)
9823        common->nlmax = 0x2029;
9824      else if (common->nltype == NLTYPE_ANYCRLF)
9825        common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9826      else
9827        {
9828        /* We only care about the first newline character. */
9829        common->nlmax = common->newline & 0xff;
9830        }
9831    
9832      if (common->nltype == NLTYPE_FIXED)
9833        common->nlmin = common->newline & 0xff;
9834      else
9835        common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9836    
9837      if (common->bsr_nltype == NLTYPE_ANY)
9838        common->bsr_nlmax = 0x2029;
9839      else
9840        common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9841      common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9842      }
9843  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
9844  ccend = bracketend(rootbacktrack.cc);  ccend = bracketend(common->start);
9845    
9846  /* Calculate the local space size on the stack. */  /* Calculate the local space size on the stack. */
9847  common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);  common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
# Line 9004  memset(common->optimized_cbracket, 0, re Line 9854  memset(common->optimized_cbracket, 0, re
9854  memset(common->optimized_cbracket, 1, re->top_bracket + 1);  memset(common->optimized_cbracket, 1, re->top_bracket + 1);
9855  #endif  #endif
9856    
9857  SLJIT_ASSERT(*rootbacktrack.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);  SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
9858  #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2  #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
9859  common->capture_last_ptr = common->ovector_start;  common->capture_last_ptr = common->ovector_start;
9860  common->ovector_start += sizeof(sljit_sw);  common->ovector_start += sizeof(sljit_sw);
9861  #endif  #endif
9862  if (!check_opcode_types(common, rootbacktrack.cc, ccend))  if (!check_opcode_types(common, common->start, ccend))
9863    {    {
9864    SLJIT_FREE(common->optimized_cbracket);    SLJIT_FREE(common->optimized_cbracket);
9865    return;    return;
# Line 9072  if (common->capture_last_ptr != 0) Line 9922  if (common->capture_last_ptr != 0)
9922  SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));  SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
9923  common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);  common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9924    
9925  common->private_data_ptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(sljit_si));  total_length = ccend - common->start;
9926    common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)));
9927  if (!common->private_data_ptrs)  if (!common->private_data_ptrs)
9928    {    {
9929    SLJIT_FREE(common->optimized_cbracket);    SLJIT_FREE(common->optimized_cbracket);
9930    return;    return;
9931    }    }
9932  memset(common->private_data_ptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int));  memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
9933    
9934  private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);  private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
9935  set_private_data_ptrs(common, &private_data_size, ccend);  set_private_data_ptrs(common, &private_data_size, ccend);
# Line 9091  if (private_data_size > SLJIT_MAX_LOCAL_ Line 9942  if (private_data_size > SLJIT_MAX_LOCAL_
9942    
9943  if (common->has_then)  if (common->has_then)
9944    {    {
9945    common->then_offsets = (pcre_uint8 *)SLJIT_MALLOC(ccend - rootbacktrack.cc);    common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
9946    if (!common->then_offsets)    memset(common->then_offsets, 0, total_length);
9947      set_then_offsets(common, common->start, NULL);
9948      }
9949    
9950    if (common->read_only_data_size > 0)
9951      {
9952      common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
9953      if (common->read_only_data == NULL)
9954      {      {
9955      SLJIT_FREE(common->optimized_cbracket);      SLJIT_FREE(common->optimized_cbracket);
9956      SLJIT_FREE(common->private_data_ptrs);      SLJIT_FREE(common->private_data_ptrs);
9957      return;      return;
9958      }      }
9959    memset(common->then_offsets, 0, ccend - rootbacktrack.cc);    common->read_only_data_ptr = common->read_only_data;
   set_then_offsets(common, rootbacktrack.cc, NULL);  
9960    }    }
9961    
9962  compiler = sljit_create_compiler();  compiler = sljit_create_compiler();
# Line 9107  if (!compiler) Line 9964  if (!compiler)
9964    {    {
9965    SLJIT_FREE(common->optimized_cbracket);    SLJIT_FREE(common->optimized_cbracket);
9966    SLJIT_FREE(common->private_data_ptrs);    SLJIT_FREE(common->private_data_ptrs);
9967    if (common->has_then)    if (common->read_only_data)
9968      SLJIT_FREE(common->then_offsets);      SLJIT_FREE(common->read_only_data);
9969    return;    return;
9970    }    }
9971  common->compiler = compiler;  common->compiler = compiler;
# Line 9147  if ((re->options & PCRE_ANCHORED) == 0) Line 10004  if ((re->options & PCRE_ANCHORED) == 0)
10004    if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)    if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
10005      {      {
10006      if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))      if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
10007        { /* Do nothing */ }        {
10008          /* If read_only_data is reallocated, we might have an allocation failure. */
10009          if (common->read_only_data_size > 0 && common->read_only_data == NULL)
10010            {
10011            sljit_free_compiler(compiler);
10012            SLJIT_FREE(common->optimized_cbracket);
10013            SLJIT_FREE(common->private_data_ptrs);
10014            return;
10015            }
10016          }
10017      else if ((re->flags & PCRE_FIRSTSET) != 0)      else if ((re->flags & PCRE_FIRSTSET) != 0)
10018        fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);        fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
10019      else if ((re->flags & PCRE_STARTLINE) != 0)      else if ((re->flags & PCRE_STARTLINE) != 0)
10020        fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);        fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
10021      else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)      else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
10022        fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);        fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
10023      }      }
10024    }    }
10025  else  else
# Line 9194  if (mode == JIT_PARTIAL_SOFT_COMPILE) Line 10060  if (mode == JIT_PARTIAL_SOFT_COMPILE)
10060  else if (mode == JIT_PARTIAL_HARD_COMPILE)  else if (mode == JIT_PARTIAL_HARD_COMPILE)
10061    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
10062    
10063  compile_matchingpath(common, rootbacktrack.cc, ccend, &rootbacktrack);  compile_matchingpath(common, common->start, ccend, &rootbacktrack);
10064  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10065    {    {
10066    sljit_free_compiler(compiler);    sljit_free_compiler(compiler);
10067    SLJIT_FREE(common->optimized_cbracket);    SLJIT_FREE(common->optimized_cbracket);
10068    SLJIT_FREE(common->private_data_ptrs);    SLJIT_FREE(common->private_data_ptrs);
10069    if (common->has_then)    if (common->read_only_data)
10070      SLJIT_FREE(common->then_offsets);      SLJIT_FREE(common->read_only_data);
10071    return;    return;
10072    }    }
10073    
10074  empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));  if (common->might_be_empty)
10075  empty_match_found_label = LABEL();    {
10076      empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
10077      empty_match_found_label = LABEL();
10078      }
10079    
10080  common->accept_label = LABEL();  common->accept_label = LABEL();
10081  if (common->accept != NULL)  if (common->accept != NULL)
# Line 9230  if (mode != JIT_COMPILE) Line 10099  if (mode != JIT_COMPILE)
10099    return_with_partial_match(common, common->quit_label);    return_with_partial_match(common, common->quit_label);
10100    }    }
10101    
10102  empty_match_backtrack_label = LABEL();  if (common->might_be_empty)
10103      empty_match_backtrack_label = LABEL();
10104  compile_backtrackingpath(common, rootbacktrack.top);  compile_backtrackingpath(common, rootbacktrack.top);
10105  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10106    {    {
10107    sljit_free_compiler(compiler);    sljit_free_compiler(compiler);
10108    SLJIT_FREE(common->optimized_cbracket);    SLJIT_FREE(common->optimized_cbracket);
10109    SLJIT_FREE(common->private_data_ptrs);    SLJIT_FREE(common->private_data_ptrs);
10110    if (common->has_then)    if (common->read_only_data)
10111      SLJIT_FREE(common->then_offsets);      SLJIT_FREE(common->read_only_data);
10112    return;    return;
10113    }    }
10114    
# Line 9266  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SL Line 10136  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SL
10136    
10137  if ((re->options & PCRE_ANCHORED) == 0)  if ((re->options & PCRE_ANCHORED) == 0)
10138    {    {
10139    if ((re->options & PCRE_FIRSTLINE) == 0)    if (common->ff_newline_shortcut != NULL)
10140      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);      {
10141        if ((re->options & PCRE_FIRSTLINE) == 0)
10142          CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
10143        /* There cannot be more newlines here. */
10144        }
10145    else    else
10146      CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);      {
10147        if ((re->options & PCRE_FIRSTLINE) == 0)
10148          CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
10149        else
10150          CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
10151        }
10152    }    }
10153    
10154  /* No more remaining characters. */  /* No more remaining characters. */
# Line 9284  JUMPTO(SLJIT_JUMP, common->quit_label); Line 10163  JUMPTO(SLJIT_JUMP, common->quit_label);
10163    
10164  flush_stubs(common);  flush_stubs(common);
10165    
10166  JUMPHERE(empty_match);  if (common->might_be_empty)
10167  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);    {
10168  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));    JUMPHERE(empty_match);
10169  CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10170  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
10171  CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
10172  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
10173  CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);    CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
10174  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10175      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
10176      JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
10177      }
10178    
10179  common->currententry = common->entries;  common->currententry = common->entries;
10180  common->local_exit = TRUE;  common->local_exit = TRUE;
# Line 9306  while (common->currententry != NULL) Line 10188  while (common->currententry != NULL)
10188      sljit_free_compiler(compiler);      sljit_free_compiler(compiler);
10189      SLJIT_FREE(common->optimized_cbracket);      SLJIT_FREE(common->optimized_cbracket);
10190      SLJIT_FREE(common->private_data_ptrs);      SLJIT_FREE(common->private_data_ptrs);
10191      if (common->has_then)      if (common->read_only_data)
10192        SLJIT_FREE(common->then_offsets);        SLJIT_FREE(common->read_only_data);
10193      return;      return;
10194      }      }
10195    flush_stubs(common);    flush_stubs(common);
# Line 9391  if (common->reset_match != NULL) Line 10273  if (common->reset_match != NULL)
10273    JUMPTO(SLJIT_JUMP, reset_match_label);    JUMPTO(SLJIT_JUMP, reset_match_label);
10274    }    }
10275  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
10276  #ifndef COMPILE_PCRE32  #ifdef COMPILE_PCRE8
10277  if (common->utfreadchar != NULL)  if (common->utfreadchar != NULL)
10278    {    {
10279    set_jumps(common->utfreadchar, LABEL());    set_jumps(common->utfreadchar, LABEL());
10280    do_utfreadchar(common);    do_utfreadchar(common);
10281    }    }
10282  #endif /* !COMPILE_PCRE32 */  if (common->utfreadchar16 != NULL)
10283  #ifdef COMPILE_PCRE8    {
10284      set_jumps(common->utfreadchar16, LABEL());
10285      do_utfreadchar16(common);
10286      }
10287  if (common->utfreadtype8 != NULL)  if (common->utfreadtype8 != NULL)
10288    {    {
10289    set_jumps(common->utfreadtype8, LABEL());    set_jumps(common->utfreadtype8, LABEL());
# Line 9414  if (common->getucd != NULL) Line 10299  if (common->getucd != NULL)
10299    }    }
10300  #endif  #endif
10301    
10302    SLJIT_ASSERT(common->read_only_data + (common->read_only_data_size >> SLJIT_WORD_SHIFT) == common->read_only_data_ptr);
10303  SLJIT_FREE(common->optimized_cbracket);  SLJIT_FREE(common->optimized_cbracket);
10304  SLJIT_FREE(common->private_data_ptrs);  SLJIT_FREE(common->private_data_ptrs);
 if (common->has_then)  
   SLJIT_FREE(common->then_offsets);  
10305    
10306  executable_func = sljit_generate_code(compiler);  executable_func = sljit_generate_code(compiler);
10307  executable_size = sljit_get_generated_code_size(compiler);  executable_size = sljit_get_generated_code_size(compiler);
10308    label_addr = common->label_addrs;
10309    while (label_addr != NULL)
10310      {
10311      *label_addr->addr = sljit_get_label_addr(label_addr->label);
10312      label_addr = label_addr->next;
10313      }
10314  sljit_free_compiler(compiler);  sljit_free_compiler(compiler);
10315  if (executable_func == NULL)  if (executable_func == NULL)
10316      {
10317      if (common->read_only_data)
10318        SLJIT_FREE(common->read_only_data);
10319    return;    return;
10320      }
10321    
10322  /* Reuse the function descriptor if possible. */  /* Reuse the function descriptor if possible. */
10323  if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)  if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
# Line 9443  else Line 10337  else
10337    if (functions == NULL)    if (functions == NULL)
10338      {      {
10339      /* This case is highly unlikely since we just recently      /* This case is highly unlikely since we just recently
10340      freed a lot of memory. Although not impossible. */      freed a lot of memory. Not impossible though. */
10341      sljit_free_code(executable_func);      sljit_free_code(executable_func);
10342        if (common->read_only_data)
10343          SLJIT_FREE(common->read_only_data);
10344      return;      return;
10345      }      }
10346    memset(functions, 0, sizeof(executable_functions));    memset(functions, 0, sizeof(executable_functions));
# Line 9455  else Line 10351  else
10351    }    }
10352    
10353  functions->executable_funcs[mode] = executable_func;  functions->executable_funcs[mode] = executable_func;
10354    functions->read_only_data[mode] = common->read_only_data;
10355  functions->executable_sizes[mode] = executable_size;  functions->executable_sizes[mode] = executable_size;
10356  }  }
10357    
# Line 9641  for (i = 0; i < JIT_NUMBER_OF_COMPILE_MO Line 10538  for (i = 0; i < JIT_NUMBER_OF_COMPILE_MO
10538    {    {
10539    if (functions->executable_funcs[i] != NULL)    if (functions->executable_funcs[i] != NULL)
10540      sljit_free_code(functions->executable_funcs[i]);      sljit_free_code(functions->executable_funcs[i]);
10541      if (functions->read_only_data[i] != NULL)
10542        SLJIT_FREE(functions->read_only_data[i]);
10543    }    }
10544  SLJIT_FREE(functions);  SLJIT_FREE(functions);
10545  }  }

Legend:
Removed from v.1379  
changed lines
  Added in v.1452

  ViewVC Help
Powered by ViewVC 1.1.5