/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1310 by zherczeg, Sat Apr 6 06:51:09 2013 UTC revision 1442 by zherczeg, Sun Jan 12 17:17:29 2014 UTC
# Line 168  typedef struct jit_arguments { Line 168  typedef struct jit_arguments {
168    pcre_uchar *mark_ptr;    pcre_uchar *mark_ptr;
169    void *callout_data;    void *callout_data;
170    /* Everything else after. */    /* Everything else after. */
171      pcre_uint32 limit_match;
172    int real_offset_count;    int real_offset_count;
173    int offset_count;    int offset_count;
   int call_limit;  
174    pcre_uint8 notbol;    pcre_uint8 notbol;
175    pcre_uint8 noteol;    pcre_uint8 noteol;
176    pcre_uint8 notempty;    pcre_uint8 notempty;
# Line 179  typedef struct jit_arguments { Line 179  typedef struct jit_arguments {
179    
180  typedef struct executable_functions {  typedef struct executable_functions {
181    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];    void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182      sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES];
183      sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184    PUBL(jit_callback) callback;    PUBL(jit_callback) callback;
185    void *userdata;    void *userdata;
186    pcre_uint32 top_bracket;    pcre_uint32 top_bracket;
187    sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];    pcre_uint32 limit_match;
188  } executable_functions;  } executable_functions;
189    
190  typedef struct jump_list {  typedef struct jump_list {
# Line 196  typedef struct stub_list { Line 198  typedef struct stub_list {
198    struct stub_list *next;    struct stub_list *next;
199  } stub_list;  } stub_list;
200    
201    typedef struct label_addr_list {
202      struct sljit_label *label;
203      sljit_uw *addr;
204      struct label_addr_list *next;
205    } label_addr_list;
206    
207  enum frame_types {  enum frame_types {
208    no_frame = -1,    no_frame = -1,
209    no_stack = -2    no_stack = -2
# Line 210  typedef int (SLJIT_CALL *jit_function)(j Line 218  typedef int (SLJIT_CALL *jit_function)(j
218    
219  /* The following structure is the key data type for the recursive  /* The following structure is the key data type for the recursive
220  code generator. It is allocated by compile_matchingpath, and contains  code generator. It is allocated by compile_matchingpath, and contains
221  the aguments for compile_backtrackingpath. Must be the first member  the arguments for compile_backtrackingpath. Must be the first member
222  of its descendants. */  of its descendants. */
223  typedef struct backtrack_common {  typedef struct backtrack_common {
224    /* Concatenation stack. */    /* Concatenation stack. */
# Line 305  typedef struct then_trap_backtrack { Line 313  typedef struct then_trap_backtrack {
313    int framesize;    int framesize;
314  } then_trap_backtrack;  } then_trap_backtrack;
315    
316  #define MAX_RANGE_SIZE 6  #define MAX_RANGE_SIZE 4
317    
318  typedef struct compiler_common {  typedef struct compiler_common {
319    /* The sljit generic compiler. */    /* The sljit generic compiler. */
# Line 314  typedef struct compiler_common { Line 322  typedef struct compiler_common {
322    pcre_uchar *start;    pcre_uchar *start;
323    /* Maps private data offset to each opcode. */    /* Maps private data offset to each opcode. */
324    sljit_si *private_data_ptrs;    sljit_si *private_data_ptrs;
325      /* This read-only data is available during runtime. */
326      sljit_uw *read_only_data;
327      /* The total size of the read-only data. */
328      sljit_uw read_only_data_size;
329      /* The next free entry of the read_only_data. */
330      sljit_uw *read_only_data_ptr;
331    /* Tells whether the capturing bracket is optimized. */    /* Tells whether the capturing bracket is optimized. */
332    pcre_uint8 *optimized_cbracket;    pcre_uint8 *optimized_cbracket;
333    /* Tells whether the starting offset is a target of then. */    /* Tells whether the starting offset is a target of then. */
# Line 348  typedef struct compiler_common { Line 362  typedef struct compiler_common {
362    sljit_sw lcc;    sljit_sw lcc;
363    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */    /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
364    int mode;    int mode;
365      /* TRUE, when minlength is greater than 0. */
366      BOOL might_be_empty;
367    /* \K is found in the pattern. */    /* \K is found in the pattern. */
368    BOOL has_set_som;    BOOL has_set_som;
369    /* (*SKIP:arg) is found in the pattern. */    /* (*SKIP:arg) is found in the pattern. */
# Line 362  typedef struct compiler_common { Line 378  typedef struct compiler_common {
378    BOOL positive_assert;    BOOL positive_assert;
379    /* Newline control. */    /* Newline control. */
380    int nltype;    int nltype;
381      pcre_uint32 nlmax;
382      pcre_uint32 nlmin;
383    int newline;    int newline;
384    int bsr_nltype;    int bsr_nltype;
385      pcre_uint32 bsr_nlmax;
386      pcre_uint32 bsr_nlmin;
387    /* Dollar endonly. */    /* Dollar endonly. */
388    int endonly;    int endonly;
389    /* Tables. */    /* Tables. */
390    sljit_sw ctypes;    sljit_sw ctypes;
   int digits[2 + MAX_RANGE_SIZE];  
391    /* Named capturing brackets. */    /* Named capturing brackets. */
392    sljit_uw name_table;    pcre_uchar *name_table;
393    sljit_sw name_count;    sljit_sw name_count;
394    sljit_sw name_entry_size;    sljit_sw name_entry_size;
395    
# Line 380  typedef struct compiler_common { Line 399  typedef struct compiler_common {
399    struct sljit_label *forced_quit_label;    struct sljit_label *forced_quit_label;
400    struct sljit_label *accept_label;    struct sljit_label *accept_label;
401    stub_list *stubs;    stub_list *stubs;
402      label_addr_list *label_addrs;
403    recurse_entry *entries;    recurse_entry *entries;
404    recurse_entry *currententry;    recurse_entry *currententry;
405    jump_list *partialmatch;    jump_list *partialmatch;
# Line 403  typedef struct compiler_common { Line 423  typedef struct compiler_common {
423  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
424    BOOL use_ucp;    BOOL use_ucp;
425  #endif  #endif
 #ifndef COMPILE_PCRE32  
   jump_list *utfreadchar;  
 #endif  
426  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
427      jump_list *utfreadchar;
428      jump_list *utfreadchar16;
429    jump_list *utfreadtype8;    jump_list *utfreadtype8;
430  #endif  #endif
431  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
# Line 463  typedef struct compare_context { Line 482  typedef struct compare_context {
482  #define STACK_TOP     SLJIT_SCRATCH_REG2  #define STACK_TOP     SLJIT_SCRATCH_REG2
483  #define STACK_LIMIT   SLJIT_SAVED_REG3  #define STACK_LIMIT   SLJIT_SAVED_REG3
484  #define ARGUMENTS     SLJIT_SAVED_EREG1  #define ARGUMENTS     SLJIT_SAVED_EREG1
485  #define CALL_COUNT    SLJIT_SAVED_EREG2  #define COUNT_MATCH   SLJIT_SAVED_EREG2
486  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1  #define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
487    
488  /* Local space layout. */  /* Local space layout. */
# Line 474  typedef struct compare_context { Line 493  typedef struct compare_context {
493  #define POSSESSIVE0      (2 * sizeof(sljit_sw))  #define POSSESSIVE0      (2 * sizeof(sljit_sw))
494  #define POSSESSIVE1      (3 * sizeof(sljit_sw))  #define POSSESSIVE1      (3 * sizeof(sljit_sw))
495  /* Max limit of recursions. */  /* Max limit of recursions. */
496  #define CALL_LIMIT       (4 * sizeof(sljit_sw))  #define LIMIT_MATCH      (4 * sizeof(sljit_sw))
497  /* The output vector is stored on the stack, and contains pointers  /* The output vector is stored on the stack, and contains pointers
498  to characters. The vector data is divided into two groups: the first  to characters. The vector data is divided into two groups: the first
499  group contains the start / end character pointers, and the second is  group contains the start / end character pointers, and the second is
500  the start pointers when the end of the capturing group has not yet been reached. */  the start pointers when the end of the capturing group has not yet been reached. */
501  #define OVECTOR_START    (common->ovector_start)  #define OVECTOR_START    (common->ovector_start)
502  #define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_sw))  #define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
503  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * sizeof(sljit_sw))  #define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
504  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])  #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
505    
506  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
# Line 523  the start pointers when the end of the c Line 542  the start pointers when the end of the c
542  #define GET_LOCAL_BASE(dst, dstw, offset) \  #define GET_LOCAL_BASE(dst, dstw, offset) \
543    sljit_get_local_base(compiler, (dst), (dstw), (offset))    sljit_get_local_base(compiler, (dst), (dstw), (offset))
544    
545    #define READ_CHAR_MAX 0x7fffffff
546    
547  static pcre_uchar* bracketend(pcre_uchar* cc)  static pcre_uchar* bracketend(pcre_uchar* cc)
548  {  {
549  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));  SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
# Line 532  cc += 1 + LINK_SIZE; Line 553  cc += 1 + LINK_SIZE;
553  return cc;  return cc;
554  }  }
555    
556    static int no_alternatives(pcre_uchar* cc)
557    {
558    int count = 0;
559    SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
560    do
561      {
562      cc += GET(cc, 1);
563      count++;
564      }
565    while (*cc == OP_ALT);
566    SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
567    return count;
568    }
569    
570    static int ones_in_half_byte[16] = {
571      /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
572      /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
573    };
574    
575  /* Functions which might need modification for all new supported opcodes:  /* Functions which might need modification for all new supported opcodes:
576   next_opcode   next_opcode
577   check_opcode_types   check_opcode_types
# Line 584  switch(*cc) Line 624  switch(*cc)
624    case OP_CRMINQUERY:    case OP_CRMINQUERY:
625    case OP_CRRANGE:    case OP_CRRANGE:
626    case OP_CRMINRANGE:    case OP_CRMINRANGE:
627      case OP_CRPOSSTAR:
628      case OP_CRPOSPLUS:
629      case OP_CRPOSQUERY:
630      case OP_CRPOSRANGE:
631    case OP_CLASS:    case OP_CLASS:
632    case OP_NCLASS:    case OP_NCLASS:
633    case OP_REF:    case OP_REF:
634    case OP_REFI:    case OP_REFI:
635      case OP_DNREF:
636      case OP_DNREFI:
637    case OP_RECURSE:    case OP_RECURSE:
638    case OP_CALLOUT:    case OP_CALLOUT:
639    case OP_ALT:    case OP_ALT:
# Line 613  switch(*cc) Line 659  switch(*cc)
659    case OP_SCBRAPOS:    case OP_SCBRAPOS:
660    case OP_SCOND:    case OP_SCOND:
661    case OP_CREF:    case OP_CREF:
662    case OP_NCREF:    case OP_DNCREF:
663    case OP_RREF:    case OP_RREF:
664    case OP_NRREF:    case OP_DNRREF:
665    case OP_DEF:    case OP_DEF:
666    case OP_BRAZERO:    case OP_BRAZERO:
667    case OP_BRAMINZERO:    case OP_BRAMINZERO:
# Line 735  switch(*cc) Line 781  switch(*cc)
781    
782  static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
783  {  {
784  pcre_uchar *name;  int count;
785  pcre_uchar *name2;  pcre_uchar *slot;
 unsigned int cbra_index;  
 int i;  
786    
787  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
788  while (cc < ccend)  while (cc < ccend)
# Line 747  while (cc < ccend) Line 791  while (cc < ccend)
791      {      {
792      case OP_SET_SOM:      case OP_SET_SOM:
793      common->has_set_som = TRUE;      common->has_set_som = TRUE;
794        common->might_be_empty = TRUE;
795      cc += 1;      cc += 1;
796      break;      break;
797    
# Line 756  while (cc < ccend) Line 801  while (cc < ccend)
801      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
802      break;      break;
803    
804        case OP_BRA:
805        case OP_CBRA:
806        case OP_SBRA:
807        case OP_SCBRA:
808        count = no_alternatives(cc);
809        if (count > 4)
810          common->read_only_data_size += count * sizeof(sljit_uw);
811        cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0);
812        break;
813    
814      case OP_CBRAPOS:      case OP_CBRAPOS:
815      case OP_SCBRAPOS:      case OP_SCBRAPOS:
816      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;      common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
# Line 772  while (cc < ccend) Line 827  while (cc < ccend)
827      break;      break;
828    
829      case OP_CREF:      case OP_CREF:
830      i = GET2(cc, 1);      common->optimized_cbracket[GET2(cc, 1)] = 0;
     common->optimized_cbracket[i] = 0;  
831      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
832      break;      break;
833    
834      case OP_NCREF:      case OP_DNREF:
835      cbra_index = GET2(cc, 1);      case OP_DNREFI:
836      name = (pcre_uchar *)common->name_table;      case OP_DNCREF:
837      name2 = name;      count = GET2(cc, 1 + IMM2_SIZE);
838      for (i = 0; i < common->name_count; i++)      slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
839        {      while (count-- > 0)
840        if (GET2(name, 0) == cbra_index) break;        {
841        name += common->name_entry_size;        common->optimized_cbracket[GET2(slot, 0)] = 0;
842        }        slot += common->name_entry_size;
     SLJIT_ASSERT(i != common->name_count);  
   
     for (i = 0; i < common->name_count; i++)  
       {  
       if (STRCMP_UC_UC(name2 + IMM2_SIZE, name + IMM2_SIZE) == 0)  
         common->optimized_cbracket[GET2(name2, 0)] = 0;  
       name2 += common->name_entry_size;  
843        }        }
844      cc += 1 + IMM2_SIZE;      cc += 1 + 2 * IMM2_SIZE;
845      break;      break;
846    
847      case OP_RECURSE:      case OP_RECURSE:
# Line 2022  while (list_item) Line 2069  while (list_item)
2069  common->stubs = NULL;  common->stubs = NULL;
2070  }  }
2071    
2072  static SLJIT_INLINE void decrease_call_count(compiler_common *common)  static void add_label_addr(compiler_common *common)
2073    {
2074    DEFINE_COMPILER;
2075    label_addr_list *label_addr;
2076    
2077    label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2078    if (label_addr == NULL)
2079      return;
2080    label_addr->label = LABEL();
2081    label_addr->addr = common->read_only_data_ptr;
2082    label_addr->next = common->label_addrs;
2083    common->label_addrs = label_addr;
2084    common->read_only_data_ptr++;
2085    }
2086    
2087    static SLJIT_INLINE void count_match(compiler_common *common)
2088  {  {
2089  DEFINE_COMPILER;  DEFINE_COMPILER;
2090    
2091  OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);  OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2092  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));  add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2093  }  }
2094    
# Line 2373  return (bit < 256) ? ((0 << 8) | bit) : Line 2435  return (bit < 256) ? ((0 << 8) | bit) :
2435    
2436  static void check_partial(compiler_common *common, BOOL force)  static void check_partial(compiler_common *common, BOOL force)
2437  {  {
2438  /* Checks whether a partial matching is occured. Does not modify registers. */  /* Checks whether a partial matching is occurred. Does not modify registers. */
2439  DEFINE_COMPILER;  DEFINE_COMPILER;
2440  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
2441    
# Line 2460  else Line 2522  else
2522  JUMPHERE(jump);  JUMPHERE(jump);
2523  }  }
2524    
2525  static void read_char(compiler_common *common)  static void peek_char(compiler_common *common, pcre_uint32 max)
2526  {  {
2527  /* Reads the character into TMP1, updates STR_PTR.  /* Reads the character into TMP1, keeps STR_PTR.
2528  Does not check STR_END. TMP2 Destroyed. */  Does not check STR_END. TMP2 Destroyed. */
2529  DEFINE_COMPILER;  DEFINE_COMPILER;
2530  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2531  struct sljit_jump *jump;  struct sljit_jump *jump;
2532  #endif  #endif
2533    
2534    SLJIT_UNUSED_ARG(max);
2535    
2536  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2537  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2538  if (common->utf)  if (common->utf)
2539    {    {
2540  #if defined COMPILE_PCRE8    if (max < 128) return;
2541    
2542    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2543  #elif defined COMPILE_PCRE16    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);  
 #endif /* COMPILE_PCRE[8|16] */  
2544    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2545      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2546    JUMPHERE(jump);    JUMPHERE(jump);
2547    }    }
2548  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2549    
2550    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2551    if (common->utf)
2552      {
2553      if (max < 0xd800) return;
2554    
2555      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2556      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2557      /* TMP2 contains the high surrogate. */
2558      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2559      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2560      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2561      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2562      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2563      JUMPHERE(jump);
2564      }
2565    #endif
2566    }
2567    
2568    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2569    
2570    static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
2571    {
2572    /* Tells whether the character codes below 128 are enough
2573    to determine a match. */
2574    const pcre_uint8 value = nclass ? 0xff : 0;
2575    const pcre_uint8* end = bitset + 32;
2576    
2577    bitset += 16;
2578    do
2579      {
2580      if (*bitset++ != value)
2581        return FALSE;
2582      }
2583    while (bitset < end);
2584    return TRUE;
2585    }
2586    
2587    static void read_char7_type(compiler_common *common, BOOL full_read)
2588    {
2589    /* Reads the precise character type of a character into TMP1, if the character
2590    is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2591    full_read argument tells whether characters above max are accepted or not. */
2592    DEFINE_COMPILER;
2593    struct sljit_jump *jump;
2594    
2595    SLJIT_ASSERT(common->utf);
2596    
2597    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2598  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2599    
2600    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2601    
2602    if (full_read)
2603      {
2604      jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2605      OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2606      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2607      JUMPHERE(jump);
2608      }
2609  }  }
2610    
2611  static void peek_char(compiler_common *common)  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2612    
2613    static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2614  {  {
2615  /* Reads the character into TMP1, keeps STR_PTR.  /* Reads the precise value of a character into TMP1, if the character is
2616  Does not check STR_END. TMP2 Destroyed. */  between min and max (c >= min && c <= max). Otherwise it returns with a value
2617    outside the range. Does not check STR_END. */
2618  DEFINE_COMPILER;  DEFINE_COMPILER;
2619  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2620  struct sljit_jump *jump;  struct sljit_jump *jump;
2621  #endif  #endif
2622    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2623    struct sljit_jump *jump2;
2624    #endif
2625    
2626  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);  SLJIT_UNUSED_ARG(update_str_ptr);
2627  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  SLJIT_UNUSED_ARG(min);
2628    SLJIT_UNUSED_ARG(max);
2629    SLJIT_ASSERT(min <= max);
2630    
2631    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2632    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2633    
2634    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2635  if (common->utf)  if (common->utf)
2636    {    {
2637  #if defined COMPILE_PCRE8    if (max < 128 && !update_str_ptr) return;
2638    
2639    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2640  #elif defined COMPILE_PCRE16    if (min >= 0x10000)
2641    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);      {
2642  #endif /* COMPILE_PCRE[8|16] */      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2643    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));      if (update_str_ptr)
2644    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);        OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2645        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2646        jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2647        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2648        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2649        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2650        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2651        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2652        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2653        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2654        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2655        if (!update_str_ptr)
2656          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2657        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2658        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2659        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2660        JUMPHERE(jump2);
2661        if (update_str_ptr)
2662          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2663        }
2664      else if (min >= 0x800 && max <= 0xffff)
2665        {
2666        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2667        if (update_str_ptr)
2668          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2669        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2670        jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2671        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2672        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2673        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2674        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2675        if (!update_str_ptr)
2676          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2677        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2678        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2679        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2680        JUMPHERE(jump2);
2681        if (update_str_ptr)
2682          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2683        }
2684      else if (max >= 0x800)
2685        add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2686      else if (max < 128)
2687        {
2688        OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2689        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2690        }
2691      else
2692        {
2693        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2694        if (!update_str_ptr)
2695          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2696        else
2697          OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2698        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2699        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2700        OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2701        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2702        if (update_str_ptr)
2703          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2704        }
2705    JUMPHERE(jump);    JUMPHERE(jump);
2706    }    }
2707  #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */  #endif
2708    
2709    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2710    if (common->utf)
2711      {
2712      if (max >= 0x10000)
2713        {
2714        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2715        jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2716        /* TMP2 contains the high surrogate. */
2717        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2718        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2719        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2720        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2721        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2722        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2723        JUMPHERE(jump);
2724        return;
2725        }
2726    
2727      if (max < 0xd800 && !update_str_ptr) return;
2728    
2729      /* Skip low surrogate if necessary. */
2730      OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2731      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2732      if (update_str_ptr)
2733        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2734      if (max >= 0xd800)
2735        OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2736      JUMPHERE(jump);
2737      }
2738    #endif
2739    }
2740    
2741    static SLJIT_INLINE void read_char(compiler_common *common)
2742    {
2743    read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2744  }  }
2745    
2746  static void read_char8_type(compiler_common *common)  static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2747  {  {
2748  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */  /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2749  DEFINE_COMPILER;  DEFINE_COMPILER;
2750  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2751  struct sljit_jump *jump;  struct sljit_jump *jump;
2752  #endif  #endif
2753    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2754    struct sljit_jump *jump2;
2755    #endif
2756    
2757  #ifdef SUPPORT_UTF  SLJIT_UNUSED_ARG(update_str_ptr);
2758    
2759    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2760    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2761    
2762    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2763  if (common->utf)  if (common->utf)
2764    {    {
   OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  
 #if defined COMPILE_PCRE8  
2765    /* This can be an extra read in some situations, but hopefully    /* This can be an extra read in some situations, but hopefully
2766    it is needed in most cases. */    it is needed in most cases. */
2767    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2768    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);    jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2769    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));    if (!update_str_ptr)
2770    JUMPHERE(jump);      {
2771  #elif defined COMPILE_PCRE16      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2772    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2773    jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);      OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2774    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2775    JUMPHERE(jump);      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2776    /* Skip low surrogate if necessary. */      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2777    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2778    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);      jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2779    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2780    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);      JUMPHERE(jump2);
2781    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);      }
2782  #elif defined COMPILE_PCRE32    else
2783    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);      add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
   jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  
2784    JUMPHERE(jump);    JUMPHERE(jump);
 #endif /* COMPILE_PCRE[8|16|32] */  
2785    return;    return;
2786    }    }
2787  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2788  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);  
2789  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  #if !defined COMPILE_PCRE8
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  
2790  /* The ctypes array contains only 256 values. */  /* The ctypes array contains only 256 values. */
2791  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2792  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2793  #endif  #endif
2794  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2795  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if !defined COMPILE_PCRE8
2796  JUMPHERE(jump);  JUMPHERE(jump);
2797  #endif  #endif
2798    
2799    #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2800    if (common->utf && update_str_ptr)
2801      {
2802      /* Skip low surrogate if necessary. */
2803      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2804      jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2805      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2806      JUMPHERE(jump);
2807      }
2808    #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2809  }  }
2810    
2811  static void skip_char_back(compiler_common *common)  static void skip_char_back(compiler_common *common)
# Line 2598  if (common->utf) Line 2843  if (common->utf)
2843  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2844  }  }
2845    
2846  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)  static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2847  {  {
2848  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */  /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2849  DEFINE_COMPILER;  DEFINE_COMPILER;
2850    struct sljit_jump *jump;
2851    
2852  if (nltype == NLTYPE_ANY)  if (nltype == NLTYPE_ANY)
2853    {    {
2854    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2855    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2856    }    }
2857  else if (nltype == NLTYPE_ANYCRLF)  else if (nltype == NLTYPE_ANYCRLF)
2858    {    {
2859    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);    if (jumpifmatch)
2860    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);      {
2861    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2862    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2863    add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));      }
2864      else
2865        {
2866        jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2867        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2868        JUMPHERE(jump);
2869        }
2870    }    }
2871  else  else
2872    {    {
2873    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);    SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2874    add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));    add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2875    }    }
2876  }  }
2877    
# Line 2629  else Line 2881  else
2881  static void do_utfreadchar(compiler_common *common)  static void do_utfreadchar(compiler_common *common)
2882  {  {
2883  /* Fast decoding a UTF-8 character. TMP1 contains the first byte  /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2884  of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */  of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2885  DEFINE_COMPILER;  DEFINE_COMPILER;
2886  struct sljit_jump *jump;  struct sljit_jump *jump;
2887    
2888  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2889    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2890    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2891    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2892    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2893    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2894    
2895  /* Searching for the first zero. */  /* Searching for the first zero. */
2896  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2897  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2898  /* Two byte sequence. */  /* Two byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
2899  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2900  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2901    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2902    
2903    JUMPHERE(jump);
2904    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2905    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2906  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2907  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2908  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 JUMPHERE(jump);  
2909    
2910  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2911  jump = JUMP(SLJIT_C_NOT_ZERO);  jump = JUMP(SLJIT_C_NOT_ZERO);
2912  /* Three byte sequence. */  /* Three byte sequence. */
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  
2913  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2914  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));  
2915  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2916    
2917  /* Four byte sequence. */  /* Four byte sequence. */
2918  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  JUMPHERE(jump);
2919  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2920  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);  OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2921    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2922    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2923  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);  
2924  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2925  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2926    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2927    }
2928    
2929    static void do_utfreadchar16(compiler_common *common)
2930    {
2931    /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2932    of the character (>= 0xc0). Return value in TMP1. */
2933    DEFINE_COMPILER;
2934    struct sljit_jump *jump;
2935    
2936    sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2937    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2938    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2939    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2940  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  
2941  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2942  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));  
2943  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));  /* Searching for the first zero. */
2944    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2945    jump = JUMP(SLJIT_C_NOT_ZERO);
2946    /* Two byte sequence. */
2947    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2948    sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2949    
2950    JUMPHERE(jump);
2951    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2952    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2953    /* This code runs only in 8 bit mode. No need to shift the value. */
2954    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2955    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2956    OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2957    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2958  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2959  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2960  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));  /* Three byte sequence. */
2961    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2962  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2963  }  }
2964    
# Line 2700  jump = JUMP(SLJIT_C_NOT_ZERO); Line 2978  jump = JUMP(SLJIT_C_NOT_ZERO);
2978  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2979  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2980  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2981    /* The upper 5 bits are known at this point. */
2982    compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2983  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2984  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2985  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);  
2986  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2987  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2988    
2989  JUMPHERE(compare);  JUMPHERE(compare);
2990  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2991  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 JUMPHERE(jump);  
2992    
2993  /* We only have types for characters less than 256. */  /* We only have types for characters less than 256. */
 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);  
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
 }  
   
 #elif defined COMPILE_PCRE16  
   
 static void do_utfreadchar(compiler_common *common)  
 {  
 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char  
 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */  
 DEFINE_COMPILER;  
 struct sljit_jump *jump;  
   
 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);  
 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);  
 /* Do nothing, only return. */  
 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  
   
2994  JUMPHERE(jump);  JUMPHERE(jump);
2995  /* Combine two 16 bit characters. */  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2996  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2997  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);  
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);  
 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);  
 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));  
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);  
2998  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2999  }  }
3000    
3001  #endif /* COMPILE_PCRE[8|16] */  #endif /* COMPILE_PCRE8 */
3002    
3003  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
3004    
# Line 2821  if (firstline) Line 3073  if (firstline)
3073      mainloop = LABEL();      mainloop = LABEL();
3074      /* Continual stores does not cause data dependency. */      /* Continual stores does not cause data dependency. */
3075      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
3076      read_char(common);      read_char_range(common, common->nlmin, common->nlmax, TRUE);
3077      check_newlinechar(common, common->nltype, &newline, TRUE);      check_newlinechar(common, common->nltype, &newline, TRUE);
3078      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3079      JUMPHERE(end);      JUMPHERE(end);
# Line 2897  if (newlinecheck) Line 3149  if (newlinecheck)
3149  return mainloop;  return mainloop;
3150  }  }
3151    
3152  #define MAX_N_CHARS 3  static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
   
 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)  
3153  {  {
3154  DEFINE_COMPILER;  /* Recursive function, which scans prefix literals. */
3155  struct sljit_label *start;  int len, repeat, len_save, consumed = 0;
3156  struct sljit_jump *quit;  pcre_uint32 chr, mask;
3157  pcre_uint32 chars[MAX_N_CHARS * 2];  pcre_uchar *alternative, *cc_save, *oc;
3158  pcre_uchar *cc = common->start + 1 + LINK_SIZE;  BOOL last, any, caseless;
3159  int location = 0;  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3160  pcre_int32 len, c, bit, caseless;  pcre_uchar othercase[8];
3161  int must_stop;  #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3162    pcre_uchar othercase[2];
3163  /* We do not support alternatives now. */  #else
3164  if (*(common->start + GET(common->start, 1)) == OP_ALT)  pcre_uchar othercase[1];
3165    return FALSE;  #endif
3166    
3167    repeat = 1;
3168  while (TRUE)  while (TRUE)
3169    {    {
3170    caseless = 0;    last = TRUE;
3171    must_stop = 1;    any = FALSE;
3172    switch(*cc)    caseless = FALSE;
3173      switch (*cc)
3174      {      {
     case OP_CHAR:  
     must_stop = 0;  
     cc++;  
     break;  
   
3175      case OP_CHARI:      case OP_CHARI:
3176      caseless = 1;      caseless = TRUE;
3177      must_stop = 0;      case OP_CHAR:
3178        last = FALSE;
3179      cc++;      cc++;
3180      break;      break;
3181    
# Line 2946  while (TRUE) Line 3194  while (TRUE)
3194      cc++;      cc++;
3195      continue;      continue;
3196    
3197        case OP_ASSERT:
3198        case OP_ASSERT_NOT:
3199        case OP_ASSERTBACK:
3200        case OP_ASSERTBACK_NOT:
3201        cc = bracketend(cc);
3202        continue;
3203    
3204      case OP_PLUS:      case OP_PLUS:
3205      case OP_MINPLUS:      case OP_MINPLUS:
3206      case OP_POSPLUS:      case OP_POSPLUS:
3207      cc++;      cc++;
3208      break;      break;
3209    
3210        case OP_EXACTI:
3211        caseless = TRUE;
3212      case OP_EXACT:      case OP_EXACT:
3213        repeat = GET2(cc, 1);
3214        last = FALSE;
3215      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
3216      break;      break;
3217    
3218      case OP_PLUSI:      case OP_PLUSI:
3219      case OP_MINPLUSI:      case OP_MINPLUSI:
3220      case OP_POSPLUSI:      case OP_POSPLUSI:
3221      caseless = 1;      caseless = TRUE;
3222      cc++;      cc++;
3223      break;      break;
3224    
3225      case OP_EXACTI:      case OP_KET:
3226      caseless = 1;      cc += 1 + LINK_SIZE;
3227      cc += 1 + IMM2_SIZE;      continue;
3228    
3229        case OP_ALT:
3230        cc += GET(cc, 1);
3231        continue;
3232    
3233        case OP_ONCE:
3234        case OP_ONCE_NC:
3235        case OP_BRA:
3236        case OP_BRAPOS:
3237        case OP_CBRA:
3238        case OP_CBRAPOS:
3239        alternative = cc + GET(cc, 1);
3240        while (*alternative == OP_ALT)
3241          {
3242          max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3243          if (max_chars == 0)
3244            return consumed;
3245          alternative += GET(alternative, 1);
3246          }
3247    
3248        if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3249          cc += IMM2_SIZE;
3250        cc += 1 + LINK_SIZE;
3251        continue;
3252    
3253        case OP_CLASS:
3254    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3255        if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3256    #endif
3257        any = TRUE;
3258        cc += 1 + 32 / sizeof(pcre_uchar);
3259      break;      break;
3260    
3261      default:      case OP_NCLASS:
3262      must_stop = 2;  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3263        if (common->utf) return consumed;
3264    #endif
3265        any = TRUE;
3266        cc += 1 + 32 / sizeof(pcre_uchar);
3267      break;      break;
3268    
3269    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3270        case OP_XCLASS:
3271    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3272        if (common->utf) return consumed;
3273    #endif
3274        any = TRUE;
3275        cc += GET(cc, 1);
3276        break;
3277    #endif
3278    
3279        case OP_DIGIT:
3280    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3281        if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3282          return consumed;
3283    #endif
3284        any = TRUE;
3285        cc++;
3286        break;
3287    
3288        case OP_WHITESPACE:
3289    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3290        if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3291          return consumed;
3292    #endif
3293        any = TRUE;
3294        cc++;
3295        break;
3296    
3297        case OP_WORDCHAR:
3298    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3299        if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3300          return consumed;
3301    #endif
3302        any = TRUE;
3303        cc++;
3304        break;
3305    
3306        case OP_NOT_DIGIT:
3307        case OP_NOT_WHITESPACE:
3308        case OP_NOT_WORDCHAR:
3309        case OP_ANY:
3310        case OP_ALLANY:
3311    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3312        if (common->utf) return consumed;
3313    #endif
3314        any = TRUE;
3315        cc++;
3316        break;
3317    
3318    #ifdef SUPPORT_UCP
3319        case OP_NOTPROP:
3320        case OP_PROP:
3321    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3322        if (common->utf) return consumed;
3323    #endif
3324        any = TRUE;
3325        cc += 1 + 2;
3326        break;
3327    #endif
3328    
3329        case OP_TYPEEXACT:
3330        repeat = GET2(cc, 1);
3331        cc += 1 + IMM2_SIZE;
3332        continue;
3333    
3334        default:
3335        return consumed;
3336      }      }
3337    
3338    if (must_stop == 2)    if (any)
3339        break;      {
3340    #if defined COMPILE_PCRE8
3341        mask = 0xff;
3342    #elif defined COMPILE_PCRE16
3343        mask = 0xffff;
3344    #elif defined COMPILE_PCRE32
3345        mask = 0xffffffff;
3346    #else
3347        SLJIT_ASSERT_STOP();
3348    #endif
3349    
3350        do
3351          {
3352          chars[0] = mask;
3353          chars[1] = mask;
3354    
3355          consumed++;
3356          if (--max_chars == 0)
3357            return consumed;
3358          chars += 2;
3359          }
3360        while (--repeat > 0);
3361    
3362        repeat = 1;
3363        continue;
3364        }
3365    
3366    len = 1;    len = 1;
3367  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3368    if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3369  #endif  #endif
3370    
3371    if (caseless && char_has_othercase(common, cc))    if (caseless && char_has_othercase(common, cc))
3372      {      {
3373      caseless = char_get_othercase_bit(common, cc);  #ifdef SUPPORT_UTF
3374      if (caseless == 0)      if (common->utf)
3375        return FALSE;        {
3376  #ifdef COMPILE_PCRE8        GETCHAR(chr, cc);
3377      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));        if (PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3378  #else          return consumed;
3379      if ((caseless & 0x100) != 0)        }
       caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));  
3380      else      else
       caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));  
3381  #endif  #endif
3382          {
3383          chr = *cc;
3384          othercase[0] = TABLE_GET(chr, common->fcc, chr);
3385          }
3386      }      }
3387    else    else
3388      caseless = 0;      caseless = FALSE;
3389    
3390    while (len > 0 && location < MAX_N_CHARS * 2)    len_save = len;
3391      cc_save = cc;
3392      while (TRUE)
3393      {      {
3394      c = *cc;      oc = othercase;
3395      bit = 0;      do
     if (len == (caseless & 0xff))  
3396        {        {
3397        bit = caseless >> 8;        chr = *cc;
3398        c |= bit;  #ifdef COMPILE_PCRE32
3399          if (SLJIT_UNLIKELY(chr == NOTACHAR))
3400            return consumed;
3401    #endif
3402          mask = 0;
3403          if (caseless)
3404            {
3405            mask = *cc ^ *oc;
3406            chr |= mask;
3407            }
3408    
3409    #ifdef COMPILE_PCRE32
3410          if (chars[0] == NOTACHAR && chars[1] == 0)
3411    #else
3412          if (chars[0] == NOTACHAR)
3413    #endif
3414            {
3415            chars[0] = chr;
3416            chars[1] = mask;
3417            }
3418          else
3419            {
3420            mask |= chars[0] ^ chr;
3421            chr |= mask;
3422            chars[0] = chr;
3423            chars[1] |= mask;
3424            }
3425    
3426          len--;
3427          consumed++;
3428          if (--max_chars == 0)
3429            return consumed;
3430          chars += 2;
3431          cc++;
3432          oc++;
3433        }        }
3434        while (len > 0);
3435    
3436      chars[location] = c;      if (--repeat == 0)
3437      chars[location + 1] = bit;        break;
3438    
3439      len--;      len = len_save;
3440      location += 2;      cc = cc_save;
     cc++;  
3441      }      }
3442    
3443    if (location >= MAX_N_CHARS * 2 || must_stop != 0)    repeat = 1;
3444      if (last)
3445        return consumed;
3446      }
3447    }
3448    
3449    #define MAX_N_CHARS 16
3450    
3451    static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3452    {
3453    DEFINE_COMPILER;
3454    struct sljit_label *start;
3455    struct sljit_jump *quit;
3456    pcre_uint32 chars[MAX_N_CHARS * 2];
3457    pcre_uint8 ones[MAX_N_CHARS];
3458    int offsets[3];
3459    pcre_uint32 mask, byte;
3460    int i, max, from;
3461    int range_right = -1, range_len = 4 - 1;
3462    sljit_ub *update_table = NULL;
3463    BOOL in_range;
3464    
3465    /* This is even TRUE, if both are NULL. */
3466    SLJIT_ASSERT(common->read_only_data_ptr == common->read_only_data);
3467    
3468    for (i = 0; i < MAX_N_CHARS; i++)
3469      {
3470      chars[i << 1] = NOTACHAR;
3471      chars[(i << 1) + 1] = 0;
3472      }
3473    
3474    max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3475    
3476    if (max <= 1)
3477      return FALSE;
3478    
3479    for (i = 0; i < max; i++)
3480      {
3481      mask = chars[(i << 1) + 1];
3482      ones[i] = ones_in_half_byte[mask & 0xf];
3483      mask >>= 4;
3484      while (mask != 0)
3485        {
3486        ones[i] += ones_in_half_byte[mask & 0xf];
3487        mask >>= 4;
3488        }
3489      }
3490    
3491    in_range = FALSE;
3492    for (i = 0; i <= max; i++)
3493      {
3494      if (i < max && ones[i] <= 1)
3495        {
3496        if (!in_range)
3497          {
3498          in_range = TRUE;
3499          from = i;
3500          }
3501        }
3502      else if (in_range)
3503        {
3504        if ((i - from) > range_len)
3505          {
3506          range_len = i - from;
3507          range_right = i - 1;
3508          }
3509        in_range = FALSE;
3510        }
3511      }
3512    
3513    if (range_right >= 0)
3514      {
3515      /* Since no data is consumed (see the assert in the beginning
3516      of this function), this space can be reallocated. */
3517      if (common->read_only_data)
3518        SLJIT_FREE(common->read_only_data);
3519    
3520      common->read_only_data_size += 256;
3521      common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
3522      if (common->read_only_data == NULL)
3523        return TRUE;
3524    
3525      update_table = (sljit_ub *)common->read_only_data;
3526      common->read_only_data_ptr = (sljit_uw *)(update_table + 256);
3527      memset(update_table, IN_UCHARS(range_len), 256);
3528    
3529      for (i = 0; i < range_len; i++)
3530        {
3531        byte = chars[(range_right - i) << 1] & 0xff;
3532        if (update_table[byte] > IN_UCHARS(i))
3533          update_table[byte] = IN_UCHARS(i);
3534        mask = chars[((range_right - i) << 1) + 1] & 0xff;
3535        if (mask != 0)
3536          {
3537          byte ^= mask;
3538          if (update_table[byte] > IN_UCHARS(i))
3539            update_table[byte] = IN_UCHARS(i);
3540          }
3541        }
3542      }
3543    
3544    offsets[0] = -1;
3545    /* Scan forward. */
3546    for (i = 0; i < max; i++)
3547      if (ones[i] <= 2) {
3548        offsets[0] = i;
3549      break;      break;
3550    }    }
3551    
3552  /* At least two characters are required. */  if (offsets[0] == -1)
3553  if (location < 2 * 2)    return FALSE;
3554      return FALSE;  
3555    /* Scan backward. */
3556    offsets[1] = -1;
3557    for (i = max - 1; i > offsets[0]; i--)
3558      if (ones[i] <= 2 && i != range_right)
3559        {
3560        offsets[1] = i;
3561        break;
3562        }
3563    
3564    /* This case is handled better by fast_forward_first_char. */
3565    if (offsets[1] == -1 && offsets[0] == 0)
3566      return FALSE;
3567    
3568    offsets[2] = -1;
3569    if (offsets[1] >= 0 && range_right == -1)
3570      {
3571      /* Scan from middle. */
3572      for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3573        if (ones[i] <= 2)
3574          {
3575          offsets[2] = i;
3576          break;
3577          }
3578    
3579      if (offsets[2] == -1)
3580        {
3581        for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3582          if (ones[i] <= 2)
3583            {
3584            offsets[2] = i;
3585            break;
3586            }
3587        }
3588      }
3589    
3590    SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3591    SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3592    
3593    chars[0] = chars[offsets[0] << 1];
3594    chars[1] = chars[(offsets[0] << 1) + 1];
3595    if (offsets[2] >= 0)
3596      {
3597      chars[2] = chars[offsets[2] << 1];
3598      chars[3] = chars[(offsets[2] << 1) + 1];
3599      }
3600    if (offsets[1] >= 0)
3601      {
3602      chars[4] = chars[offsets[1] << 1];
3603      chars[5] = chars[(offsets[1] << 1) + 1];
3604      }
3605    
3606    max -= 1;
3607  if (firstline)  if (firstline)
3608    {    {
3609    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
3610      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3611    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3612    OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3613      quit = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
3614      OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
3615      JUMPHERE(quit);
3616    }    }
3617  else  else
3618    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3619    
3620    #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3621    if (range_right >= 0)
3622      OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
3623    #endif
3624    
3625  start = LABEL();  start = LABEL();
3626  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3627    
3628  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));  if (range_right >= 0)
3629  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));    {
3630    #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
3631      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
3632    #else
3633      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
3634    #endif
3635    
3636    #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3637      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
3638    #else
3639      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
3640    #endif
3641      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3642      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
3643      }
3644    
3645    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3646    if (offsets[1] >= 0)
3647      OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3648  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3649    
3650  if (chars[1] != 0)  if (chars[1] != 0)
3651    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3652  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3653  if (location > 2 * 2)  if (offsets[2] >= 0)
3654    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3655  if (chars[3] != 0)  
3656    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);  if (offsets[1] >= 0)
 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);  
 if (location > 2 * 2)  
3657    {    {
3658    if (chars[5] != 0)    if (chars[5] != 0)
3659      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3660    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3661      }
3662    
3663    if (offsets[2] >= 0)
3664      {
3665      if (chars[3] != 0)
3666        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3667      CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3668    }    }
3669  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3670    
3671  JUMPHERE(quit);  JUMPHERE(quit);
3672    
3673  if (firstline)  if (firstline)
3674      {
3675      if (range_right >= 0)
3676        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3677    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3678      if (range_right >= 0)
3679        {
3680        quit = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3681        OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
3682        JUMPHERE(quit);
3683        }
3684      }
3685  else  else
3686    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3687  return TRUE;  return TRUE;
3688  }  }
3689    
# Line 3180  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_ Line 3803  firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_
3803  skip_char_back(common);  skip_char_back(common);
3804    
3805  loop = LABEL();  loop = LABEL();
3806  read_char(common);  read_char_range(common, common->nlmin, common->nlmax, TRUE);
3807  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);  lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3808  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3809    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
# Line 3209  if (firstline) Line 3832  if (firstline)
3832    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3833  }  }
3834    
3835  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3836    
3837  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)  static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3838  {  {
3839  DEFINE_COMPILER;  DEFINE_COMPILER;
3840  struct sljit_label *start;  struct sljit_label *start;
3841  struct sljit_jump *quit;  struct sljit_jump *quit;
3842  struct sljit_jump *found = NULL;  struct sljit_jump *found = NULL;
3843  jump_list *matches = NULL;  jump_list *matches = NULL;
 pcre_uint8 inverted_start_bits[32];  
 int i;  
3844  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3845  struct sljit_jump *jump;  struct sljit_jump *jump;
3846  #endif  #endif
3847    
 for (i = 0; i < 32; ++i)  
   inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);  
   
3848  if (firstline)  if (firstline)
3849    {    {
3850    SLJIT_ASSERT(common->first_line_end != 0);    SLJIT_ASSERT(common->first_line_end != 0);
# Line 3242  if (common->utf) Line 3860  if (common->utf)
3860    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3861  #endif  #endif
3862    
3863  if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))  if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3864    {    {
3865  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
3866    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);    jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
# Line 3251  if (!check_class_ranges(common, inverted Line 3869  if (!check_class_ranges(common, inverted
3869  #endif  #endif
3870    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3871    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3872    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3873    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3874    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3875    found = JUMP(SLJIT_C_NOT_ZERO);    found = JUMP(SLJIT_C_NOT_ZERO);
# Line 3454  JUMPHERE(skipread); Line 4072  JUMPHERE(skipread);
4072    
4073  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4074  check_str_end(common, &skipread_list);  check_str_end(common, &skipread_list);
4075  peek_char(common);  peek_char(common, READ_CHAR_MAX);
4076    
4077  /* Testing char type. This is a code duplication. */  /* Testing char type. This is a code duplication. */
4078  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 3500  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE Line 4118  OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSE
4118  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4119  }  }
4120    
4121  /*  static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
   range format:  
   
   ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).  
   ranges[1] = first bit (0 or 1)  
   ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)  
 */  
   
 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)  
4122  {  {
4123  DEFINE_COMPILER;  DEFINE_COMPILER;
4124  struct sljit_jump *jump;  int ranges[MAX_RANGE_SIZE];
4125    pcre_uint8 bit, cbit, all;
4126    int i, byte, length = 0;
4127    
4128    bit = bits[0] & 0x1;
4129    /* All bits will be zero or one (since bit is zero or one). */
4130    all = -bit;
4131    
4132  if (ranges[0] < 0)  for (i = 0; i < 256; )
4133      {
4134      byte = i >> 3;
4135      if ((i & 0x7) == 0 && bits[byte] == all)
4136        i += 8;
4137      else
4138        {
4139        cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4140        if (cbit != bit)
4141          {
4142          if (length >= MAX_RANGE_SIZE)
4143            return FALSE;
4144          ranges[length] = i;
4145          length++;
4146          bit = cbit;
4147          all = -cbit;
4148          }
4149        i++;
4150        }
4151      }
4152    
4153    if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4154      {
4155      if (length >= MAX_RANGE_SIZE)
4156        return FALSE;
4157      ranges[length] = 256;
4158      length++;
4159      }
4160    
4161    if (length < 0 || length > 4)
4162    return FALSE;    return FALSE;
4163    
4164  switch(ranges[0])  bit = bits[0] & 0x1;
4165    if (invert) bit ^= 0x1;
4166    
4167    /* No character is accepted. */
4168    if (length == 0 && bit == 0)
4169      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4170    
4171    switch(length)
4172    {    {
4173      case 0:
4174      /* When bit != 0, all characters are accepted. */
4175      return TRUE;
4176    
4177    case 1:    case 1:
4178    if (readch)    add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
     read_char(common);  
   add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));  
4179    return TRUE;    return TRUE;
4180    
4181    case 2:    case 2:
4182    if (readch)    if (ranges[0] + 1 != ranges[1])
4183      read_char(common);      {
4184    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4185    add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4186        }
4187      else
4188        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4189    return TRUE;    return TRUE;
4190    
4191    case 4:    case 3:
4192    if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])    if (bit != 0)
4193      {      {
4194      if (readch)      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4195        read_char(common);      if (ranges[0] + 1 != ranges[1])
     if (ranges[1] != 0)  
4196        {        {
4197        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4198        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4199        }        }
4200      else      else
4201        {        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
       jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);  
       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));  
       JUMPHERE(jump);  
       }  
4202      return TRUE;      return TRUE;
4203      }      }
4204    if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))  
4205      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4206      if (ranges[1] + 1 != ranges[2])
4207      {      {
4208      if (readch)      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4209        read_char(common);      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);  
     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);  
     add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));  
     return TRUE;  
4210      }      }
4211    return FALSE;    else
4212        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4213    default:    return TRUE;
   return FALSE;  
   }  
 }  
   
 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)  
 {  
 int i, bit, length;  
 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;  
   
 bit = ctypes[0] & flag;  
 ranges[0] = -1;  
 ranges[1] = bit != 0 ? 1 : 0;  
 length = 0;  
4214    
4215  for (i = 1; i < 256; i++)    case 4:
4216    if ((ctypes[i] & flag) != bit)    if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4217          && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4218          && is_powerof2(ranges[2] - ranges[0]))
4219      {      {
4220      if (length >= MAX_RANGE_SIZE)      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4221        return;      if (ranges[2] + 1 != ranges[3])
4222      ranges[2 + length] = i;        {
4223      length++;        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4224      bit ^= flag;        add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4225          }
4226        else
4227          add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4228        return TRUE;
4229      }      }
4230    
4231  if (bit != 0)    if (bit != 0)
4232    {      {
4233    if (length >= MAX_RANGE_SIZE)      i = 0;
4234      return;      if (ranges[0] + 1 != ranges[1])
   ranges[2 + length] = 256;  
   length++;  
   }  
 ranges[0] = length;  
 }  
   
 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)  
 {  
 int ranges[2 + MAX_RANGE_SIZE];  
 pcre_uint8 bit, cbit, all;  
 int i, byte, length = 0;  
   
 bit = bits[0] & 0x1;  
 ranges[1] = bit;  
 /* Can be 0 or 255. */  
 all = -bit;  
   
 for (i = 0; i < 256; )  
   {  
   byte = i >> 3;  
   if ((i & 0x7) == 0 && bits[byte] == all)  
     i += 8;  
   else  
     {  
     cbit = (bits[byte] >> (i & 0x7)) & 0x1;  
     if (cbit != bit)  
4235        {        {
4236        if (length >= MAX_RANGE_SIZE)        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4237          return FALSE;        add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4238        ranges[2 + length] = i;        i = ranges[0];
       length++;  
       bit = cbit;  
       all = -cbit;  
4239        }        }
4240      i++;      else
4241          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4242    
4243        if (ranges[2] + 1 != ranges[3])
4244          {
4245          OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4246          add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4247          }
4248        else
4249          add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4250        return TRUE;
4251      }      }
   }  
4252    
4253  if (((bit == 0) && nclass) || ((bit == 1) && !nclass))    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4254    {    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4255    if (length >= MAX_RANGE_SIZE)    if (ranges[1] + 1 != ranges[2])
4256      return FALSE;      {
4257    ranges[2 + length] = 256;      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4258    length++;      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4259    }      }
4260  ranges[0] = length;    else
4261        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4262      return TRUE;
4263    
4264  return check_ranges(common, ranges, backtracks, FALSE);    default:
4265      SLJIT_ASSERT_STOP();
4266      return FALSE;
4267      }
4268  }  }
4269    
4270  static void check_anynewline(compiler_common *common)  static void check_anynewline(compiler_common *common)
# Line 4003  return cc; Line 4631  return cc;
4631  #define SET_TYPE_OFFSET(value) \  #define SET_TYPE_OFFSET(value) \
4632    if ((value) != typeoffset) \    if ((value) != typeoffset) \
4633      { \      { \
4634      if ((value) > typeoffset) \      if ((value) < typeoffset) \
       OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \  
     else \  
4635        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4636        else \
4637          OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4638      } \      } \
4639    typeoffset = (value);    typeoffset = (value);
4640    
4641  #define SET_CHAR_OFFSET(value) \  #define SET_CHAR_OFFSET(value) \
4642    if ((value) != charoffset) \    if ((value) != charoffset) \
4643      { \      { \
4644      if ((value) > charoffset) \      if ((value) < charoffset) \
4645        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4646      else \      else \
4647        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4648      } \      } \
4649    charoffset = (value);    charoffset = (value);
4650    
# Line 4024  static void compile_xclass_matchingpath( Line 4652  static void compile_xclass_matchingpath(
4652  {  {
4653  DEFINE_COMPILER;  DEFINE_COMPILER;
4654  jump_list *found = NULL;  jump_list *found = NULL;
4655  jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;  jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4656  pcre_int32 c, charoffset;  sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
 const pcre_uint32 *other_cases;  
4657  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
4658  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
4659  int compares, invertcmp, numberofcmps;  int compares, invertcmp, numberofcmps;
4660    #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4661    BOOL utf = common->utf;
4662    #endif
4663    
4664  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4665  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;  BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4666  BOOL charsaved = FALSE;  BOOL charsaved = FALSE;
4667  int typereg = TMP1, scriptreg = TMP1;  int typereg = TMP1, scriptreg = TMP1;
4668  pcre_int32 typeoffset;  const pcre_uint32 *other_cases;
4669    sljit_uw typeoffset;
4670  #endif  #endif
4671    
4672  /* Although SUPPORT_UTF must be defined, we are  /* Scanning the necessary info. */
4673     not necessary in utf mode even in 8 bit mode. */  cc++;
4674  detect_partial_match(common, backtracks);  ccbegin = cc;
4675  read_char(common);  compares = 0;
4676    if (cc[-1] & XCL_MAP)
 if ((*cc++ & XCL_MAP) != 0)  
4677    {    {
4678    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    min = 0;
 #ifndef COMPILE_PCRE8  
   jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);  
 #endif  
   
   if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))  
     {  
     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);  
     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);  
     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);  
     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);  
     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);  
     add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));  
     }  
   
 #ifndef COMPILE_PCRE8  
   JUMPHERE(jump);  
 #elif defined SUPPORT_UTF  
   if (common->utf)  
     JUMPHERE(jump);  
 #endif  
   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);  
 #ifdef SUPPORT_UCP  
   charsaved = TRUE;  
 #endif  
4679    cc += 32 / sizeof(pcre_uchar);    cc += 32 / sizeof(pcre_uchar);
4680    }    }
4681    
 /* Scanning the necessary info. */  
 ccbegin = cc;  
 compares = 0;  
4682  while (*cc != XCL_END)  while (*cc != XCL_END)
4683    {    {
4684    compares++;    compares++;
4685    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4686      {      {
4687      cc += 2;      cc ++;
4688  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4689      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (c > max) max = c;
4690  #endif      if (c < min) min = c;
4691  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4692      needschar = TRUE;      needschar = TRUE;
4693  #endif  #endif
4694      }      }
4695    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
4696      {      {
4697      cc += 2;      cc ++;
4698  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4699      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (c < min) min = c;
4700  #endif      GETCHARINCTEST(c, cc);
4701      cc++;      if (c > max) max = c;
 #ifdef SUPPORT_UTF  
     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);  
 #endif  
4702  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4703      needschar = TRUE;      needschar = TRUE;
4704  #endif  #endif
# Line 4110  while (*cc != XCL_END) Line 4708  while (*cc != XCL_END)
4708      {      {
4709      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);      SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4710      cc++;      cc++;
4711        if (*cc == PT_CLIST)
4712          {
4713          other_cases = PRIV(ucd_caseless_sets) + cc[1];
4714          while (*other_cases != NOTACHAR)
4715            {
4716            if (*other_cases > max) max = *other_cases;
4717            if (*other_cases < min) min = *other_cases;
4718            other_cases++;
4719            }
4720          }
4721        else
4722          {
4723          max = READ_CHAR_MAX;
4724          min = 0;
4725          }
4726    
4727      switch(*cc)      switch(*cc)
4728        {        {
4729        case PT_ANY:        case PT_ANY:
# Line 4129  while (*cc != XCL_END) Line 4743  while (*cc != XCL_END)
4743        case PT_SPACE:        case PT_SPACE:
4744        case PT_PXSPACE:        case PT_PXSPACE:
4745        case PT_WORD:        case PT_WORD:
4746          case PT_PXGRAPH:
4747          case PT_PXPRINT:
4748          case PT_PXPUNCT:
4749        needstype = TRUE;        needstype = TRUE;
4750        needschar = TRUE;        needschar = TRUE;
4751        break;        break;
# Line 4147  while (*cc != XCL_END) Line 4764  while (*cc != XCL_END)
4764  #endif  #endif
4765    }    }
4766    
4767    /* We are not necessary in utf mode even in 8 bit mode. */
4768    cc = ccbegin;
4769    detect_partial_match(common, backtracks);
4770    read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
4771    
4772    if ((cc[-1] & XCL_HASPROP) == 0)
4773      {
4774      if ((cc[-1] & XCL_MAP) != 0)
4775        {
4776        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4777        if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4778          {
4779          OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4780          OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4781          OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4782          OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4783          OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4784          add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4785          }
4786    
4787        add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4788        JUMPHERE(jump);
4789    
4790        cc += 32 / sizeof(pcre_uchar);
4791        }
4792      else
4793        {
4794        OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
4795        add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min));
4796        }
4797      }
4798    else if ((cc[-1] & XCL_MAP) != 0)
4799      {
4800      OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4801    #ifdef SUPPORT_UCP
4802      charsaved = TRUE;
4803    #endif
4804      if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4805        {
4806    #ifdef COMPILE_PCRE8
4807        SLJIT_ASSERT(common->utf);
4808    #endif
4809        jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4810    
4811        OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4812        OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4813        OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4814        OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4815        OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4816        add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4817    
4818        JUMPHERE(jump);
4819        }
4820    
4821      OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4822      cc += 32 / sizeof(pcre_uchar);
4823      }
4824    
4825  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4826  /* Simple register allocation. TMP1 is preferred if possible. */  /* Simple register allocation. TMP1 is preferred if possible. */
4827  if (needstype || needsscript)  if (needstype || needsscript)
# Line 4188  if (needstype || needsscript) Line 4863  if (needstype || needsscript)
4863  #endif  #endif
4864    
4865  /* Generating code. */  /* Generating code. */
 cc = ccbegin;  
4866  charoffset = 0;  charoffset = 0;
4867  numberofcmps = 0;  numberofcmps = 0;
4868  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 4204  while (*cc != XCL_END) Line 4878  while (*cc != XCL_END)
4878    if (*cc == XCL_SINGLE)    if (*cc == XCL_SINGLE)
4879      {      {
4880      cc ++;      cc ++;
4881  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4882    
4883      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4884        {        {
4885        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4886        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4887        numberofcmps++;        numberofcmps++;
4888        }        }
4889      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4890        {        {
4891        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4892        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4893        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4894        numberofcmps = 0;        numberofcmps = 0;
4895        }        }
4896      else      else
4897        {        {
4898        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4899        numberofcmps = 0;        numberofcmps = 0;
4900        }        }
4901      }      }
4902    else if (*cc == XCL_RANGE)    else if (*cc == XCL_RANGE)
4903      {      {
4904      cc ++;      cc ++;
4905  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
     if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4906      SET_CHAR_OFFSET(c);      SET_CHAR_OFFSET(c);
4907  #ifdef SUPPORT_UTF      GETCHARINCTEST(c, cc);
4908      if (common->utf)  
       {  
       GETCHARINC(c, cc);  
       }  
     else  
 #endif  
       c = *cc++;  
4909      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))      if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4910        {        {
4911        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4912        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4913        numberofcmps++;        numberofcmps++;
4914        }        }
4915      else if (numberofcmps > 0)      else if (numberofcmps > 0)
4916        {        {
4917        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4918        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4919        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4920        numberofcmps = 0;        numberofcmps = 0;
4921        }        }
4922      else      else
4923        {        {
4924        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);        jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4925        numberofcmps = 0;        numberofcmps = 0;
4926        }        }
4927      }      }
# Line 4316  while (*cc != XCL_END) Line 4970  while (*cc != XCL_END)
4970    
4971        case PT_SPACE:        case PT_SPACE:
4972        case PT_PXSPACE:        case PT_PXSPACE:
       if (*cc == PT_SPACE)  
         {  
         OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  
         jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);  
         }  
4973        SET_CHAR_OFFSET(9);        SET_CHAR_OFFSET(9);
4974        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4975        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4976        if (*cc == PT_SPACE)  
4977          JUMPHERE(jump);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4978          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4979    
4980          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4981          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4982    
4983        SET_TYPE_OFFSET(ucp_Zl);        SET_TYPE_OFFSET(ucp_Zl);
4984        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
# Line 4334  while (*cc != XCL_END) Line 4987  while (*cc != XCL_END)
4987        break;        break;
4988    
4989        case PT_WORD:        case PT_WORD:
4990        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
4991        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4992        /* Fall through. */        /* Fall through. */
4993    
# Line 4382  while (*cc != XCL_END) Line 5035  while (*cc != XCL_END)
5035          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5036          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5037    
5038          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5039          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5040    
5041          other_cases += 3;          other_cases += 3;
5042          }          }
5043        else        else
5044          {          {
5045          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5046          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5047          }          }
5048    
5049        while (*other_cases != NOTACHAR)        while (*other_cases != NOTACHAR)
5050          {          {
5051          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5052          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);          OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5053          }          }
5054        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5055        break;        break;
5056    
5057        case PT_UCNC:        case PT_UCNC:
5058        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5059        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5060        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5061        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5062        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5063        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5064    
5065        SET_CHAR_OFFSET(0xa0);        SET_CHAR_OFFSET(0xa0);
5066        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5067        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5068        SET_CHAR_OFFSET(0);        SET_CHAR_OFFSET(0);
5069        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5070        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);        OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
5071        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);        jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5072        break;        break;
5073    
5074          case PT_PXGRAPH:
5075          /* C and Z groups are the farthest two groups. */
5076          SET_TYPE_OFFSET(ucp_Ll);
5077          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5078          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5079    
5080          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5081    
5082          /* In case of ucp_Cf, we overwrite the result. */
5083          SET_CHAR_OFFSET(0x2066);
5084          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5085          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5086    
5087          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5088          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5089    
5090          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5091          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5092    
5093          JUMPHERE(jump);
5094          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5095          break;
5096    
5097          case PT_PXPRINT:
5098          /* C and Z groups are the farthest two groups. */
5099          SET_TYPE_OFFSET(ucp_Ll);
5100          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5101          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
5102    
5103          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5104          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5105    
5106          jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5107    
5108          /* In case of ucp_Cf, we overwrite the result. */
5109          SET_CHAR_OFFSET(0x2066);
5110          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5111          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5112    
5113          OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5114          OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5115    
5116          JUMPHERE(jump);
5117          jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5118          break;
5119    
5120          case PT_PXPUNCT:
5121          SET_TYPE_OFFSET(ucp_Sc);
5122          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5123          OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
5124    
5125          SET_CHAR_OFFSET(0);
5126          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
5127          OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5128    
5129          SET_TYPE_OFFSET(ucp_Pc);
5130          OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5131          OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
5132          jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
5133          break;
5134        }        }
5135      cc += 2;      cc += 2;
5136      }      }
# Line 4448  struct sljit_label *label; Line 5162  struct sljit_label *label;
5162  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5163  pcre_uchar propdata[5];  pcre_uchar propdata[5];
5164  #endif  #endif
5165  #endif  #endif /* SUPPORT_UTF */
5166    
5167  switch(type)  switch(type)
5168    {    {
# Line 4473  switch(type) Line 5187  switch(type)
5187    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
5188    case OP_DIGIT:    case OP_DIGIT:
5189    /* Digits are usually 0-9, so it is worth to optimize them. */    /* Digits are usually 0-9, so it is worth to optimize them. */
   if (common->digits[0] == -2)  
     get_ctype_ranges(common, ctype_digit, common->digits);  
5190    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5191    /* Flip the starting bit in the negative case. */  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5192    if (type == OP_NOT_DIGIT)    if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
5193      common->digits[1] ^= 1;      read_char7_type(common, type == OP_NOT_DIGIT);
5194    if (!check_ranges(common, common->digits, backtracks, TRUE))    else
5195      {  #endif
5196      read_char8_type(common);      read_char8_type(common, type == OP_NOT_DIGIT);
5197      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);      /* Flip the starting bit in the negative case. */
5198      add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
5199      }    add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
   if (type == OP_NOT_DIGIT)  
     common->digits[1] ^= 1;  
5200    return cc;    return cc;
5201    
5202    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
5203    case OP_WHITESPACE:    case OP_WHITESPACE:
5204    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5205    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5206      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
5207        read_char7_type(common, type == OP_NOT_WHITESPACE);
5208      else
5209    #endif
5210        read_char8_type(common, type == OP_NOT_WHITESPACE);
5211    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
5212    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5213    return cc;    return cc;
# Line 4500  switch(type) Line 5215  switch(type)
5215    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
5216    case OP_WORDCHAR:    case OP_WORDCHAR:
5217    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5218    read_char8_type(common);  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5219      if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
5220        read_char7_type(common, type == OP_NOT_WORDCHAR);
5221      else
5222    #endif
5223        read_char8_type(common, type == OP_NOT_WORDCHAR);
5224    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
5225    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
5226    return cc;    return cc;
5227    
5228    case OP_ANY:    case OP_ANY:
5229    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5230    read_char(common);    read_char_range(common, common->nlmin, common->nlmax, TRUE);
5231    if (common->nltype == NLTYPE_FIXED && common->newline > 255)    if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5232      {      {
5233      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);      jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
# Line 4563  switch(type) Line 5283  switch(type)
5283  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5284    case OP_NOTPROP:    case OP_NOTPROP:
5285    case OP_PROP:    case OP_PROP:
5286    propdata[0] = 0;    propdata[0] = XCL_HASPROP;
5287    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;    propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5288    propdata[2] = cc[0];    propdata[2] = cc[0];
5289    propdata[3] = cc[1];    propdata[3] = cc[1];
# Line 4575  switch(type) Line 5295  switch(type)
5295    
5296    case OP_ANYNL:    case OP_ANYNL:
5297    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5298    read_char(common);    read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5299    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5300    /* We don't need to handle soft partial matching case. */    /* We don't need to handle soft partial matching case. */
5301    end_list = NULL;    end_list = NULL;
# Line 4597  switch(type) Line 5317  switch(type)
5317    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
5318    case OP_HSPACE:    case OP_HSPACE:
5319    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5320    read_char(common);    read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5321    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5322    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5323    return cc;    return cc;
# Line 4605  switch(type) Line 5325  switch(type)
5325    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
5326    case OP_VSPACE:    case OP_VSPACE:
5327    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5328    read_char(common);    read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5329    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));    add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5330    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5331    return cc;    return cc;
# Line 4704  switch(type) Line 5424  switch(type)
5424      else      else
5425        {        {
5426        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5427        read_char(common);        read_char_range(common, common->nlmin, common->nlmax, TRUE);
5428        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));        add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5429        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));        add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5430        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));        add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
# Line 4752  switch(type) Line 5472  switch(type)
5472    else    else
5473      {      {
5474      skip_char_back(common);      skip_char_back(common);
5475      read_char(common);      read_char_range(common, common->nlmin, common->nlmax, TRUE);
5476      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5477      }      }
5478    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 4803  switch(type) Line 5523  switch(type)
5523      }      }
5524    else    else
5525      {      {
5526      peek_char(common);      peek_char(common, common->nlmax);
5527      check_newlinechar(common, common->nltype, backtracks, FALSE);      check_newlinechar(common, common->nltype, backtracks, FALSE);
5528      }      }
5529    JUMPHERE(jump[0]);    JUMPHERE(jump[0]);
# Line 4827  switch(type) Line 5547  switch(type)
5547  #endif  #endif
5548      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);      return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5549      }      }
5550    
5551    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
   read_char(common);  
5552  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
5553    if (common->utf)    if (common->utf)
5554      {      {
# Line 4837  switch(type) Line 5557  switch(type)
5557    else    else
5558  #endif  #endif
5559      c = *cc;      c = *cc;
5560    
5561    if (type == OP_CHAR || !char_has_othercase(common, cc))    if (type == OP_CHAR || !char_has_othercase(common, cc))
5562      {      {
5563        read_char_range(common, c, c, FALSE);
5564      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5565      return cc + length;      return cc + length;
5566      }      }
5567    oc = char_othercase(common, c);    oc = char_othercase(common, c);
5568      read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5569    bit = c ^ oc;    bit = c ^ oc;
5570    if (is_powerof2(bit))    if (is_powerof2(bit))
5571      {      {
# Line 4850  switch(type) Line 5573  switch(type)
5573      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5574      return cc + length;      return cc + length;
5575      }      }
5576    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);    jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5577    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5578    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);    JUMPHERE(jump[0]);
   OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
   add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));  
5579    return cc + length;    return cc + length;
5580    
5581    case OP_NOT:    case OP_NOT:
# Line 4889  switch(type) Line 5610  switch(type)
5610  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
5611        {        {
5612        GETCHARLEN(c, cc, length);        GETCHARLEN(c, cc, length);
       read_char(common);  
5613        }        }
5614      }      }
5615    else    else
5616  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
     {  
     read_char(common);  
5617      c = *cc;      c = *cc;
     }  
5618    
5619    if (type == OP_NOT || !char_has_othercase(common, cc))    if (type == OP_NOT || !char_has_othercase(common, cc))
5620        {
5621        read_char_range(common, c, c, TRUE);
5622      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5623        }
5624    else    else
5625      {      {
5626      oc = char_othercase(common, c);      oc = char_othercase(common, c);
5627        read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5628      bit = c ^ oc;      bit = c ^ oc;
5629      if (is_powerof2(bit))      if (is_powerof2(bit))
5630        {        {
# Line 4921  switch(type) Line 5642  switch(type)
5642    case OP_CLASS:    case OP_CLASS:
5643    case OP_NCLASS:    case OP_NCLASS:
5644    detect_partial_match(common, backtracks);    detect_partial_match(common, backtracks);
5645    read_char(common);  
5646    if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5647      bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5648      read_char_range(common, 0, bit, type == OP_NCLASS);
5649    #else
5650      read_char_range(common, 0, 255, type == OP_NCLASS);
5651    #endif
5652    
5653      if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5654      return cc + 32 / sizeof(pcre_uchar);      return cc + 32 / sizeof(pcre_uchar);
5655    
5656  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5657    jump[0] = NULL;    jump[0] = NULL;
 #ifdef COMPILE_PCRE8  
   /* This check only affects 8 bit mode. In other modes, we  
   always need to compare the value with 255. */  
5658    if (common->utf)    if (common->utf)
 #endif /* COMPILE_PCRE8 */  
5659      {      {
5660      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5661      if (type == OP_CLASS)      if (type == OP_CLASS)
5662        {        {
5663        add_jump(compiler, backtracks, jump[0]);        add_jump(compiler, backtracks, jump[0]);
5664        jump[0] = NULL;        jump[0] = NULL;
5665        }        }
5666      }      }
5667  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #elif !defined COMPILE_PCRE8
5668      jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5669      if (type == OP_CLASS)
5670        {
5671        add_jump(compiler, backtracks, jump[0]);
5672        jump[0] = NULL;
5673        }
5674    #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5675    
5676    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5677    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5678    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5679    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5680    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5681    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));    add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5682    
5683  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5684    if (jump[0] != NULL)    if (jump[0] != NULL)
5685      JUMPHERE(jump[0]);      JUMPHERE(jump[0]);
5686  #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */  #endif
5687    
5688    return cc + 32 / sizeof(pcre_uchar);    return cc + 32 / sizeof(pcre_uchar);
5689    
5690  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
# Line 5044  if (context.length > 0) Line 5778  if (context.length > 0)
5778    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5779    
5780    context.sourcereg = -1;    context.sourcereg = -1;
5781  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED  #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5782    context.ucharptr = 0;    context.ucharptr = 0;
5783  #endif  #endif
5784    do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);    do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5785    return cc;    return cc;
   }  
   
 /* A non-fixed length character will be checked if length == 0. */  
 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);  
 }  
   
 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)  
 {  
 DEFINE_COMPILER;  
 int offset = GET2(cc, 1) << 1;  
   
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  
 if (!common->jscript_compat)  
   {  
   if (backtracks == NULL)  
     {  
     /* OVECTOR(1) contains the "string begin - 1" constant. */  
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  
     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);  
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
     OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);  
     return JUMP(SLJIT_C_NOT_ZERO);  
     }  
   add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));  
5786    }    }
5787  return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));  
5788    /* A non-fixed length character will be checked if length == 0. */
5789    return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5790  }  }
5791    
5792  /* Forward definitions. */  /* Forward definitions. */
# Line 5109  static void compile_backtrackingpath(com Line 5821  static void compile_backtrackingpath(com
5821    
5822  #define BACKTRACK_AS(type) ((type *)backtrack)  #define BACKTRACK_AS(type) ((type *)backtrack)
5823    
5824  static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)  static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5825    {
5826    /* The OVECTOR offset goes to TMP2. */
5827    DEFINE_COMPILER;
5828    int count = GET2(cc, 1 + IMM2_SIZE);
5829    pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5830    unsigned int offset;
5831    jump_list *found = NULL;
5832    
5833    SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5834    
5835    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5836    
5837    count--;
5838    while (count-- > 0)
5839      {
5840      offset = GET2(slot, 0) << 1;
5841      GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5842      add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5843      slot += common->name_entry_size;
5844      }
5845    
5846    offset = GET2(slot, 0) << 1;
5847    GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5848    if (backtracks != NULL && !common->jscript_compat)
5849      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5850    
5851    set_jumps(found, LABEL());
5852    }
5853    
5854    static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5855  {  {
5856  DEFINE_COMPILER;  DEFINE_COMPILER;
5857  int offset = GET2(cc, 1) << 1;  BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5858    int offset = 0;
5859  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
5860  struct sljit_jump *partial;  struct sljit_jump *partial;
5861  struct sljit_jump *nopartial;  struct sljit_jump *nopartial;
5862    
5863  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));  if (ref)
5864  /* OVECTOR(1) contains the "string begin - 1" constant. */    {
5865  if (withchecks && !common->jscript_compat)    offset = GET2(cc, 1) << 1;
5866    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5867      /* OVECTOR(1) contains the "string begin - 1" constant. */
5868      if (withchecks && !common->jscript_compat)
5869        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5870      }
5871    else
5872      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5873    
5874  #if defined SUPPORT_UTF && defined SUPPORT_UCP  #if defined SUPPORT_UTF && defined SUPPORT_UCP
5875  if (common->utf && *cc == OP_REFI)  if (common->utf && *cc == OP_REFI)
5876    {    {
5877    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);    SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5878    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));    if (ref)
5879        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5880      else
5881        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5882    
5883    if (withchecks)    if (withchecks)
5884      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);      jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5885    
# Line 5151  if (common->utf && *cc == OP_REFI) Line 5904  if (common->utf && *cc == OP_REFI)
5904  else  else
5905  #endif /* SUPPORT_UTF && SUPPORT_UCP */  #endif /* SUPPORT_UTF && SUPPORT_UCP */
5906    {    {
5907    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);    if (ref)
5908        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5909      else
5910        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5911    
5912    if (withchecks)    if (withchecks)
5913      jump = JUMP(SLJIT_C_ZERO);      jump = JUMP(SLJIT_C_ZERO);
5914    
# Line 5188  if (jump != NULL) Line 5945  if (jump != NULL)
5945    else    else
5946      JUMPHERE(jump);      JUMPHERE(jump);
5947    }    }
 return cc + 1 + IMM2_SIZE;  
5948  }  }
5949    
5950  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)  static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5951  {  {
5952  DEFINE_COMPILER;  DEFINE_COMPILER;
5953    BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5954  backtrack_common *backtrack;  backtrack_common *backtrack;
5955  pcre_uchar type;  pcre_uchar type;
5956    int offset = 0;
5957  struct sljit_label *label;  struct sljit_label *label;
5958  struct sljit_jump *zerolength;  struct sljit_jump *zerolength;
5959  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
# Line 5205  BOOL minimize; Line 5963  BOOL minimize;
5963    
5964  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5965    
5966    if (ref)
5967      offset = GET2(cc, 1) << 1;
5968    else
5969      cc += IMM2_SIZE;
5970  type = cc[1 + IMM2_SIZE];  type = cc[1 + IMM2_SIZE];
5971    
5972    SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5973  minimize = (type & 0x1) != 0;  minimize = (type & 0x1) != 0;
5974  switch(type)  switch(type)
5975    {    {
# Line 5243  if (!minimize) Line 6007  if (!minimize)
6007    if (min == 0)    if (min == 0)
6008      {      {
6009      allocate_stack(common, 2);      allocate_stack(common, 2);
6010        if (ref)
6011          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6012      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6013      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6014      /* Temporary release of STR_PTR. */      /* Temporary release of STR_PTR. */
6015      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6016      zerolength = compile_ref_checks(common, ccbegin, NULL);      /* Handles both invalid and empty cases. Since the minimum repeat,
6017        is zero the invalid case is basically the same as an empty case. */
6018        if (ref)
6019          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6020        else
6021          {
6022          compile_dnref_search(common, ccbegin, NULL);
6023          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6024          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6025          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6026          }
6027      /* Restore if not zero length. */      /* Restore if not zero length. */
6028      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6029      }      }
6030    else    else
6031      {      {
6032      allocate_stack(common, 1);      allocate_stack(common, 1);
6033        if (ref)
6034          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6035      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6036      zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);      if (ref)
6037          {
6038          add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6039          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6040          }
6041        else
6042          {
6043          compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6044          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6045          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
6046          zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6047          }
6048      }      }
6049    
6050    if (min > 1 || max > 1)    if (min > 1 || max > 1)
6051      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6052    
6053    label = LABEL();    label = LABEL();
6054      if (!ref)
6055        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6056    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);    compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
6057    
6058    if (min > 1 || max > 1)    if (min > 1 || max > 1)
# Line 5292  if (!minimize) Line 6083  if (!minimize)
6083    JUMPHERE(zerolength);    JUMPHERE(zerolength);
6084    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6085    
6086    decrease_call_count(common);    count_match(common);
6087    return cc;    return cc;
6088    }    }
6089    
6090  allocate_stack(common, 2);  allocate_stack(common, ref ? 2 : 3);
6091    if (ref)
6092      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6093  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6094  if (type != OP_CRMINSTAR)  if (type != OP_CRMINSTAR)
6095    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6096    
6097  if (min == 0)  if (min == 0)
6098    {    {
6099    zerolength = compile_ref_checks(common, ccbegin, NULL);    /* Handles both invalid and empty cases. Since the minimum repeat,
6100      is zero the invalid case is basically the same as an empty case. */
6101      if (ref)
6102        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6103      else
6104        {
6105        compile_dnref_search(common, ccbegin, NULL);
6106        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6107        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6108        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6109        }
6110      /* Length is non-zero, we can match real repeats. */
6111    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6112    jump = JUMP(SLJIT_JUMP);    jump = JUMP(SLJIT_JUMP);
6113    }    }
6114  else  else
6115    zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);    {
6116      if (ref)
6117        {
6118        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6119        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6120        }
6121      else
6122        {
6123        compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
6124        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6125        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
6126        zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6127        }
6128      }
6129    
6130  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6131  if (max > 0)  if (max > 0)
6132    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
6133    
6134    if (!ref)
6135      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
6136  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
6137  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6138    
# Line 5331  if (jump != NULL) Line 6150  if (jump != NULL)
6150    JUMPHERE(jump);    JUMPHERE(jump);
6151  JUMPHERE(zerolength);  JUMPHERE(zerolength);
6152    
6153  decrease_call_count(common);  count_match(common);
6154  return cc;  return cc;
6155  }  }
6156    
# Line 5901  common->accept = save_accept; Line 6720  common->accept = save_accept;
6720  return cc + 1 + LINK_SIZE;  return cc + 1 + LINK_SIZE;
6721  }  }
6722    
 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)  
 {  
 int condition = FALSE;  
 pcre_uchar *slotA = name_table;  
 pcre_uchar *slotB;  
 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];  
 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];  
 sljit_sw no_capture;  
 int i;  
   
 locals += refno & 0xff;  
 refno >>= 8;  
 no_capture = locals[1];  
   
 for (i = 0; i < name_count; i++)  
   {  
   if (GET2(slotA, 0) == refno) break;  
   slotA += name_entry_size;  
   }  
   
 if (i < name_count)  
   {  
   /* Found a name for the number - there can be only one; duplicate names  
   for different numbers are allowed, but not vice versa. First scan down  
   for duplicates. */  
   
   slotB = slotA;  
   while (slotB > name_table)  
     {  
     slotB -= name_entry_size;  
     if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
       {  
       condition = locals[GET2(slotB, 0) << 1] != no_capture;  
       if (condition) break;  
       }  
     else break;  
     }  
   
   /* Scan up for duplicates */  
   if (!condition)  
     {  
     slotB = slotA;  
     for (i++; i < name_count; i++)  
       {  
       slotB += name_entry_size;  
       if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
         {  
         condition = locals[GET2(slotB, 0) << 1] != no_capture;  
         if (condition) break;  
         }  
       else break;  
       }  
     }  
   }  
 return condition;  
 }  
   
 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)  
 {  
 int condition = FALSE;  
 pcre_uchar *slotA = name_table;  
 pcre_uchar *slotB;  
 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];  
 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];  
 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];  
 sljit_uw i;  
   
 for (i = 0; i < name_count; i++)  
   {  
   if (GET2(slotA, 0) == recno) break;  
   slotA += name_entry_size;  
   }  
   
 if (i < name_count)  
   {  
   /* Found a name for the number - there can be only one; duplicate  
   names for different numbers are allowed, but not vice versa. First  
   scan down for duplicates. */  
   
   slotB = slotA;  
   while (slotB > name_table)  
     {  
     slotB -= name_entry_size;  
     if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
       {  
       condition = GET2(slotB, 0) == group_num;  
       if (condition) break;  
       }  
     else break;  
     }  
   
   /* Scan up for duplicates */  
   if (!condition)  
     {  
     slotB = slotA;  
     for (i++; i < name_count; i++)  
       {  
       slotB += name_entry_size;  
       if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
         {  
         condition = GET2(slotB, 0) == group_num;  
         if (condition) break;  
         }  
       else break;  
       }  
     }  
   }  
 return condition;  
 }  
   
6723  static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)  static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6724  {  {
6725  DEFINE_COMPILER;  DEFINE_COMPILER;
# Line 6143  backtrack_common *backtrack; Line 6852  backtrack_common *backtrack;
6852  pcre_uchar opcode;  pcre_uchar opcode;
6853  int private_data_ptr = 0;  int private_data_ptr = 0;
6854  int offset = 0;  int offset = 0;
6855  int stacksize;  int i, stacksize;
6856  int repeat_ptr = 0, repeat_length = 0;  int repeat_ptr = 0, repeat_length = 0;
6857  int repeat_type = 0, repeat_count = 0;  int repeat_type = 0, repeat_count = 0;
6858  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
6859  pcre_uchar *matchingpath;  pcre_uchar *matchingpath;
6860    pcre_uchar *slot;
6861  pcre_uchar bra = OP_BRA;  pcre_uchar bra = OP_BRA;
6862  pcre_uchar ket;  pcre_uchar ket;
6863  assert_backtrack *assert;  assert_backtrack *assert;
# Line 6197  SLJIT_ASSERT(!((bra == OP_BRAZERO && ket Line 6907  SLJIT_ASSERT(!((bra == OP_BRAZERO && ket
6907  cc += GET(cc, 1);  cc += GET(cc, 1);
6908    
6909  has_alternatives = *cc == OP_ALT;  has_alternatives = *cc == OP_ALT;
6910  if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))  if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6911    {    has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
   has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;  
   if (*matchingpath == OP_NRREF)  
     {  
     stacksize = GET2(matchingpath, 1);  
     if (common->currententry == NULL || stacksize == RREF_ANY)  
       has_alternatives = FALSE;  
     else if (common->currententry->start == 0)  
       has_alternatives = stacksize != 0;  
     else  
       has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);  
     }  
   }  
6912    
6913  if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))  if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6914    opcode = OP_SCOND;    opcode = OP_SCOND;
# Line 6447  if (opcode == OP_COND || opcode == OP_SC Line 7145  if (opcode == OP_COND || opcode == OP_SC
7145        CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));        CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
7146      matchingpath += 1 + IMM2_SIZE;      matchingpath += 1 + IMM2_SIZE;
7147      }      }
7148    else if (*matchingpath == OP_NCREF)    else if (*matchingpath == OP_DNCREF)
7149      {      {
7150      SLJIT_ASSERT(has_alternatives);      SLJIT_ASSERT(has_alternatives);
     stacksize = GET2(matchingpath, 1);  
     jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  
   
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);  
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);  
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);  
     OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));  
     GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);  
     OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);  
     sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));  
     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);  
     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));  
7151    
7152      JUMPHERE(jump);      i = GET2(matchingpath, 1 + IMM2_SIZE);
7153      matchingpath += 1 + IMM2_SIZE;      slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7154        OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
7155        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
7156        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7157        slot += common->name_entry_size;
7158        i--;
7159        while (i-- > 0)
7160          {
7161          OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
7162          OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
7163          slot += common->name_entry_size;
7164          }
7165        OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
7166        add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO));
7167        matchingpath += 1 + 2 * IMM2_SIZE;
7168      }      }
7169    else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)    else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF)
7170      {      {
7171      /* Never has other case. */      /* Never has other case. */
7172      BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;      BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
7173        SLJIT_ASSERT(!has_alternatives);
7174    
7175      stacksize = GET2(matchingpath, 1);      if (*matchingpath == OP_RREF)
     if (common->currententry == NULL)  
       stacksize = 0;  
     else if (stacksize == RREF_ANY)  
       stacksize = 1;  
     else if (common->currententry->start == 0)  
       stacksize = stacksize == 0;  
     else  
       stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);  
   
     if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)  
7176        {        {
7177        SLJIT_ASSERT(!has_alternatives);        stacksize = GET2(matchingpath, 1);
7178          if (common->currententry == NULL)
7179            stacksize = 0;
7180          else if (stacksize == RREF_ANY)
7181            stacksize = 1;
7182          else if (common->currententry->start == 0)
7183            stacksize = stacksize == 0;
7184          else
7185            stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7186    
7187        if (stacksize != 0)        if (stacksize != 0)
7188          matchingpath += 1 + IMM2_SIZE;          matchingpath += 1 + IMM2_SIZE;
7189          }
7190        else
7191          {
7192          if (common->currententry == NULL || common->currententry->start == 0)
7193            stacksize = 0;
7194        else        else
7195          {          {
7196            stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
7197            slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
7198            i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
7199            while (stacksize > 0)
7200              {
7201              if ((int)GET2(slot, 0) == i)
7202                break;
7203              slot += common->name_entry_size;
7204              stacksize--;
7205              }
7206            }
7207    
7208          if (stacksize != 0)
7209            matchingpath += 1 + 2 * IMM2_SIZE;
7210          }
7211    
7212          /* The stacksize == 0 is a common "else" case. */
7213          if (stacksize == 0)
7214            {
7215          if (*cc == OP_ALT)          if (*cc == OP_ALT)
7216            {            {
7217            matchingpath = cc + 1 + LINK_SIZE;            matchingpath = cc + 1 + LINK_SIZE;
# Line 6496  if (opcode == OP_COND || opcode == OP_SC Line 7220  if (opcode == OP_COND || opcode == OP_SC
7220          else          else
7221            matchingpath = cc;            matchingpath = cc;
7222          }          }
       }  
     else  
       {  
       SLJIT_ASSERT(has_alternatives);  
   
       stacksize = GET2(matchingpath, 1);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));  
       OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);  
       GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);  
       OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);  
       sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));  
       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);  
       add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));  
       matchingpath += 1 + IMM2_SIZE;  
       }  
7223      }      }
7224    else    else
7225      {      {
# Line 6664  if (bra == OP_BRAMINZERO) Line 7370  if (bra == OP_BRAMINZERO)
7370    }    }
7371    
7372  if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)  if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
7373    decrease_call_count(common);    count_match(common);
7374    
7375  /* Skip the other alternatives. */  /* Skip the other alternatives. */
7376  while (*cc == OP_ALT)  while (*cc == OP_ALT)
# Line 6951  if (!zero) Line 7657  if (!zero)
7657    
7658  /* None of them matched. */  /* None of them matched. */
7659  set_jumps(emptymatch, LABEL());  set_jumps(emptymatch, LABEL());
7660  decrease_call_count(common);  count_match(common);
7661  return cc + 1 + LINK_SIZE;  return cc + 1 + LINK_SIZE;
7662  }  }
7663    
7664  static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)  static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
7665  {  {
7666  int class_len;  int class_len;
7667    
# Line 6991  else if (*opcode >= OP_TYPESTAR && *opco Line 7697  else if (*opcode >= OP_TYPESTAR && *opco
7697    }    }
7698  else  else
7699    {    {
7700    SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);    SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
7701    *type = *opcode;    *type = *opcode;
7702    cc++;    cc++;
7703    class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);    class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
# Line 7002  else Line 7708  else
7708      if (end != NULL)      if (end != NULL)
7709        *end = cc + class_len;        *end = cc + class_len;
7710      }      }
7711      else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
7712        {
7713        *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
7714        if (end != NULL)
7715          *end = cc + class_len;
7716        }
7717    else    else
7718      {      {
7719      SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);      SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
7720      *arg1 = GET2(cc, (class_len + IMM2_SIZE));      *max = GET2(cc, (class_len + IMM2_SIZE));
7721      *arg2 = GET2(cc, class_len);      *min = GET2(cc, class_len);
7722    
7723      if (*arg2 == 0)      if (*min == 0)
7724        {        {
7725        SLJIT_ASSERT(*arg1 != 0);        SLJIT_ASSERT(*max != 0);
7726        *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;        *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
7727        }        }
7728      if (*arg1 == *arg2)      if (*max == *min)
7729        *opcode = OP_EXACT;        *opcode = OP_EXACT;
7730    
7731      if (end != NULL)      if (end != NULL)
# Line 7024  else Line 7736  else
7736    
7737  if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)  if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
7738    {    {
7739    *arg1 = GET2(cc, 0);    *max = GET2(cc, 0);
7740    cc += IMM2_SIZE;    cc += IMM2_SIZE;
7741    }    }
7742    
# Line 7053  DEFINE_COMPILER; Line 7765  DEFINE_COMPILER;
7765  backtrack_common *backtrack;  backtrack_common *backtrack;
7766  pcre_uchar opcode;  pcre_uchar opcode;
7767  pcre_uchar type;  pcre_uchar type;
7768  int arg1 = -1, arg2 = -1;  int max = -1, min = -1;
7769  pcre_uchar* end;  pcre_uchar* end;
7770  jump_list *nomatch = NULL;  jump_list *nomatch = NULL;
7771  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
# Line 7066  int tmp_base, tmp_offset; Line 7778  int tmp_base, tmp_offset;
7778    
7779  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);  PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
7780    
7781  cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);  cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
7782    
7783  switch(type)  switch(type)
7784    {    {
# Line 7137  switch(opcode) Line 7849  switch(opcode)
7849        {        {
7850        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
7851        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7852        if (opcode == OP_CRRANGE && arg2 > 0)        if (opcode == OP_CRRANGE && min > 0)
7853          CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);          CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
7854        if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))        if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
7855          jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);          jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
7856        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
7857        }        }
7858    
# Line 7167  switch(opcode) Line 7879  switch(opcode)
7879      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
7880      if (opcode <= OP_PLUS)      if (opcode <= OP_PLUS)
7881        JUMPTO(SLJIT_JUMP, label);        JUMPTO(SLJIT_JUMP, label);
7882      else if (opcode == OP_CRRANGE && arg1 == 0)      else if (opcode == OP_CRRANGE && max == 0)
7883        {        {
7884        OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);        OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
7885        JUMPTO(SLJIT_JUMP, label);        JUMPTO(SLJIT_JUMP, label);
# Line 7177  switch(opcode) Line 7889  switch(opcode)
7889        OP1(SLJIT_MOV, TMP1, 0, base, offset1);        OP1(SLJIT_MOV, TMP1, 0, base, offset1);
7890        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
7891        OP1(SLJIT_MOV, base, offset1, TMP1, 0);        OP1(SLJIT_MOV, base, offset1, TMP1, 0);
7892        CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);        CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
7893        }        }
7894      set_jumps(nomatch, LABEL());      set_jumps(nomatch, LABEL());
7895      if (opcode == OP_CRRANGE)      if (opcode == OP_CRRANGE)
7896        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, min + 1));
7897      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
7898      }      }
7899    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();    BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
# Line 7219  switch(opcode) Line 7931  switch(opcode)
7931    break;    break;
7932    
7933    case OP_EXACT:    case OP_EXACT:
7934    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
7935    label = LABEL();    label = LABEL();
7936    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7937    OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);    OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
# Line 7232  switch(opcode) Line 7944  switch(opcode)
7944    if (opcode == OP_POSPLUS)    if (opcode == OP_POSPLUS)
7945      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7946    if (opcode == OP_POSUPTO)    if (opcode == OP_POSUPTO)
7947      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max);
7948    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
7949    label = LABEL();    label = LABEL();
7950    compile_char1_matchingpath(common, type, cc, &nomatch);    compile_char1_matchingpath(common, type, cc, &nomatch);
# Line 7256  switch(opcode) Line 7968  switch(opcode)
7968    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
7969    break;    break;
7970    
7971      case OP_CRPOSRANGE:
7972      /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
7973      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
7974      label = LABEL();
7975      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
7976      OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
7977      JUMPTO(SLJIT_C_NOT_ZERO, label);
7978    
7979      if (max != 0)
7980        {
7981        SLJIT_ASSERT(max - min > 0);
7982        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max - min);
7983        }
7984      OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
7985      label = LABEL();
7986      compile_char1_matchingpath(common, type, cc, &nomatch);
7987      OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
7988      if (max == 0)
7989        JUMPTO(SLJIT_JUMP, label);
7990      else
7991        {
7992        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
7993        JUMPTO(SLJIT_C_NOT_ZERO, label);
7994        }
7995      set_jumps(nomatch, LABEL());
7996      OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
7997      break;
7998    
7999    default:    default:
8000    SLJIT_ASSERT_STOP();    SLJIT_ASSERT_STOP();
8001    break;    break;
8002    }    }
8003    
8004  decrease_call_count(common);  count_match(common);
8005  return end;  return end;
8006  }  }
8007    
# Line 7278  if (*cc == OP_FAIL) Line 8018  if (*cc == OP_FAIL)
8018    return cc + 1;    return cc + 1;
8019    }    }
8020    
8021  if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)  if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
8022    {    {
8023    /* No need to check notempty conditions. */    /* No need to check notempty conditions. */
8024    if (common->accept_label == NULL)    if (common->accept_label == NULL)
# Line 7533  while (cc < ccend) Line 8273  while (cc < ccend)
8273    
8274      case OP_CLASS:      case OP_CLASS:
8275      case OP_NCLASS:      case OP_NCLASS:
8276      if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)      if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
8277        cc = compile_iterator_matchingpath(common, cc, parent);        cc = compile_iterator_matchingpath(common, cc, parent);
8278      else      else
8279        cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);        cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
# Line 7541  while (cc < ccend) Line 8281  while (cc < ccend)
8281    
8282  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
8283      case OP_XCLASS:      case OP_XCLASS:
8284      if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)      if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
8285        cc = compile_iterator_matchingpath(common, cc, parent);        cc = compile_iterator_matchingpath(common, cc, parent);
8286      else      else
8287        cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);        cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
# Line 7550  while (cc < ccend) Line 8290  while (cc < ccend)
8290    
8291      case OP_REF:      case OP_REF:
8292      case OP_REFI:      case OP_REFI:
8293      if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)      if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
8294          cc = compile_ref_iterator_matchingpath(common, cc, parent);
8295        else
8296          {
8297          compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
8298          cc += 1 + IMM2_SIZE;
8299          }
8300        break;
8301    
8302        case OP_DNREF:
8303        case OP_DNREFI:
8304        if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
8305        cc = compile_ref_iterator_matchingpath(common, cc, parent);        cc = compile_ref_iterator_matchingpath(common, cc, parent);
8306      else      else
8307        cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);        {
8308          compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
8309          compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
8310          cc += 1 + 2 * IMM2_SIZE;
8311          }
8312      break;      break;
8313    
8314      case OP_RECURSE:      case OP_RECURSE:
# Line 7588  while (cc < ccend) Line 8343  while (cc < ccend)
8343        }        }
8344      BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();      BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
8345      if (cc[1] > OP_ASSERTBACK_NOT)      if (cc[1] > OP_ASSERTBACK_NOT)
8346        decrease_call_count(common);        count_match(common);
8347      break;      break;
8348    
8349      case OP_ONCE:      case OP_ONCE:
# Line 7706  DEFINE_COMPILER; Line 8461  DEFINE_COMPILER;
8461  pcre_uchar *cc = current->cc;  pcre_uchar *cc = current->cc;
8462  pcre_uchar opcode;  pcre_uchar opcode;
8463  pcre_uchar type;  pcre_uchar type;
8464  int arg1 = -1, arg2 = -1;  int max = -1, min = -1;
8465  struct sljit_label *label = NULL;  struct sljit_label *label = NULL;
8466  struct sljit_jump *jump = NULL;  struct sljit_jump *jump = NULL;
8467  jump_list *jumplist = NULL;  jump_list *jumplist = NULL;
# Line 7715  int base = (private_data_ptr == 0) ? SLJ Line 8470  int base = (private_data_ptr == 0) ? SLJ
8470  int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;  int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
8471  int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);  int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
8472    
8473  cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);  cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);
8474    
8475  switch(opcode)  switch(opcode)
8476    {    {
# Line 7734  switch(opcode) Line 8489  switch(opcode)
8489    else    else
8490      {      {
8491      if (opcode == OP_UPTO)      if (opcode == OP_UPTO)
8492        arg2 = 0;        min = 0;
8493      if (opcode <= OP_PLUS)      if (opcode <= OP_PLUS)
8494        {        {
8495        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
# Line 7744  switch(opcode) Line 8499  switch(opcode)
8499        {        {
8500        OP1(SLJIT_MOV, TMP1, 0, base, offset1);        OP1(SLJIT_MOV, TMP1, 0, base, offset1);
8501        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
8502        jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);        jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
8503        OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);        OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
8504        }        }
8505      skip_char_back(common);      skip_char_back(common);
# Line 7789  switch(opcode) Line 8544  switch(opcode)
8544    OP1(SLJIT_MOV, base, offset1, TMP1, 0);    OP1(SLJIT_MOV, base, offset1, TMP1, 0);
8545    
8546    if (opcode == OP_CRMINRANGE)    if (opcode == OP_CRMINRANGE)
8547      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);
8548    
8549    if (opcode == OP_CRMINRANGE && arg1 == 0)    if (opcode == OP_CRMINRANGE && max == 0)
8550      JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);      JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
8551    else    else
8552      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->matchingpath);      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
8553    
8554    set_jumps(jumplist, LABEL());    set_jumps(jumplist, LABEL());
8555    if (private_data_ptr == 0)    if (private_data_ptr == 0)
# Line 7829  switch(opcode) Line 8584  switch(opcode)
8584    
8585    case OP_EXACT:    case OP_EXACT:
8586    case OP_POSPLUS:    case OP_POSPLUS:
8587      case OP_CRPOSRANGE:
8588    set_jumps(current->topbacktracks, LABEL());    set_jumps(current->topbacktracks, LABEL());
8589    break;    break;
8590    
# Line 7847  static SLJIT_INLINE void compile_ref_ite Line 8603  static SLJIT_INLINE void compile_ref_ite
8603  {  {
8604  DEFINE_COMPILER;  DEFINE_COMPILER;
8605  pcre_uchar *cc = current->cc;  pcre_uchar *cc = current->cc;
8606    BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8607  pcre_uchar type;  pcre_uchar type;
8608    
8609  type = cc[1 + IMM2_SIZE];  type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
8610    
8611  if ((type & 0x1) == 0)  if ((type & 0x1) == 0)
8612    {    {
8613      /* Maximize case. */
8614    set_jumps(current->topbacktracks, LABEL());    set_jumps(current->topbacktracks, LABEL());
8615    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8616    free_stack(common, 1);    free_stack(common, 1);
# Line 7862  if ((type & 0x1) == 0) Line 8621  if ((type & 0x1) == 0)
8621  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8622  CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);  CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
8623  set_jumps(current->topbacktracks, LABEL());  set_jumps(current->topbacktracks, LABEL());
8624  free_stack(common, 2);  free_stack(common, ref ? 2 : 3);
8625  }  }
8626    
8627  static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)  static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
# Line 7961  if (bra == OP_BRAZERO) Line 8720  if (bra == OP_BRAZERO)
8720  static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)  static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
8721  {  {
8722  DEFINE_COMPILER;  DEFINE_COMPILER;
8723  int opcode, stacksize, count;  int opcode, stacksize, alt_count, alt_max;
8724  int offset = 0;  int offset = 0;
8725  int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;  int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
8726  int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;  int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
8727  pcre_uchar *cc = current->cc;  pcre_uchar *cc = current->cc;
8728  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
8729  pcre_uchar *ccprev;  pcre_uchar *ccprev;
 jump_list *jumplist = NULL;  
 jump_list *jumplistitem = NULL;  
8730  pcre_uchar bra = OP_BRA;  pcre_uchar bra = OP_BRA;
8731  pcre_uchar ket;  pcre_uchar ket;
8732  assert_backtrack *assert;  assert_backtrack *assert;
8733  BOOL has_alternatives;  BOOL has_alternatives;
8734  BOOL needs_control_head = FALSE;  BOOL needs_control_head = FALSE;
8735  struct sljit_jump *brazero = NULL;  struct sljit_jump *brazero = NULL;
8736    struct sljit_jump *alt1 = NULL;
8737    struct sljit_jump *alt2 = NULL;
8738  struct sljit_jump *once = NULL;  struct sljit_jump *once = NULL;
8739  struct sljit_jump *cond = NULL;  struct sljit_jump *cond = NULL;
8740  struct sljit_label *rmin_label = NULL;  struct sljit_label *rmin_label = NULL;
# Line 8013  if (SLJIT_UNLIKELY(opcode == OP_COND) && Line 8772  if (SLJIT_UNLIKELY(opcode == OP_COND) &&
8772  if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))  if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
8773    opcode = OP_ONCE;    opcode = OP_ONCE;
8774    
8775    alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
8776    
8777  /* Decoding the needs_control_head in framesize. */  /* Decoding the needs_control_head in framesize. */
8778  if (opcode == OP_ONCE)  if (opcode == OP_ONCE)
8779    {    {
# Line 8126  else if (SLJIT_UNLIKELY(opcode == OP_CON Line 8887  else if (SLJIT_UNLIKELY(opcode == OP_CON
8887      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8888      free_stack(common, 1);      free_stack(common, 1);
8889    
8890      jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));      alt_max = 2;
8891      if (SLJIT_UNLIKELY(!jumplistitem))      alt1 = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
       return;  
     jumplist = jumplistitem;  
     jumplistitem->next = NULL;  
     jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);  
8892      }      }
8893    }    }
8894  else if (*cc == OP_ALT)  else if (has_alternatives)
8895    {    {
   /* Build a jump list. Get the last successfully matched branch index. */  
8896    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8897    free_stack(common, 1);    free_stack(common, 1);
   count = 1;  
   do  
     {  
     /* Append as the last item. */  
     if (jumplist != NULL)  
       {  
       jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));  
       jumplistitem = jumplistitem->next;  
       }  
     else  
       {  
       jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));  
       jumplist = jumplistitem;  
       }  
8898    
8899      if (SLJIT_UNLIKELY(!jumplistitem))    if (alt_max > 4)
8900        return;      {
8901        /* Table jump if alt_max is greater than 4. */
8902      jumplistitem->next = NULL;      sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)common->read_only_data_ptr);
8903      jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);      add_label_addr(common);
8904      cc += GET(cc, 1);      }
8905      else
8906        {
8907        if (alt_max == 4)
8908          alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
8909        alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
8910      }      }
   while (*cc == OP_ALT);  
   
   cc = ccbegin + GET(ccbegin, 1);  
8911    }    }
8912    
8913  COMPILE_BACKTRACKINGPATH(current->top);  COMPILE_BACKTRACKINGPATH(current->top);
# Line 8198  if (SLJIT_UNLIKELY(opcode == OP_COND) || Line 8942  if (SLJIT_UNLIKELY(opcode == OP_COND) ||
8942    
8943  if (has_alternatives)  if (has_alternatives)
8944    {    {
8945    count = 1;    alt_count = sizeof(sljit_uw);
8946    do    do
8947      {      {
8948      current->top = NULL;      current->top = NULL;
# Line 8226  if (has_alternatives) Line 8970  if (has_alternatives)
8970          return;          return;
8971        }        }
8972    
8973      /* Instructions after the current alternative is succesfully matched. */      /* Instructions after the current alternative is successfully matched. */
8974      /* There is a similar code in compile_bracket_matchingpath. */      /* There is a similar code in compile_bracket_matchingpath. */
8975      if (opcode == OP_ONCE)      if (opcode == OP_ONCE)
8976        match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);        match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
# Line 8274  if (has_alternatives) Line 9018  if (has_alternatives)
9018        stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);        stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
9019    
9020      if (opcode != OP_ONCE)      if (opcode != OP_ONCE)
9021        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
9022    
9023      if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)      if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
9024        {        {
# Line 8287  if (has_alternatives) Line 9031  if (has_alternatives)
9031    
9032      if (opcode != OP_ONCE)      if (opcode != OP_ONCE)
9033        {        {
9034        SLJIT_ASSERT(jumplist);        if (alt_max > 4)
9035        JUMPHERE(jumplist->jump);          add_label_addr(common);
9036        jumplist = jumplist->next;        else
9037            {
9038            if (alt_count != 2 * sizeof(sljit_uw))
9039              {
9040              JUMPHERE(alt1);
9041              if (alt_max == 3 && alt_count == sizeof(sljit_uw))
9042                alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
9043              }
9044            else
9045              {
9046              JUMPHERE(alt2);
9047              if (alt_max == 4)
9048                alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
9049              }
9050            }
9051          alt_count += sizeof(sljit_uw);
9052        }        }
9053    
9054      COMPILE_BACKTRACKINGPATH(current->top);      COMPILE_BACKTRACKINGPATH(current->top);
# Line 8298  if (has_alternatives) Line 9057  if (has_alternatives)
9057      SLJIT_ASSERT(!current->nextbacktracks);      SLJIT_ASSERT(!current->nextbacktracks);
9058      }      }
9059    while (*cc == OP_ALT);    while (*cc == OP_ALT);
   SLJIT_ASSERT(!jumplist);  
9060    
9061    if (cond != NULL)    if (cond != NULL)
9062      {      {
# Line 8663  while (current) Line 9421  while (current)
9421    
9422      case OP_REF:      case OP_REF:
9423      case OP_REFI:      case OP_REFI:
9424        case OP_DNREF:
9425        case OP_DNREFI:
9426      compile_ref_iterator_backtrackingpath(common, current);      compile_ref_iterator_backtrackingpath(common, current);
9427      break;      break;
9428    
# Line 8897  pcre_uchar *ccend; Line 9657  pcre_uchar *ccend;
9657  executable_functions *functions;  executable_functions *functions;
9658  void *executable_func;  void *executable_func;
9659  sljit_uw executable_size;  sljit_uw executable_size;
9660    sljit_uw total_length;
9661    label_addr_list *label_addr;
9662  struct sljit_label *mainloop_label = NULL;  struct sljit_label *mainloop_label = NULL;
9663  struct sljit_label *continue_match_label;  struct sljit_label *continue_match_label;
9664  struct sljit_label *empty_match_found_label;  struct sljit_label *empty_match_found_label = NULL;
9665  struct sljit_label *empty_match_backtrack_label;  struct sljit_label *empty_match_backtrack_label = NULL;
9666  struct sljit_label *reset_match_label;  struct sljit_label *reset_match_label;
9667    struct sljit_label *quit_label;
9668  struct sljit_jump *jump;  struct sljit_jump *jump;
9669  struct sljit_jump *minlength_check_failed = NULL;  struct sljit_jump *minlength_check_failed = NULL;
9670  struct sljit_jump *reqbyte_notfound = NULL;  struct sljit_jump *reqbyte_notfound = NULL;
9671  struct sljit_jump *empty_match;  struct sljit_jump *empty_match = NULL;
 struct sljit_label *quit_label;  
9672    
9673  SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);  SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
9674  study = extra->study_data;  study = extra->study_data;
# Line 8919  memset(common, 0, sizeof(compiler_common Line 9681  memset(common, 0, sizeof(compiler_common
9681  rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;  rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
9682    
9683  common->start = rootbacktrack.cc;  common->start = rootbacktrack.cc;
9684    common->read_only_data = NULL;
9685    common->read_only_data_size = 0;
9686    common->read_only_data_ptr = NULL;
9687  common->fcc = tables + fcc_offset;  common->fcc = tables + fcc_offset;
9688  common->lcc = (sljit_sw)(tables + lcc_offset);  common->lcc = (sljit_sw)(tables + lcc_offset);
9689  common->mode = mode;  common->mode = mode;
9690    common->might_be_empty = study->minlength == 0;
9691  common->nltype = NLTYPE_FIXED;  common->nltype = NLTYPE_FIXED;
9692  switch(re->options & PCRE_NEWLINE_BITS)  switch(re->options & PCRE_NEWLINE_BITS)
9693    {    {
# Line 8942  switch(re->options & PCRE_NEWLINE_BITS) Line 9708  switch(re->options & PCRE_NEWLINE_BITS)
9708    case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;    case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
9709    default: return;    default: return;
9710    }    }
9711    common->nlmax = READ_CHAR_MAX;
9712    common->nlmin = 0;
9713  if ((re->options & PCRE_BSR_ANYCRLF) != 0)  if ((re->options & PCRE_BSR_ANYCRLF) != 0)
9714    common->bsr_nltype = NLTYPE_ANYCRLF;    common->bsr_nltype = NLTYPE_ANYCRLF;
9715  else if ((re->options & PCRE_BSR_UNICODE) != 0)  else if ((re->options & PCRE_BSR_UNICODE) != 0)
# Line 8954  else Line 9722  else
9722    common->bsr_nltype = NLTYPE_ANY;    common->bsr_nltype = NLTYPE_ANY;
9723  #endif  #endif
9724    }    }
9725    common->bsr_nlmax = READ_CHAR_MAX;
9726    common->bsr_nlmin = 0;
9727  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
9728  common->ctypes = (sljit_sw)(tables + ctypes_offset);  common->ctypes = (sljit_sw)(tables + ctypes_offset);
9729  common->digits[0] = -2;  common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
 common->name_table = (sljit_sw)((pcre_uchar *)re + re->name_table_offset);  
9730  common->name_count = re->name_count;  common->name_count = re->name_count;
9731  common->name_entry_size = re->name_entry_size;  common->name_entry_size = re->name_entry_size;
9732  common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
# Line 8967  common->utf = (re->options & PCRE_UTF8) Line 9736  common->utf = (re->options & PCRE_UTF8)
9736  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
9737  common->use_ucp = (re->options & PCRE_UCP) != 0;  common->use_ucp = (re->options & PCRE_UCP) != 0;
9738  #endif  #endif
9739    if (common->utf)
9740      {
9741      if (common->nltype == NLTYPE_ANY)
9742        common->nlmax = 0x2029;
9743      else if (common->nltype == NLTYPE_ANYCRLF)
9744        common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9745      else
9746        {
9747        /* We only care about the first newline character. */
9748        common->nlmax = common->newline & 0xff;
9749        }
9750    
9751      if (common->nltype == NLTYPE_FIXED)
9752        common->nlmin = common->newline & 0xff;
9753      else
9754        common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9755    
9756      if (common->bsr_nltype == NLTYPE_ANY)
9757        common->bsr_nlmax = 0x2029;
9758      else
9759        common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
9760      common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
9761      }
9762  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
9763  ccend = bracketend(rootbacktrack.cc);  ccend = bracketend(common->start);
9764    
9765  /* Calculate the local space size on the stack. */  /* Calculate the local space size on the stack. */
9766  common->ovector_start = CALL_LIMIT + sizeof(sljit_sw);  common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
9767  common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1);  common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1);
9768  if (!common->optimized_cbracket)  if (!common->optimized_cbracket)
9769    return;    return;
# Line 8981  memset(common->optimized_cbracket, 0, re Line 9773  memset(common->optimized_cbracket, 0, re
9773  memset(common->optimized_cbracket, 1, re->top_bracket + 1);  memset(common->optimized_cbracket, 1, re->top_bracket + 1);
9774  #endif  #endif
9775    
9776  SLJIT_ASSERT(*rootbacktrack.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);  SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
9777  #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2  #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
9778  common->capture_last_ptr = common->ovector_start;  common->capture_last_ptr = common->ovector_start;
9779  common->ovector_start += sizeof(sljit_sw);  common->ovector_start += sizeof(sljit_sw);
9780  #endif  #endif
9781  if (!check_opcode_types(common, rootbacktrack.cc, ccend))  if (!check_opcode_types(common, common->start, ccend))
9782    {    {
9783    SLJIT_FREE(common->optimized_cbracket);    SLJIT_FREE(common->optimized_cbracket);
9784    return;    return;
# Line 9049  if (common->capture_last_ptr != 0) Line 9841  if (common->capture_last_ptr != 0)
9841  SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));  SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
9842  common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);  common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9843    
9844  common->private_data_ptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(sljit_si));  total_length = ccend - common->start;
9845    common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0)));
9846  if (!common->private_data_ptrs)  if (!common->private_data_ptrs)
9847    {    {
9848    SLJIT_FREE(common->optimized_cbracket);    SLJIT_FREE(common->optimized_cbracket);
9849    return;    return;
9850    }    }
9851  memset(common->private_data_ptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int));  memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si));
9852    
9853  private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);  private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
9854  set_private_data_ptrs(common, &private_data_size, ccend);  set_private_data_ptrs(common, &private_data_size, ccend);
# Line 9068  if (private_data_size > SLJIT_MAX_LOCAL_ Line 9861  if (private_data_size > SLJIT_MAX_LOCAL_
9861    
9862  if (common->has_then)  if (common->has_then)
9863    {    {
9864    common->then_offsets = (pcre_uint8 *)SLJIT_MALLOC(ccend - rootbacktrack.cc);    common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length);
9865    if (!common->then_offsets)    memset(common->then_offsets, 0, total_length);
9866      set_then_offsets(common, common->start, NULL);
9867      }
9868    
9869    if (common->read_only_data_size > 0)
9870      {
9871      common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
9872      if (common->read_only_data == NULL)
9873      {      {
9874      SLJIT_FREE(common->optimized_cbracket);      SLJIT_FREE(common->optimized_cbracket);
9875      SLJIT_FREE(common->private_data_ptrs);      SLJIT_FREE(common->private_data_ptrs);
9876      return;      return;
9877      }      }
9878    memset(common->then_offsets, 0, ccend - rootbacktrack.cc);    common->read_only_data_ptr = common->read_only_data;
   set_then_offsets(common, rootbacktrack.cc, NULL);  
9879    }    }
9880    
9881  compiler = sljit_create_compiler();  compiler = sljit_create_compiler();
# Line 9084  if (!compiler) Line 9883  if (!compiler)
9883    {    {
9884    SLJIT_FREE(common->optimized_cbracket);    SLJIT_FREE(common->optimized_cbracket);
9885    SLJIT_FREE(common->private_data_ptrs);    SLJIT_FREE(common->private_data_ptrs);
9886    if (common->has_then)    if (common->read_only_data)
9887      SLJIT_FREE(common->then_offsets);      SLJIT_FREE(common->read_only_data);
9888    return;    return;
9889    }    }
9890  common->compiler = compiler;  common->compiler = compiler;
# Line 9103  OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1 Line 9902  OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1
9902  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
9903  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
9904  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
9905  OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, call_limit));  OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
9906  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
9907  OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));  OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
9908  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH, TMP1, 0);
9909    
9910  if (mode == JIT_PARTIAL_SOFT_COMPILE)  if (mode == JIT_PARTIAL_SOFT_COMPILE)
9911    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
# Line 9124  if ((re->options & PCRE_ANCHORED) == 0) Line 9923  if ((re->options & PCRE_ANCHORED) == 0)
9923    if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)    if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
9924      {      {
9925      if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))      if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
9926        { /* Do nothing */ }        {
9927          /* If read_only_data is reallocated, we might have an allocation failure. */
9928          if (common->read_only_data_size > 0 && common->read_only_data == NULL)
9929            {
9930            sljit_free_compiler(compiler);
9931            SLJIT_FREE(common->optimized_cbracket);
9932            SLJIT_FREE(common->private_data_ptrs);
9933            return;
9934            }
9935          }
9936      else if ((re->flags & PCRE_FIRSTSET) != 0)      else if ((re->flags & PCRE_FIRSTSET) != 0)
9937        fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);        fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
9938      else if ((re->flags & PCRE_STARTLINE) != 0)      else if ((re->flags & PCRE_STARTLINE) != 0)
9939        fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);        fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
9940      else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)      else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
9941        fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);        fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
9942      }      }
9943    }    }
9944  else  else
# Line 9148  if (common->req_char_ptr != 0) Line 9956  if (common->req_char_ptr != 0)
9956  /* Store the current STR_PTR in OVECTOR(0). */  /* Store the current STR_PTR in OVECTOR(0). */
9957  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
9958  /* Copy the limit of allowed recursions. */  /* Copy the limit of allowed recursions. */
9959  OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);  OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH);
9960  if (common->capture_last_ptr != 0)  if (common->capture_last_ptr != 0)
9961    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, -1);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, -1);
9962    
# Line 9171  if (mode == JIT_PARTIAL_SOFT_COMPILE) Line 9979  if (mode == JIT_PARTIAL_SOFT_COMPILE)
9979  else if (mode == JIT_PARTIAL_HARD_COMPILE)  else if (mode == JIT_PARTIAL_HARD_COMPILE)
9980    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
9981    
9982  compile_matchingpath(common, rootbacktrack.cc, ccend, &rootbacktrack);  compile_matchingpath(common, common->start, ccend, &rootbacktrack);
9983  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9984    {    {
9985    sljit_free_compiler(compiler);    sljit_free_compiler(compiler);
9986    SLJIT_FREE(common->optimized_cbracket);    SLJIT_FREE(common->optimized_cbracket);
9987    SLJIT_FREE(common->private_data_ptrs);    SLJIT_FREE(common->private_data_ptrs);
9988    if (common->has_then)    if (common->read_only_data)
9989      SLJIT_FREE(common->then_offsets);      SLJIT_FREE(common->read_only_data);
9990    return;    return;
9991    }    }
9992    
9993  empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));  if (common->might_be_empty)
9994  empty_match_found_label = LABEL();    {
9995      empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
9996      empty_match_found_label = LABEL();
9997      }
9998    
9999  common->accept_label = LABEL();  common->accept_label = LABEL();
10000  if (common->accept != NULL)  if (common->accept != NULL)
# Line 9207  if (mode != JIT_COMPILE) Line 10018  if (mode != JIT_COMPILE)
10018    return_with_partial_match(common, common->quit_label);    return_with_partial_match(common, common->quit_label);
10019    }    }
10020    
10021  empty_match_backtrack_label = LABEL();  if (common->might_be_empty)
10022      empty_match_backtrack_label = LABEL();
10023  compile_backtrackingpath(common, rootbacktrack.top);  compile_backtrackingpath(common, rootbacktrack.top);
10024  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10025    {    {
10026    sljit_free_compiler(compiler);    sljit_free_compiler(compiler);
10027    SLJIT_FREE(common->optimized_cbracket);    SLJIT_FREE(common->optimized_cbracket);
10028    SLJIT_FREE(common->private_data_ptrs);    SLJIT_FREE(common->private_data_ptrs);
10029    if (common->has_then)    if (common->read_only_data)
10030      SLJIT_FREE(common->then_offsets);      SLJIT_FREE(common->read_only_data);
10031    return;    return;
10032    }    }
10033    
# Line 9261  JUMPTO(SLJIT_JUMP, common->quit_label); Line 10073  JUMPTO(SLJIT_JUMP, common->quit_label);
10073    
10074  flush_stubs(common);  flush_stubs(common);
10075    
10076  JUMPHERE(empty_match);  if (common->might_be_empty)
10077  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);    {
10078  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));    JUMPHERE(empty_match);
10079  CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10080  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
10081  CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
10082  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));    OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
10083  CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);    CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
10084  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
10085      CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
10086      JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
10087      }
10088    
10089  common->currententry = common->entries;  common->currententry = common->entries;
10090  common->local_exit = TRUE;  common->local_exit = TRUE;
# Line 9283  while (common->currententry != NULL) Line 10098  while (common->currententry != NULL)
10098      sljit_free_compiler(compiler);      sljit_free_compiler(compiler);